/* subr_turnstile.c revision 90418 */
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
2765557Sjasone * 2865557Sjasone * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 2967352Sjhb * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 3065557Sjasone * $FreeBSD: head/sys/kern/subr_turnstile.c 90418 2002-02-09 00:12:53Z jhb $ 3165557Sjasone */ 3265557Sjasone 3365557Sjasone/* 3486411Sjhb * Machine independent bits of mutex implementation. 3572200Sbmilekic */ 3672200Sbmilekic 3768790Sjhb#include "opt_ddb.h" 3867676Sjhb 3965557Sjasone#include <sys/param.h> 4067352Sjhb#include <sys/bus.h> 4167352Sjhb#include <sys/kernel.h> 4276166Smarkm#include <sys/lock.h> 4367352Sjhb#include <sys/malloc.h> 4474912Sjhb#include <sys/mutex.h> 4565557Sjasone#include <sys/proc.h> 4678766Sjhb#include <sys/resourcevar.h> 4767676Sjhb#include <sys/sysctl.h> 4865557Sjasone#include <sys/systm.h> 4967352Sjhb#include <sys/vmmeter.h> 5065557Sjasone#include <sys/ktr.h> 5165557Sjasone 5267352Sjhb#include <machine/atomic.h> 5367352Sjhb#include <machine/bus.h> 5467352Sjhb#include <machine/clock.h> 5565557Sjasone#include <machine/cpu.h> 5667352Sjhb 5768790Sjhb#include <ddb/ddb.h> 5868790Sjhb 5967352Sjhb#include <vm/vm.h> 6067352Sjhb#include <vm/vm_extern.h> 6167352Sjhb 6265557Sjasone/* 6372200Sbmilekic * Internal utility macros. 6471352Sjasone */ 6572200Sbmilekic#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 6671352Sjasone 6772200Sbmilekic#define mtx_owner(m) (mtx_unowned((m)) ? NULL \ 6883366Sjulian : (struct thread *)((m)->mtx_lock & MTX_FLAGMASK)) 6971352Sjasone 7083366Sjulian#define SET_PRIO(td, pri) (td)->td_ksegrp->kg_pri.pri_level = (pri) 7171352Sjasone 7271352Sjasone/* 7374912Sjhb * Lock classes for sleep and spin mutexes. 
7471352Sjasone */ 7574912Sjhbstruct lock_class lock_class_mtx_sleep = { 7674912Sjhb "sleep mutex", 7774912Sjhb LC_SLEEPLOCK | LC_RECURSABLE 7874912Sjhb}; 7974912Sjhbstruct lock_class lock_class_mtx_spin = { 8074912Sjhb "spin mutex", 8174912Sjhb LC_SPINLOCK | LC_RECURSABLE 8274912Sjhb}; 8371352Sjasone 8471352Sjasone/* 8572200Sbmilekic * Prototypes for non-exported routines. 8672200Sbmilekic */ 8783366Sjulianstatic void propagate_priority(struct thread *); 8867352Sjhb 8967352Sjhbstatic void 9083366Sjulianpropagate_priority(struct thread *td) 9167352Sjhb{ 9283366Sjulian struct ksegrp *kg = td->td_ksegrp; 9383366Sjulian int pri = kg->kg_pri.pri_level; 9483366Sjulian struct mtx *m = td->td_blocked; 9567352Sjhb 9669376Sjhb mtx_assert(&sched_lock, MA_OWNED); 9767352Sjhb for (;;) { 9883366Sjulian struct thread *td1; 9967352Sjhb 10083366Sjulian td = mtx_owner(m); 10167352Sjhb 10283366Sjulian if (td == NULL) { 10367352Sjhb /* 10467352Sjhb * This really isn't quite right. Really 10583366Sjulian * ought to bump priority of thread that 10667352Sjhb * next acquires the mutex. 10767352Sjhb */ 10867352Sjhb MPASS(m->mtx_lock == MTX_CONTESTED); 10967352Sjhb return; 11067352Sjhb } 11183679Sjhb kg = td->td_ksegrp; 11272200Sbmilekic 11383366Sjulian MPASS(td->td_proc->p_magic == P_MAGIC); 11483366Sjulian KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex")); 11583366Sjulian if (kg->kg_pri.pri_level <= pri) /* lower is higher priority */ 11667352Sjhb return; 11769376Sjhb 11867352Sjhb /* 11983366Sjulian * Bump this thread's priority. 12069376Sjhb */ 12183366Sjulian SET_PRIO(td, pri); 12269376Sjhb 12369376Sjhb /* 12467352Sjhb * If lock holder is actually running, just bump priority. 
12567352Sjhb */ 12683366Sjulian /* XXXKSE this test is not sufficient */ 12783366Sjulian if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) { 12883366Sjulian MPASS(td->td_proc->p_stat == SRUN 12983366Sjulian || td->td_proc->p_stat == SZOMB 13083366Sjulian || td->td_proc->p_stat == SSTOP); 13167352Sjhb return; 13267352Sjhb } 13372376Sjake 13473912Sjhb#ifndef SMP 13567352Sjhb /* 13683366Sjulian * For UP, we check to see if td is curthread (this shouldn't 13773912Sjhb * ever happen however as it would mean we are in a deadlock.) 13873912Sjhb */ 13983366Sjulian KASSERT(td != curthread, ("Deadlock detected")); 14073912Sjhb#endif 14173912Sjhb 14273912Sjhb /* 14383366Sjulian * If on run queue move to new run queue, and quit. 14483366Sjulian * XXXKSE this gets a lot more complicated under threads 14583366Sjulian * but try anyhow. 14667352Sjhb */ 14783366Sjulian if (td->td_proc->p_stat == SRUN) { 14883366Sjulian MPASS(td->td_blocked == NULL); 14983366Sjulian remrunqueue(td); 15083366Sjulian setrunqueue(td); 15167352Sjhb return; 15267352Sjhb } 15367352Sjhb 15467352Sjhb /* 15569376Sjhb * If we aren't blocked on a mutex, we should be. 15667352Sjhb */ 15783366Sjulian KASSERT(td->td_proc->p_stat == SMTX, ( 15869376Sjhb "process %d(%s):%d holds %s but isn't blocked on a mutex\n", 15983366Sjulian td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat, 16074912Sjhb m->mtx_object.lo_name)); 16167352Sjhb 16267352Sjhb /* 16383366Sjulian * Pick up the mutex that td is blocked on. 
16467352Sjhb */ 16583366Sjulian m = td->td_blocked; 16667352Sjhb MPASS(m != NULL); 16767352Sjhb 16867352Sjhb /* 16983366Sjulian * Check if the thread needs to be moved up on 17067352Sjhb * the blocked chain 17167352Sjhb */ 17283366Sjulian if (td == TAILQ_FIRST(&m->mtx_blocked)) { 17369376Sjhb continue; 17469376Sjhb } 17572200Sbmilekic 17683366Sjulian td1 = TAILQ_PREV(td, threadqueue, td_blkq); 17783366Sjulian if (td1->td_ksegrp->kg_pri.pri_level <= pri) { 17867352Sjhb continue; 17967352Sjhb } 18067352Sjhb 18167352Sjhb /* 18283366Sjulian * Remove thread from blocked chain and determine where 18383366Sjulian * it should be moved up to. Since we know that td1 has 18483366Sjulian * a lower priority than td, we know that at least one 18583366Sjulian * thread in the chain has a lower priority and that 18683366Sjulian * td1 will thus not be NULL after the loop. 18767352Sjhb */ 18883366Sjulian TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq); 18983366Sjulian TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) { 19083366Sjulian MPASS(td1->td_proc->p_magic == P_MAGIC); 19183366Sjulian if (td1->td_ksegrp->kg_pri.pri_level > pri) 19267352Sjhb break; 19367352Sjhb } 19472200Sbmilekic 19583366Sjulian MPASS(td1 != NULL); 19683366Sjulian TAILQ_INSERT_BEFORE(td1, td, td_blkq); 19767352Sjhb CTR4(KTR_LOCK, 19871560Sjhb "propagate_priority: p %p moved before %p on [%p] %s", 19983366Sjulian td, td1, m, m->mtx_object.lo_name); 20067352Sjhb } 20167352Sjhb} 20267352Sjhb 20371352Sjasone/* 20474900Sjhb * Function versions of the inlined __mtx_* macros. These are used by 20574900Sjhb * modules and can also be called from assembly language if needed. 
20674900Sjhb */ 20774900Sjhbvoid 20874900Sjhb_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) 20974900Sjhb{ 21074900Sjhb 21183841Sjhb MPASS(curthread != NULL); 21283841Sjhb _get_sleep_lock(m, curthread, opts, file, line); 21383841Sjhb LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 21483841Sjhb line); 21583841Sjhb WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 21674900Sjhb} 21774900Sjhb 21874900Sjhbvoid 21974900Sjhb_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) 22074900Sjhb{ 22174900Sjhb 22283841Sjhb MPASS(curthread != NULL); 22383947Sjhb mtx_assert(m, MA_OWNED); 22483841Sjhb WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 22583841Sjhb LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 22683841Sjhb line); 22783841Sjhb _rel_sleep_lock(m, curthread, opts, file, line); 22874900Sjhb} 22974900Sjhb 23074900Sjhbvoid 23174900Sjhb_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) 23274900Sjhb{ 23374900Sjhb 23483841Sjhb MPASS(curthread != NULL); 23583841Sjhb _get_spin_lock(m, curthread, opts, file, line); 23683841Sjhb LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 23783841Sjhb line); 23883841Sjhb WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 23974900Sjhb} 24074900Sjhb 24174900Sjhbvoid 24274900Sjhb_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) 24374900Sjhb{ 24474900Sjhb 24583841Sjhb MPASS(curthread != NULL); 24683947Sjhb mtx_assert(m, MA_OWNED); 24783841Sjhb WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 24883841Sjhb LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 24983841Sjhb line); 25083841Sjhb _rel_spin_lock(m); 25174900Sjhb} 25274900Sjhb 25374900Sjhb/* 25472200Sbmilekic * The important part of mtx_trylock{,_flags}() 25572200Sbmilekic * Tries to acquire lock `m.' 
We do NOT handle recursion here; we assume that 25672200Sbmilekic * if we're called, it's because we know we don't already own this lock. 25771352Sjasone */ 25872200Sbmilekicint 25972200Sbmilekic_mtx_trylock(struct mtx *m, int opts, const char *file, int line) 26071352Sjasone{ 26172200Sbmilekic int rval; 26271352Sjasone 26383366Sjulian MPASS(curthread != NULL); 26471352Sjasone 26583366Sjulian rval = _obtain_lock(m, curthread); 26672200Sbmilekic 26774912Sjhb LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line); 26874912Sjhb if (rval) { 26971352Sjasone /* 27072200Sbmilekic * We do not handle recursion in _mtx_trylock; see the 27172200Sbmilekic * note at the top of the routine. 27271352Sjasone */ 27372344Sbmilekic KASSERT(!mtx_recursed(m), 27472344Sbmilekic ("mtx_trylock() called on a recursed mutex")); 27576272Sjhb WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, 27676272Sjhb file, line); 27771352Sjasone } 27871352Sjasone 27974912Sjhb return (rval); 28071352Sjasone} 28171352Sjasone 28271352Sjasone/* 28372200Sbmilekic * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. 28471352Sjasone * 28572200Sbmilekic * We call this if the lock is either contested (i.e. we need to go to 28672200Sbmilekic * sleep waiting for it), or if we need to recurse on it. 
28771352Sjasone */ 28872200Sbmilekicvoid 28972200Sbmilekic_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line) 29071352Sjasone{ 29183366Sjulian struct thread *td = curthread; 29283366Sjulian struct ksegrp *kg = td->td_ksegrp; 29371352Sjasone 29483366Sjulian if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) { 29572200Sbmilekic m->mtx_recurse++; 29672200Sbmilekic atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); 29774912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 29872344Sbmilekic CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); 29972200Sbmilekic return; 30071352Sjasone } 30171352Sjasone 30274912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 30372994Sjhb CTR4(KTR_LOCK, 30472994Sjhb "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", 30574912Sjhb m->mtx_object.lo_name, (void *)m->mtx_lock, file, line); 30671352Sjasone 30783366Sjulian while (!_obtain_lock(m, td)) { 30872200Sbmilekic uintptr_t v; 30983366Sjulian struct thread *td1; 31071352Sjasone 31172200Sbmilekic mtx_lock_spin(&sched_lock); 31272200Sbmilekic /* 31372200Sbmilekic * Check if the lock has been released while spinning for 31472200Sbmilekic * the sched_lock. 31572200Sbmilekic */ 31672200Sbmilekic if ((v = m->mtx_lock) == MTX_UNOWNED) { 31772200Sbmilekic mtx_unlock_spin(&sched_lock); 31872200Sbmilekic continue; 31971352Sjasone } 32071352Sjasone 32172200Sbmilekic /* 32272200Sbmilekic * The mutex was marked contested on release. This means that 32383366Sjulian * there are threads blocked on it. 
32472200Sbmilekic */ 32572200Sbmilekic if (v == MTX_CONTESTED) { 32683366Sjulian td1 = TAILQ_FIRST(&m->mtx_blocked); 32783366Sjulian MPASS(td1 != NULL); 32883366Sjulian m->mtx_lock = (uintptr_t)td | MTX_CONTESTED; 32967352Sjhb 33083366Sjulian if (td1->td_ksegrp->kg_pri.pri_level < kg->kg_pri.pri_level) 33183366Sjulian SET_PRIO(td, td1->td_ksegrp->kg_pri.pri_level); 33272200Sbmilekic mtx_unlock_spin(&sched_lock); 33367352Sjhb return; 33467352Sjhb } 33569376Sjhb 33669376Sjhb /* 33772200Sbmilekic * If the mutex isn't already contested and a failure occurs 33872200Sbmilekic * setting the contested bit, the mutex was either released 33972200Sbmilekic * or the state of the MTX_RECURSED bit changed. 34069376Sjhb */ 34172200Sbmilekic if ((v & MTX_CONTESTED) == 0 && 34272200Sbmilekic !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 34372200Sbmilekic (void *)(v | MTX_CONTESTED))) { 34472200Sbmilekic mtx_unlock_spin(&sched_lock); 34572200Sbmilekic continue; 34672200Sbmilekic } 34767352Sjhb 34872200Sbmilekic /* 34972200Sbmilekic * We deffinately must sleep for this lock. 35072200Sbmilekic */ 35172200Sbmilekic mtx_assert(m, MA_NOTOWNED); 35267352Sjhb 35367352Sjhb#ifdef notyet 35472200Sbmilekic /* 35572200Sbmilekic * If we're borrowing an interrupted thread's VM context, we 35672200Sbmilekic * must clean up before going to sleep. 35772200Sbmilekic */ 35883366Sjulian if (td->td_ithd != NULL) { 35983366Sjulian struct ithd *it = td->td_ithd; 36067352Sjhb 36172200Sbmilekic if (it->it_interrupted) { 36274912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 36372200Sbmilekic CTR2(KTR_LOCK, 36472994Sjhb "_mtx_lock_sleep: %p interrupted %p", 36572200Sbmilekic it, it->it_interrupted); 36672200Sbmilekic intr_thd_fixup(it); 36767352Sjhb } 36872200Sbmilekic } 36967352Sjhb#endif 37067352Sjhb 37172200Sbmilekic /* 37272200Sbmilekic * Put us on the list of threads blocked on this mutex. 
37372200Sbmilekic */ 37472200Sbmilekic if (TAILQ_EMPTY(&m->mtx_blocked)) { 37590418Sjhb td1 = mtx_owner(m); 37683366Sjulian LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested); 37783366Sjulian TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq); 37872200Sbmilekic } else { 37983366Sjulian TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) 38083366Sjulian if (td1->td_ksegrp->kg_pri.pri_level > kg->kg_pri.pri_level) 38172200Sbmilekic break; 38283366Sjulian if (td1) 38383366Sjulian TAILQ_INSERT_BEFORE(td1, td, td_blkq); 38472200Sbmilekic else 38583366Sjulian TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq); 38672200Sbmilekic } 38767352Sjhb 38872200Sbmilekic /* 38972200Sbmilekic * Save who we're blocked on. 39072200Sbmilekic */ 39183366Sjulian td->td_blocked = m; 39283366Sjulian td->td_mtxname = m->mtx_object.lo_name; 39383366Sjulian td->td_proc->p_stat = SMTX; 39483366Sjulian propagate_priority(td); 39567352Sjhb 39674912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 39772200Sbmilekic CTR3(KTR_LOCK, 39883366Sjulian "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m, 39974912Sjhb m->mtx_object.lo_name); 40072200Sbmilekic 40183366Sjulian td->td_proc->p_stats->p_ru.ru_nvcsw++; 40272200Sbmilekic mi_switch(); 40372200Sbmilekic 40474912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 40572200Sbmilekic CTR3(KTR_LOCK, 40672200Sbmilekic "_mtx_lock_sleep: p %p free from blocked on [%p] %s", 40783366Sjulian td, m, m->mtx_object.lo_name); 40872200Sbmilekic 40972200Sbmilekic mtx_unlock_spin(&sched_lock); 41072200Sbmilekic } 41172200Sbmilekic 41272200Sbmilekic return; 41372200Sbmilekic} 41472200Sbmilekic 41572200Sbmilekic/* 41672200Sbmilekic * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock. 41772200Sbmilekic * 41872200Sbmilekic * This is only called if we need to actually spin for the lock. Recursion 41972200Sbmilekic * is handled inline. 
42072200Sbmilekic */ 42172200Sbmilekicvoid 42288088Sjhb_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line) 42372200Sbmilekic{ 42472200Sbmilekic int i = 0; 42572200Sbmilekic 42674912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 42772344Sbmilekic CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); 42872200Sbmilekic 42972200Sbmilekic for (;;) { 43083366Sjulian if (_obtain_lock(m, curthread)) 43172200Sbmilekic break; 43272200Sbmilekic 43375568Sjhb /* Give interrupts a chance while we spin. */ 43488088Sjhb critical_exit(); 43572200Sbmilekic while (m->mtx_lock != MTX_UNOWNED) { 43689392Sjhb if (i++ < 10000000) 43772200Sbmilekic continue; 43889392Sjhb if (i++ < 60000000) 43972200Sbmilekic DELAY(1); 44067352Sjhb#ifdef DDB 44172200Sbmilekic else if (!db_active) 44267352Sjhb#else 44372200Sbmilekic else 44467352Sjhb#endif 44572200Sbmilekic panic("spin lock %s held by %p for > 5 seconds", 44674912Sjhb m->mtx_object.lo_name, (void *)m->mtx_lock); 44767352Sjhb } 44888088Sjhb critical_enter(); 44967352Sjhb } 45072200Sbmilekic 45174912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 45272200Sbmilekic CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); 45372200Sbmilekic 45472200Sbmilekic return; 45567352Sjhb} 45667352Sjhb 45772200Sbmilekic/* 45872200Sbmilekic * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. 45972200Sbmilekic * 46072200Sbmilekic * We are only called here if the lock is recursed or contested (i.e. we 46172200Sbmilekic * need to wake up a blocked thread). 
46272200Sbmilekic */ 46367352Sjhbvoid 46472200Sbmilekic_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) 46567352Sjhb{ 46683366Sjulian struct thread *td, *td1; 46767352Sjhb struct mtx *m1; 46867352Sjhb int pri; 46983366Sjulian struct ksegrp *kg; 47067352Sjhb 47183366Sjulian td = curthread; 47283366Sjulian kg = td->td_ksegrp; 47372200Sbmilekic 47472200Sbmilekic if (mtx_recursed(m)) { 47572200Sbmilekic if (--(m->mtx_recurse) == 0) 47672200Sbmilekic atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); 47774912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 47872200Sbmilekic CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); 47972200Sbmilekic return; 48072200Sbmilekic } 48172200Sbmilekic 48272200Sbmilekic mtx_lock_spin(&sched_lock); 48374912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 48472200Sbmilekic CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); 48572200Sbmilekic 48683366Sjulian td1 = TAILQ_FIRST(&m->mtx_blocked); 48783366Sjulian MPASS(td->td_proc->p_magic == P_MAGIC); 48883366Sjulian MPASS(td1->td_proc->p_magic == P_MAGIC); 48972200Sbmilekic 49083366Sjulian TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq); 49172200Sbmilekic 49272200Sbmilekic if (TAILQ_EMPTY(&m->mtx_blocked)) { 49372200Sbmilekic LIST_REMOVE(m, mtx_contested); 49472200Sbmilekic _release_lock_quick(m); 49574912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 49672200Sbmilekic CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m); 49772200Sbmilekic } else 49872200Sbmilekic atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED); 49972200Sbmilekic 50072376Sjake pri = PRI_MAX; 50183366Sjulian LIST_FOREACH(m1, &td->td_contested, mtx_contested) { 50283366Sjulian int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_ksegrp->kg_pri.pri_level; 50372200Sbmilekic if (cp < pri) 50472200Sbmilekic pri = cp; 50572200Sbmilekic } 50672200Sbmilekic 50783366Sjulian if (pri > kg->kg_pri.pri_native) 50883366Sjulian pri = kg->kg_pri.pri_native; 50983366Sjulian SET_PRIO(td, pri); 51072200Sbmilekic 51174912Sjhb if 
(LOCK_LOG_TEST(&m->mtx_object, opts)) 51272200Sbmilekic CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p", 51383366Sjulian m, td1); 51472200Sbmilekic 51583366Sjulian td1->td_blocked = NULL; 51683366Sjulian td1->td_proc->p_stat = SRUN; 51783366Sjulian setrunqueue(td1); 51872200Sbmilekic 51988900Sjhb if (td->td_critnest == 1 && td1->td_ksegrp->kg_pri.pri_level < pri) { 52067352Sjhb#ifdef notyet 52183366Sjulian if (td->td_ithd != NULL) { 52283366Sjulian struct ithd *it = td->td_ithd; 52367352Sjhb 52472200Sbmilekic if (it->it_interrupted) { 52574912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 52672200Sbmilekic CTR2(KTR_LOCK, 52772994Sjhb "_mtx_unlock_sleep: %p interrupted %p", 52872200Sbmilekic it, it->it_interrupted); 52972200Sbmilekic intr_thd_fixup(it); 53067352Sjhb } 53172200Sbmilekic } 53267352Sjhb#endif 53383366Sjulian setrunqueue(td); 53474912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 53572200Sbmilekic CTR2(KTR_LOCK, 53672200Sbmilekic "_mtx_unlock_sleep: %p switching out lock=%p", m, 53772200Sbmilekic (void *)m->mtx_lock); 53872200Sbmilekic 53983366Sjulian td->td_proc->p_stats->p_ru.ru_nivcsw++; 54072200Sbmilekic mi_switch(); 54174912Sjhb if (LOCK_LOG_TEST(&m->mtx_object, opts)) 54272200Sbmilekic CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", 54372200Sbmilekic m, (void *)m->mtx_lock); 54467352Sjhb } 54572200Sbmilekic 54672200Sbmilekic mtx_unlock_spin(&sched_lock); 54772200Sbmilekic 54872200Sbmilekic return; 54967352Sjhb} 55067352Sjhb 55172200Sbmilekic/* 55272200Sbmilekic * All the unlocking of MTX_SPIN locks is done inline. 55372200Sbmilekic * See the _rel_spin_lock() macro for the details. 

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	/* Don't pile a second panic on top of an in-progress one. */
	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate __P((struct mtx *));

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * Mutex initialization routine; initialize lock `m' of type contained in
 * `opts' with options contained in `opts' and description `description.'
 */
62872200Sbmilekic */ 62967352Sjhbvoid 63072200Sbmilekicmtx_init(struct mtx *m, const char *description, int opts) 63167352Sjhb{ 63274912Sjhb struct lock_object *lock; 63372200Sbmilekic 63474912Sjhb MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | 63574912Sjhb MTX_SLEEPABLE | MTX_NOWITNESS)) == 0); 63672200Sbmilekic 63767352Sjhb#ifdef MUTEX_DEBUG 63872200Sbmilekic /* Diagnostic and error correction */ 63974912Sjhb mtx_validate(m); 64069429Sjhb#endif 64167352Sjhb 64285205Sjhb lock = &m->mtx_object; 64385205Sjhb KASSERT((lock->lo_flags & LO_INITIALIZED) == 0, 64485205Sjhb ("mutex %s %p already initialized", description, m)); 64574912Sjhb bzero(m, sizeof(*m)); 64674912Sjhb if (opts & MTX_SPIN) 64774912Sjhb lock->lo_class = &lock_class_mtx_spin; 64874912Sjhb else 64974912Sjhb lock->lo_class = &lock_class_mtx_sleep; 65074912Sjhb lock->lo_name = description; 65174912Sjhb if (opts & MTX_QUIET) 65274912Sjhb lock->lo_flags = LO_QUIET; 65374912Sjhb if (opts & MTX_RECURSE) 65474912Sjhb lock->lo_flags |= LO_RECURSABLE; 65574912Sjhb if (opts & MTX_SLEEPABLE) 65674912Sjhb lock->lo_flags |= LO_SLEEPABLE; 65774912Sjhb if ((opts & MTX_NOWITNESS) == 0) 65874912Sjhb lock->lo_flags |= LO_WITNESS; 65972200Sbmilekic 66067352Sjhb m->mtx_lock = MTX_UNOWNED; 66174912Sjhb TAILQ_INIT(&m->mtx_blocked); 66272200Sbmilekic 66374912Sjhb LOCK_LOG_INIT(lock, opts); 66472200Sbmilekic 66574912Sjhb WITNESS_INIT(lock); 66667352Sjhb} 66767352Sjhb 66872200Sbmilekic/* 66974912Sjhb * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be 67074912Sjhb * passed in as a flag here because if the corresponding mtx_init() was 67174912Sjhb * called with MTX_QUIET set, then it will already be set in the mutex's 67274912Sjhb * flags. 
67372200Sbmilekic */ 67467352Sjhbvoid 67567352Sjhbmtx_destroy(struct mtx *m) 67667352Sjhb{ 67767352Sjhb 67874912Sjhb LOCK_LOG_DESTROY(&m->mtx_object, 0); 67972200Sbmilekic 68074912Sjhb if (!mtx_owned(m)) 68174912Sjhb MPASS(mtx_unowned(m)); 68274912Sjhb else { 68371228Sbmilekic MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); 68472200Sbmilekic 68574912Sjhb /* Tell witness this isn't locked to make it happy. */ 68688900Sjhb WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__, 68788900Sjhb __LINE__); 68871320Sjasone } 68971320Sjasone 69074912Sjhb WITNESS_DESTROY(&m->mtx_object); 69171320Sjasone} 69285564Sdillon 69385564Sdillon/* 69485564Sdillon * Encapsulated Giant mutex routines. These routines provide encapsulation 69585564Sdillon * control for the Giant mutex, allowing sysctls to be used to turn on and 69685564Sdillon * off Giant around certain subsystems. The default value for the sysctls 69785564Sdillon * are set to what developers believe is stable and working in regards to 69885564Sdillon * the Giant pushdown. Developers should not turn off Giant via these 69985564Sdillon * sysctls unless they know what they are doing. 70085564Sdillon * 70185564Sdillon * Callers of mtx_lock_giant() are expected to pass the return value to an 70285564Sdillon * accompanying mtx_unlock_giant() later on. If multiple subsystems are 70385564Sdillon * effected by a Giant wrap, all related sysctl variables must be zero for 70485564Sdillon * the subsystem call to operate without Giant (as determined by the caller). 
70585564Sdillon */ 70685564Sdillon 70785564SdillonSYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation"); 70885564Sdillon 70985564Sdillonstatic int kern_giant_all = 0; 71085564SdillonSYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, ""); 71185564Sdillon 71285564Sdillonint kern_giant_proc = 1; /* Giant around PROC locks */ 71385564Sdillonint kern_giant_file = 1; /* Giant around struct file & filedesc */ 71485564SdillonSYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, ""); 71585564SdillonSYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, ""); 71685564Sdillon 71785564Sdillonint 71885564Sdillonmtx_lock_giant(int sysctlvar) 71985564Sdillon{ 72085564Sdillon if (sysctlvar || kern_giant_all) { 72185564Sdillon mtx_lock(&Giant); 72285564Sdillon return(1); 72385564Sdillon } 72485564Sdillon return(0); 72585564Sdillon} 72685564Sdillon 72785564Sdillonvoid 72885564Sdillonmtx_unlock_giant(int s) 72985564Sdillon{ 73085564Sdillon if (s) 73185564Sdillon mtx_unlock(&Giant); 73285564Sdillon} 73385564Sdillon 734