subr_turnstile.c revision 93692
142421Syokota/*- 242421Syokota * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 342421Syokota * 442421Syokota * Redistribution and use in source and binary forms, with or without 542421Syokota * modification, are permitted provided that the following conditions 642421Syokota * are met: 742421Syokota * 1. Redistributions of source code must retain the above copyright 842421Syokota * notice, this list of conditions and the following disclaimer. 942421Syokota * 2. Redistributions in binary form must reproduce the above copyright 1042421Syokota * notice, this list of conditions and the following disclaimer in the 1142421Syokota * documentation and/or other materials provided with the distribution. 1242421Syokota * 3. Berkeley Software Design Inc's name may not be used to endorse or 1342421Syokota * promote products derived from this software without specific prior 1442421Syokota * written permission. 1542421Syokota * 1642421Syokota * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 1742421Syokota * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1842421Syokota * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1942421Syokota * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 2042421Syokota * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2142421Syokota * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2242421Syokota * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2342421Syokota * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2442421Syokota * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2542421Syokota * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2642421Syokota * SUCH DAMAGE. 2742421Syokota * 2842421Syokota * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 2942421Syokota * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 3042421Syokota * $FreeBSD: head/sys/kern/subr_turnstile.c 93692 2002-04-02 20:44:30Z jhb $ 3142421Syokota */ 3242421Syokota 33119418Sobrien/* 34119418Sobrien * Machine independent bits of mutex implementation. 35119418Sobrien */ 3642421Syokota 3742421Syokota#include "opt_ddb.h" 3842421Syokota 3942421Syokota#include <sys/param.h> 4058271Syokota#include <sys/systm.h> 4142421Syokota#include <sys/bus.h> 4242421Syokota#include <sys/kernel.h> 4358271Syokota#include <sys/ktr.h> 4458271Syokota#include <sys/lock.h> 4558271Syokota#include <sys/malloc.h> 4642421Syokota#include <sys/mutex.h> 47147271Smarius#include <sys/proc.h> 4842421Syokota#include <sys/resourcevar.h> 49147271Smarius#include <sys/sbuf.h> 50147271Smarius#include <sys/sysctl.h> 51147271Smarius#include <sys/vmmeter.h> 52147271Smarius 53147271Smarius#include <machine/atomic.h> 5442421Syokota#include <machine/bus.h> 55147271Smarius#include <machine/clock.h> 5642421Syokota#include <machine/cpu.h> 5742421Syokota 5842421Syokota#include <ddb/ddb.h> 59102149Speter 6042421Syokota#include <vm/vm.h> 6142421Syokota#include <vm/vm_extern.h> 6242421Syokota 6342421Syokota/* 6442421Syokota * Internal utility macros. 6542421Syokota */ 6642421Syokota#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 6742421Syokota 6842421Syokota#define mtx_owner(m) (mtx_unowned((m)) ? NULL \ 6942421Syokota : (struct thread *)((m)->mtx_lock & MTX_FLAGMASK)) 7042421Syokota 7142421Syokota/* 7242421Syokota * Lock classes for sleep and spin mutexes. 7342421Syokota */ 7442421Syokotastruct lock_class lock_class_mtx_sleep = { 7542421Syokota "sleep mutex", 7658271Syokota LC_SLEEPLOCK | LC_RECURSABLE 7758271Syokota}; 7858271Syokotastruct lock_class lock_class_mtx_spin = { 7958271Syokota "spin mutex", 8058271Syokota LC_SPINLOCK | LC_RECURSABLE 8158271Syokota}; 8258271Syokota 8342421Syokota/* 8442421Syokota * Prototypes for non-exported routines. 8542421Syokota */ 8642421Syokotastatic void propagate_priority(struct thread *); 8742421Syokota 8842421Syokotastatic void 8942421Syokotapropagate_priority(struct thread *td) 9042421Syokota{ 9142421Syokota int pri = td->td_priority; 9242421Syokota struct mtx *m = td->td_blocked; 9342421Syokota 9442421Syokota mtx_assert(&sched_lock, MA_OWNED); 9542421Syokota for (;;) { 96147271Smarius struct thread *td1; 97147271Smarius 98147271Smarius td = mtx_owner(m); 99147271Smarius 10042421Syokota if (td == NULL) { 10142421Syokota /* 10258271Syokota * This really isn't quite right. Really 10358271Syokota * ought to bump priority of thread that 10442421Syokota * next acquires the mutex. 10542421Syokota */ 10642421Syokota MPASS(m->mtx_lock == MTX_CONTESTED); 10742421Syokota return; 10842421Syokota } 10942421Syokota 11042421Syokota MPASS(td->td_proc->p_magic == P_MAGIC); 11142421Syokota KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex")); 11242421Syokota if (td->td_priority <= pri) /* lower is higher priority */ 11342421Syokota return; 11442421Syokota 11542421Syokota /* 11642421Syokota * Bump this thread's priority. 11742421Syokota */ 11842421Syokota td->td_priority = pri; 11942421Syokota 12042421Syokota /* 12142421Syokota * If lock holder is actually running, just bump priority. 12242421Syokota */ 12369781Sdwmalone /* XXXKSE this test is not sufficient */ 12442421Syokota if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) { 12542421Syokota MPASS(td->td_proc->p_stat == SRUN 12642421Syokota || td->td_proc->p_stat == SZOMB 12742421Syokota || td->td_proc->p_stat == SSTOP); 12842421Syokota return; 12942421Syokota } 13042421Syokota 13158271Syokota#ifndef SMP 13242421Syokota /* 13358271Syokota * For UP, we check to see if td is curthread (this shouldn't 13447296Syokota * ever happen however as it would mean we are in a deadlock.) 13558271Syokota */ 13658271Syokota KASSERT(td != curthread, ("Deadlock detected")); 13747296Syokota#endif 13847296Syokota 13947296Syokota /* 14047296Syokota * If on run queue move to new run queue, and quit. 14158271Syokota * XXXKSE this gets a lot more complicated under threads 14258271Syokota * but try anyhow. 14347296Syokota */ 14458271Syokota if (td->td_proc->p_stat == SRUN) { 14558271Syokota MPASS(td->td_blocked == NULL); 14658271Syokota remrunqueue(td); 14742421Syokota setrunqueue(td); 14842421Syokota return; 14942421Syokota } 15042421Syokota 15142421Syokota /* 15242421Syokota * If we aren't blocked on a mutex, we should be. 15358271Syokota */ 15458271Syokota KASSERT(td->td_proc->p_stat == SMTX, ( 15558271Syokota "process %d(%s):%d holds %s but isn't blocked on a mutex\n", 156158041Ssobomax td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat, 157158041Ssobomax m->mtx_object.lo_name)); 158158041Ssobomax 159158041Ssobomax /* 160147271Smarius * Pick up the mutex that td is blocked on. 161147271Smarius */ 162147271Smarius m = td->td_blocked; 163147271Smarius MPASS(m != NULL); 164147271Smarius 165147271Smarius /* 166147271Smarius * Check if the thread needs to be moved up on 167147271Smarius * the blocked chain 16858271Syokota */ 16958271Syokota if (td == TAILQ_FIRST(&m->mtx_blocked)) { 17058271Syokota continue; 17158271Syokota } 17258271Syokota 173114930Speter td1 = TAILQ_PREV(td, threadqueue, td_blkq); 17458271Syokota if (td1->td_priority <= pri) { 175114930Speter continue; 176114930Speter } 17758271Syokota 17865176Sdfr /* 17992661Speter * Remove thread from blocked chain and determine where 18092661Speter * it should be moved up to. Since we know that td1 has 181147271Smarius * a lower priority than td, we know that at least one 182147271Smarius * thread in the chain has a lower priority and that 18392661Speter * td1 will thus not be NULL after the loop. 18492661Speter */ 18558271Syokota TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq); 18658271Syokota TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) { 187147271Smarius MPASS(td1->td_proc->p_magic == P_MAGIC); 188147271Smarius if (td1->td_priority > pri) 189147271Smarius break; 190147271Smarius } 191147271Smarius 192147271Smarius MPASS(td1 != NULL); 193147271Smarius TAILQ_INSERT_BEFORE(td1, td, td_blkq); 194147271Smarius CTR4(KTR_LOCK, 195147271Smarius "propagate_priority: p %p moved before %p on [%p] %s", 196147271Smarius td, td1, m, m->mtx_object.lo_name); 197147271Smarius } 198147271Smarius} 199147271Smarius 200147271Smarius#ifdef MUTEX_PROFILING 201147271SmariusSYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging"); 202147271SmariusSYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling"); 203147271Smariusstatic int mutex_prof_enable = 0; 204147271SmariusSYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW, 205147271Smarius &mutex_prof_enable, 0, "Enable tracing of mutex holdtime"); 206147271Smarius 207147271Smariusstruct mutex_prof { 208147271Smarius const char *name; 209147271Smarius const char *file; 210147271Smarius int line; 211147271Smarius#define MPROF_MAX 0 212147271Smarius#define MPROF_TOT 1 213147271Smarius#define MPROF_CNT 2 214147271Smarius#define MPROF_AVG 3 215147271Smarius u_int64_t counter[4]; 216147271Smarius}; 217147271Smarius 218153072Sru/* 21958271Syokota * mprof_buf is a static pool of profiling records to avoid possible 22058271Syokota * reentrance of the memory allocation functions. 22158271Syokota * 22258271Syokota * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE. 22358271Syokota */ 22458271Syokota#define NUM_MPROF_BUFFERS 4096 225147271Smariusstatic struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS]; 226158041Ssobomaxstatic int first_free_mprof_buf; 227158041Ssobomax#define MPROF_HASH_SIZE 32771 228158041Ssobomaxstatic struct mutex_prof *mprof_hash[MPROF_HASH_SIZE]; 229158041Ssobomax 230158041Ssobomaxstatic int mutex_prof_acquisitions; 231158041SsobomaxSYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD, 232158041Ssobomax &mutex_prof_acquisitions, 0, "Number of mutex acquistions recorded"); 233158041Ssobomaxstatic int mutex_prof_records; 234158041SsobomaxSYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD, 235158041Ssobomax &mutex_prof_records, 0, "Number of profiling records"); 236158041Ssobomaxstatic int mutex_prof_maxrecords = NUM_MPROF_BUFFERS; 237158041SsobomaxSYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD, 238158041Ssobomax &mutex_prof_maxrecords, 0, "Maximum number of profiling records"); 239158041Ssobomaxstatic int mutex_prof_rejected; 240158041SsobomaxSYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD, 241158041Ssobomax &mutex_prof_rejected, 0, "Number of rejected profiling records"); 242158041Ssobomaxstatic int mutex_prof_hashsize = MPROF_HASH_SIZE; 243158041SsobomaxSYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD, 244158041Ssobomax &mutex_prof_hashsize, 0, "Hash size"); 245158041Ssobomaxstatic int mutex_prof_collisions = 0; 24658271SyokotaSYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD, 24742421Syokota &mutex_prof_collisions, 0, "Number of hash collisions"); 24842421Syokota 24942421Syokota/* 25058271Syokota * mprof_mtx protects the profiling buffers and the hash. 25158271Syokota */ 25242421Syokotastatic struct mtx mprof_mtx; 25358271Syokota 25442421Syokotastatic void 25542421Syokotamprof_init(void *arg __unused) 25642421Syokota{ 25742421Syokota mtx_init(&mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET); 25842421Syokota} 25942421SyokotaSYSINIT(mprofinit, SI_SUB_LOCK, SI_ORDER_ANY, mprof_init, NULL); 26042421Syokota 26142421Syokotastatic u_int64_t 26242421Syokotananoseconds(void) 26342421Syokota{ 26442421Syokota struct timespec tv; 26542421Syokota 26658271Syokota nanotime(&tv); 26758271Syokota return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec); 26858271Syokota} 26942421Syokota 27042421Syokotastatic int 27142421Syokotadump_mutex_prof_stats(SYSCTL_HANDLER_ARGS) 27258271Syokota{ 27342421Syokota struct sbuf *sb; 27458271Syokota int error, i; 27542421Syokota 27658271Syokota if (first_free_mprof_buf == 0) 27758271Syokota return SYSCTL_OUT(req, "No locking recorded", 27858271Syokota sizeof("No locking recorded")); 27958271Syokota 28058271Syokota sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND); 28158271Syokota sbuf_printf(sb, "%12s %12s %12s %12s %s\n", 28258271Syokota "max", "total", "count", "average", "name"); 28342421Syokota mtx_lock_spin(&mprof_mtx); 28442421Syokota for (i = 0; i < first_free_mprof_buf; ++i) 28542421Syokota sbuf_printf(sb, "%12llu %12llu %12llu %12llu %s:%d (%s)\n", 28642421Syokota mprof_buf[i].counter[MPROF_MAX] / 1000, 28742421Syokota mprof_buf[i].counter[MPROF_TOT] / 1000, 28842421Syokota mprof_buf[i].counter[MPROF_CNT], 28942421Syokota mprof_buf[i].counter[MPROF_AVG] / 1000, 29042421Syokota mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name); 29142421Syokota mtx_unlock_spin(&mprof_mtx); 29242421Syokota sbuf_finish(sb); 29393279Smurray error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 29442421Syokota sbuf_delete(sb); 29542421Syokota return (error); 29693279Smurray} 29742421SyokotaSYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD, 29842421Syokota NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics"); 29942421Syokota#endif 30042421Syokota 30142421Syokota/* 30242421Syokota * Function versions of the inlined __mtx_* macros. These are used by 30342421Syokota * modules and can also be called from assembly language if needed. 30442421Syokota */ 30542421Syokotavoid 30642421Syokota_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) 30742421Syokota{ 30842421Syokota 30942421Syokota MPASS(curthread != NULL); 31042421Syokota _get_sleep_lock(m, curthread, opts, file, line); 31142421Syokota LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 31242421Syokota line); 31342421Syokota WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 31442421Syokota#ifdef MUTEX_PROFILING 31593279Smurray /* don't reset the timer when/if recursing */ 31693279Smurray if (m->acqtime == 0) { 31742421Syokota m->file = file; 31893279Smurray m->line = line; 31993279Smurray m->acqtime = mutex_prof_enable ? nanoseconds() : 0; 32093279Smurray ++mutex_prof_acquisitions; 32142421Syokota } 32242421Syokota#endif 32342421Syokota} 32442421Syokota 32542421Syokotavoid 32642421Syokota_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) 32742421Syokota{ 32842421Syokota 32942421Syokota MPASS(curthread != NULL); 33042421Syokota mtx_assert(m, MA_OWNED); 33142421Syokota#ifdef MUTEX_PROFILING 33242421Syokota if (m->acqtime != 0) { 33342421Syokota static const char *unknown = "(unknown)"; 33442421Syokota struct mutex_prof *mpp; 33542421Syokota u_int64_t acqtime, now; 33642421Syokota const char *p, *q; 33742421Syokota volatile u_int hash, n; 33842421Syokota 33942421Syokota now = nanoseconds(); 34042421Syokota acqtime = m->acqtime; 34158271Syokota m->acqtime = 0; 34242421Syokota if (now <= acqtime) 34342421Syokota goto out; 34442421Syokota for (p = file; strncmp(p, "../", 3) == 0; p += 3) 34542421Syokota /* nothing */ ; 34642421Syokota if (p == NULL || *p == '\0') 34742421Syokota p = unknown; 34842421Syokota for (hash = line, q = p; *q != '\0'; ++q) 34942421Syokota hash = (hash * 2 + *q) % MPROF_HASH_SIZE; 35042421Syokota mtx_lock_spin(&mprof_mtx); 35142421Syokota n = hash; 35242421Syokota while ((mpp = mprof_hash[n]) != NULL) { 35342421Syokota if (mpp->line == line && strcmp(mpp->file, p) == 0) 35442421Syokota break; 35542421Syokota n = (n + 1) % MPROF_HASH_SIZE; 35642421Syokota } 35742421Syokota if (mpp == NULL) { 35842421Syokota /* Just exit if we cannot get a trace buffer */ 35942421Syokota if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) { 36042421Syokota ++mutex_prof_rejected; 36142421Syokota goto unlock; 36242421Syokota } 36342421Syokota mpp = &mprof_buf[first_free_mprof_buf++]; 36442421Syokota mpp->name = mtx_name(m); 36542421Syokota mpp->file = p; 36642421Syokota mpp->line = line; 36742421Syokota mutex_prof_collisions += n - hash; 36842421Syokota ++mutex_prof_records; 36942421Syokota mprof_hash[hash] = mpp; 37042421Syokota } 37142421Syokota /* 37242421Syokota * Record if the mutex has been held longer now than ever 37342421Syokota * before 37442421Syokota */ 37542421Syokota if ((now - acqtime) > mpp->counter[MPROF_MAX]) 37642421Syokota mpp->counter[MPROF_MAX] = now - acqtime; 37742421Syokota mpp->counter[MPROF_TOT] += now - acqtime; 37842421Syokota mpp->counter[MPROF_CNT] += 1; 37942421Syokota mpp->counter[MPROF_AVG] = 38042421Syokota mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT]; 38142421Syokotaunlock: 38242421Syokota mtx_unlock_spin(&mprof_mtx); 38342421Syokota } 38442421Syokotaout: 38542421Syokota#endif 38642421Syokota WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 38742421Syokota LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 38842421Syokota line); 38958271Syokota _rel_sleep_lock(m, curthread, opts, file, line); 39042421Syokota} 39142421Syokota 39258271Syokotavoid 39342421Syokota_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) 39442421Syokota{ 39558271Syokota 39642421Syokota MPASS(curthread != NULL); 39742421Syokota _get_spin_lock(m, curthread, opts, file, line); 39842421Syokota LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 39942421Syokota line); 40042421Syokota WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 40142421Syokota} 40242421Syokota 40342421Syokotavoid 40442421Syokota_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) 40542421Syokota{ 40642421Syokota 40742421Syokota MPASS(curthread != NULL); 40842421Syokota mtx_assert(m, MA_OWNED); 40942421Syokota WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 41042421Syokota LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 41142421Syokota line); 41242421Syokota _rel_spin_lock(m); 41342421Syokota} 41442421Syokota 41558271Syokota/* 41642421Syokota * The important part of mtx_trylock{,_flags}() 41742421Syokota * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that 41842421Syokota * if we're called, it's because we know we don't already own this lock. 41942421Syokota */ 42042421Syokotaint 42142421Syokota_mtx_trylock(struct mtx *m, int opts, const char *file, int line) 42242421Syokota{ 42342421Syokota int rval; 42442421Syokota 42542421Syokota MPASS(curthread != NULL); 42642421Syokota 42742421Syokota rval = _obtain_lock(m, curthread); 42842421Syokota 42942421Syokota LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line); 43042421Syokota if (rval) { 43142421Syokota /* 43258271Syokota * We do not handle recursion in _mtx_trylock; see the 43342421Syokota * note at the top of the routine. 43442421Syokota */ 43542421Syokota KASSERT(!mtx_recursed(m), 43658271Syokota ("mtx_trylock() called on a recursed mutex")); 43742421Syokota WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, 43842421Syokota file, line); 43942421Syokota } 44042421Syokota 44142421Syokota return (rval); 44242421Syokota} 44342421Syokota 44442421Syokota/* 44542421Syokota * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. 44642421Syokota * 44742421Syokota * We call this if the lock is either contested (i.e. we need to go to 44842421Syokota * sleep waiting for it), or if we need to recurse on it. 44942421Syokota */ 45042421Syokotavoid 45142421Syokota_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line) 45242421Syokota{ 45342421Syokota struct thread *td = curthread; 45442421Syokota 45542421Syokota if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) { 45642421Syokota m->mtx_recurse++; 45742421Syokota atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); 45842421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 45958271Syokota CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); 46042421Syokota return; 46158271Syokota } 46242421Syokota 46342421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 46442421Syokota CTR4(KTR_LOCK, 46542421Syokota "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", 46642421Syokota m->mtx_object.lo_name, (void *)m->mtx_lock, file, line); 46742421Syokota 46842421Syokota while (!_obtain_lock(m, td)) { 46942421Syokota uintptr_t v; 47042421Syokota struct thread *td1; 47142421Syokota 47242421Syokota mtx_lock_spin(&sched_lock); 47342421Syokota /* 47442421Syokota * Check if the lock has been released while spinning for 47542421Syokota * the sched_lock. 47642421Syokota */ 47742421Syokota if ((v = m->mtx_lock) == MTX_UNOWNED) { 47842421Syokota mtx_unlock_spin(&sched_lock); 47942421Syokota continue; 48042421Syokota } 48142421Syokota 48242421Syokota /* 48342421Syokota * The mutex was marked contested on release. This means that 48458271Syokota * there are threads blocked on it. 48542421Syokota */ 48642421Syokota if (v == MTX_CONTESTED) { 48742421Syokota td1 = TAILQ_FIRST(&m->mtx_blocked); 48858271Syokota MPASS(td1 != NULL); 48942421Syokota m->mtx_lock = (uintptr_t)td | MTX_CONTESTED; 49042421Syokota 49142421Syokota if (td1->td_priority < td->td_priority) 49242421Syokota td->td_priority = td1->td_priority; 49342421Syokota mtx_unlock_spin(&sched_lock); 49442421Syokota return; 49542421Syokota } 49642421Syokota 49742421Syokota /* 49842421Syokota * If the mutex isn't already contested and a failure occurs 49942421Syokota * setting the contested bit, the mutex was either released 50042421Syokota * or the state of the MTX_RECURSED bit changed. 50142421Syokota */ 50242421Syokota if ((v & MTX_CONTESTED) == 0 && 50342421Syokota !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 50442421Syokota (void *)(v | MTX_CONTESTED))) { 50542421Syokota mtx_unlock_spin(&sched_lock); 50642421Syokota continue; 50742421Syokota } 50842421Syokota 50942421Syokota /* 51042421Syokota * We definitely must sleep for this lock. 51158271Syokota */ 51242421Syokota mtx_assert(m, MA_NOTOWNED); 51358271Syokota 51442421Syokota#ifdef notyet 51542421Syokota /* 51642421Syokota * If we're borrowing an interrupted thread's VM context, we 51742421Syokota * must clean up before going to sleep. 51842421Syokota */ 51942421Syokota if (td->td_ithd != NULL) { 52042421Syokota struct ithd *it = td->td_ithd; 52142421Syokota 52242421Syokota if (it->it_interrupted) { 52342421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 52442421Syokota CTR2(KTR_LOCK, 52542421Syokota "_mtx_lock_sleep: %p interrupted %p", 52642421Syokota it, it->it_interrupted); 52742421Syokota intr_thd_fixup(it); 52842421Syokota } 52942421Syokota } 53042421Syokota#endif 53142421Syokota 53242421Syokota /* 53342421Syokota * Put us on the list of threads blocked on this mutex. 53458271Syokota */ 53542421Syokota if (TAILQ_EMPTY(&m->mtx_blocked)) { 53642421Syokota td1 = mtx_owner(m); 53742421Syokota LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested); 53842421Syokota TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq); 53942421Syokota } else { 54042421Syokota TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) 54142421Syokota if (td1->td_priority > td->td_priority) 54242421Syokota break; 54342421Syokota if (td1) 54458271Syokota TAILQ_INSERT_BEFORE(td1, td, td_blkq); 54542421Syokota else 54642421Syokota TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq); 54742421Syokota } 54842421Syokota 54942421Syokota /* 55042421Syokota * Save who we're blocked on. 55142421Syokota */ 55242421Syokota td->td_blocked = m; 55342421Syokota td->td_mtxname = m->mtx_object.lo_name; 55458271Syokota td->td_proc->p_stat = SMTX; 55542421Syokota propagate_priority(td); 55642421Syokota 55742421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 55842421Syokota CTR3(KTR_LOCK, 55942421Syokota "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m, 56042421Syokota m->mtx_object.lo_name); 56142421Syokota 56242421Syokota td->td_proc->p_stats->p_ru.ru_nvcsw++; 56342421Syokota mi_switch(); 56442421Syokota 56542421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 56642421Syokota CTR3(KTR_LOCK, 56742421Syokota "_mtx_lock_sleep: p %p free from blocked on [%p] %s", 56842421Syokota td, m, m->mtx_object.lo_name); 56942421Syokota 57042421Syokota mtx_unlock_spin(&sched_lock); 57142421Syokota } 57242421Syokota 57342421Syokota return; 57442421Syokota} 57542421Syokota 57642421Syokota/* 57742421Syokota * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock. 57842421Syokota * 57942421Syokota * This is only called if we need to actually spin for the lock. Recursion 58042421Syokota * is handled inline. 58142421Syokota */ 58242421Syokotavoid 58342421Syokota_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line) 58442421Syokota{ 58542421Syokota int i = 0; 58642421Syokota 58742421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 58842421Syokota CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); 58942421Syokota 59042421Syokota for (;;) { 59142421Syokota if (_obtain_lock(m, curthread)) 59242421Syokota break; 59342421Syokota 59442421Syokota /* Give interrupts a chance while we spin. */ 59542421Syokota critical_exit(); 59642421Syokota while (m->mtx_lock != MTX_UNOWNED) { 59742421Syokota if (i++ < 10000000) 59842421Syokota continue; 59942421Syokota if (i++ < 60000000) 60042421Syokota DELAY(1); 60142421Syokota#ifdef DDB 60242421Syokota else if (!db_active) 60342421Syokota#else 60442421Syokota else 60542421Syokota#endif 60642421Syokota panic("spin lock %s held by %p for > 5 seconds", 60742421Syokota m->mtx_object.lo_name, (void *)m->mtx_lock); 60842421Syokota } 60942421Syokota critical_enter(); 61042421Syokota } 61142421Syokota 61242421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 61342421Syokota CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); 61442421Syokota 61542421Syokota return; 61642421Syokota} 61742421Syokota 61842421Syokota/* 61942421Syokota * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. 62042421Syokota * 62142421Syokota * We are only called here if the lock is recursed or contested (i.e. we 62242421Syokota * need to wake up a blocked thread). 62342421Syokota */ 62442421Syokotavoid 62542421Syokota_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) 62642421Syokota{ 62742421Syokota struct thread *td, *td1; 62842421Syokota struct mtx *m1; 62942421Syokota int pri; 63042421Syokota 63142421Syokota td = curthread; 63242421Syokota 63342421Syokota if (mtx_recursed(m)) { 63442421Syokota if (--(m->mtx_recurse) == 0) 63542421Syokota atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); 63642421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 63742421Syokota CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); 63842421Syokota return; 63942421Syokota } 64042421Syokota 64142421Syokota mtx_lock_spin(&sched_lock); 64242421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 64342421Syokota CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); 64442421Syokota 64542421Syokota td1 = TAILQ_FIRST(&m->mtx_blocked); 64642421Syokota MPASS(td->td_proc->p_magic == P_MAGIC); 64742421Syokota MPASS(td1->td_proc->p_magic == P_MAGIC); 64842421Syokota 64942421Syokota TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq); 65042421Syokota 65142421Syokota if (TAILQ_EMPTY(&m->mtx_blocked)) { 65242421Syokota LIST_REMOVE(m, mtx_contested); 65342421Syokota _release_lock_quick(m); 65442421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 65542421Syokota CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m); 65642421Syokota } else 65742421Syokota atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED); 65842421Syokota 65942421Syokota pri = PRI_MAX; 66042421Syokota LIST_FOREACH(m1, &td->td_contested, mtx_contested) { 66142421Syokota int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority; 66242421Syokota if (cp < pri) 66342421Syokota pri = cp; 66442421Syokota } 66542421Syokota 66642421Syokota if (pri > td->td_base_pri) 66742421Syokota pri = td->td_base_pri; 66842421Syokota td->td_priority = pri; 66942421Syokota 67042421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 67142421Syokota CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p", 67242421Syokota m, td1); 67342421Syokota 67442421Syokota td1->td_blocked = NULL; 67542421Syokota td1->td_proc->p_stat = SRUN; 67642421Syokota setrunqueue(td1); 67742421Syokota 67842421Syokota if (td->td_critnest == 1 && td1->td_priority < pri) { 67942421Syokota#ifdef notyet 68042421Syokota if (td->td_ithd != NULL) { 68142421Syokota struct ithd *it = td->td_ithd; 68242421Syokota 68358271Syokota if (it->it_interrupted) { 68442421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 68542421Syokota CTR2(KTR_LOCK, 68642421Syokota "_mtx_unlock_sleep: %p interrupted %p", 68742421Syokota it, it->it_interrupted); 68842421Syokota intr_thd_fixup(it); 68942421Syokota } 69042421Syokota } 69142421Syokota#endif 69242421Syokota setrunqueue(td); 69342421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 69442421Syokota CTR2(KTR_LOCK, 69542421Syokota "_mtx_unlock_sleep: %p switching out lock=%p", m, 69642421Syokota (void *)m->mtx_lock); 69742421Syokota 69842421Syokota td->td_proc->p_stats->p_ru.ru_nivcsw++; 69942421Syokota mi_switch(); 70042421Syokota if (LOCK_LOG_TEST(&m->mtx_object, opts)) 70142421Syokota CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", 70242421Syokota m, (void *)m->mtx_lock); 70342421Syokota } 70442421Syokota 70542421Syokota mtx_unlock_spin(&sched_lock); 70642421Syokota 70742421Syokota return; 70858271Syokota} 70942421Syokota 71042421Syokota/* 71142421Syokota * All the unlocking of MTX_SPIN locks is done inline. 71242421Syokota * See the _rel_spin_lock() macro for the details. 71342421Syokota */ 71442421Syokota 71542421Syokota/* 71642421Syokota * The backing function for the INVARIANTS-enabled mtx_assert() 71742421Syokota */ 71842421Syokota#ifdef INVARIANT_SUPPORT 71942421Syokotavoid 72042421Syokota_mtx_assert(struct mtx *m, int what, const char *file, int line) 72142421Syokota{ 72242421Syokota 72342421Syokota if (panicstr != NULL) 72442421Syokota return; 72542421Syokota switch (what) { 72642421Syokota case MA_OWNED: 72742421Syokota case MA_OWNED | MA_RECURSED: 72842421Syokota case MA_OWNED | MA_NOTRECURSED: 72942421Syokota if (!mtx_owned(m)) 73042421Syokota panic("mutex %s not owned at %s:%d", 73158271Syokota m->mtx_object.lo_name, file, line); 73242421Syokota if (mtx_recursed(m)) { 73342421Syokota if ((what & MA_NOTRECURSED) != 0) 73458271Syokota panic("mutex %s recursed at %s:%d", 73558271Syokota m->mtx_object.lo_name, file, line); 73642421Syokota } else if ((what & MA_RECURSED) != 0) { 73742421Syokota panic("mutex %s unrecursed at %s:%d", 73842421Syokota m->mtx_object.lo_name, file, line); 73958271Syokota } 74042421Syokota break; 74142421Syokota case MA_NOTOWNED: 74242421Syokota if (mtx_owned(m)) 74342421Syokota panic("mutex %s owned at %s:%d", 74442421Syokota m->mtx_object.lo_name, file, line); 74542421Syokota break; 74642421Syokota default: 74742421Syokota panic("unknown mtx_assert at %s:%d", file, line); 74842421Syokota } 74942421Syokota} 75042421Syokota#endif 75142421Syokota 75258271Syokota/* 75342421Syokota * The MUTEX_DEBUG-enabled mtx_validate() 75442421Syokota * 75542421Syokota * Most of these checks have been moved off into the LO_INITIALIZED flag 75642421Syokota * maintained by the witness code. 75742421Syokota */ 75842421Syokota#ifdef MUTEX_DEBUG 75942421Syokota 76042421Syokotavoid mtx_validate(struct mtx *); 76142421Syokota 76242421Syokotavoid 76342421Syokotamtx_validate(struct mtx *m) 76442421Syokota{ 76558271Syokota 76642421Syokota/* 76742421Syokota * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 76858271Syokota * we can re-enable the kernacc() checks. 76958271Syokota */ 77042421Syokota#ifndef __alpha__ 77142421Syokota /* 77242421Syokota * Can't call kernacc() from early init386(), especially when 77358271Syokota * initializing Giant mutex, because some stuff in kernacc() 77442421Syokota * requires Giant itself. 77542421Syokota */ 77642421Syokota if (!cold) 77742421Syokota if (!kernacc((caddr_t)m, sizeof(m), 77842421Syokota VM_PROT_READ | VM_PROT_WRITE)) 77942421Syokota panic("Can't read and write to mutex %p", m); 78042421Syokota#endif 78142421Syokota} 78242421Syokota#endif 78342421Syokota 78442421Syokota/* 78542421Syokota * General init routine used by the MTX_SYSINIT() macro. 78642421Syokota */ 78742421Syokotavoid 78842421Syokotamtx_sysinit(void *arg) 78942421Syokota{ 79042421Syokota struct mtx_args *margs = arg; 79142421Syokota 79258271Syokota mtx_init(margs->ma_mtx, margs->ma_desc, margs->ma_opts); 79342421Syokota} 79458271Syokota 79542421Syokota/* 79642421Syokota * Mutex initialization routine; initialize lock `m' of type contained in 79742421Syokota * `opts' with options contained in `opts' and description `description.' 79842421Syokota */ 79942421Syokotavoid 80042421Syokotamtx_init(struct mtx *m, const char *description, int opts) 80142421Syokota{ 80242421Syokota struct lock_object *lock; 80342421Syokota 80442421Syokota MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | 80542421Syokota MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0); 80642421Syokota 80742421Syokota#ifdef MUTEX_DEBUG 80842421Syokota /* Diagnostic and error correction */ 80942421Syokota mtx_validate(m); 81042421Syokota#endif 81142421Syokota 81242421Syokota lock = &m->mtx_object; 81342421Syokota KASSERT((lock->lo_flags & LO_INITIALIZED) == 0, 81442421Syokota ("mutex %s %p already initialized", description, m)); 81542421Syokota bzero(m, sizeof(*m)); 81642421Syokota if (opts & MTX_SPIN) 81742421Syokota lock->lo_class = &lock_class_mtx_spin; 81842421Syokota else 81942421Syokota lock->lo_class = &lock_class_mtx_sleep; 82042421Syokota lock->lo_name = description; 82142421Syokota if (opts & MTX_QUIET) 82242421Syokota lock->lo_flags = LO_QUIET; 82342421Syokota if (opts & MTX_RECURSE) 82442421Syokota lock->lo_flags |= LO_RECURSABLE; 82542421Syokota if (opts & MTX_SLEEPABLE) 82642421Syokota lock->lo_flags |= LO_SLEEPABLE; 82742421Syokota if ((opts & MTX_NOWITNESS) == 0) 82842421Syokota lock->lo_flags |= LO_WITNESS; 82942421Syokota if (opts & MTX_DUPOK) 83042421Syokota lock->lo_flags |= LO_DUPOK; 83158271Syokota 83242421Syokota m->mtx_lock = MTX_UNOWNED; 83358271Syokota TAILQ_INIT(&m->mtx_blocked); 83442421Syokota 83542421Syokota LOCK_LOG_INIT(lock, opts); 83642421Syokota 83742421Syokota WITNESS_INIT(lock); 83842421Syokota} 83942421Syokota 84042421Syokota/* 84142421Syokota * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be 84242421Syokota * passed in as a flag here because if the corresponding mtx_init() was 84342421Syokota * called with MTX_QUIET set, then it will already be set in the mutex's 84442421Syokota * flags. 84542421Syokota */ 84642421Syokotavoid 84742421Syokotamtx_destroy(struct mtx *m) 84842421Syokota{ 84942421Syokota 85042421Syokota LOCK_LOG_DESTROY(&m->mtx_object, 0); 85142421Syokota 85242421Syokota if (!mtx_owned(m)) 85342421Syokota MPASS(mtx_unowned(m)); 85442421Syokota else { 85542421Syokota MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); 85642421Syokota 85742421Syokota /* Tell witness this isn't locked to make it happy. */ 85842421Syokota WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__, 85942421Syokota __LINE__); 86042421Syokota } 86142421Syokota 86242421Syokota WITNESS_DESTROY(&m->mtx_object); 86342421Syokota} 86442421Syokota 86542421Syokota/* 86642421Syokota * Encapsulated Giant mutex routines. These routines provide encapsulation 86742421Syokota * control for the Giant mutex, allowing sysctls to be used to turn on and 86842421Syokota * off Giant around certain subsystems. The default value for the sysctls 86958271Syokota * are set to what developers believe is stable and working in regards to 87042421Syokota * the Giant pushdown. Developers should not turn off Giant via these 87158271Syokota * sysctls unless they know what they are doing. 87242421Syokota * 87342421Syokota * Callers of mtx_lock_giant() are expected to pass the return value to an 87442421Syokota * accompanying mtx_unlock_giant() later on. If multiple subsystems are 87542421Syokota * effected by a Giant wrap, all related sysctl variables must be zero for 87642421Syokota * the subsystem call to operate without Giant (as determined by the caller). 87742421Syokota */ 87842421Syokota 87942421SyokotaSYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation"); 88042421Syokota 88142421Syokotastatic int kern_giant_all = 0; 88242421SyokotaSYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, ""); 88342421Syokota 88442421Syokotaint kern_giant_proc = 1; /* Giant around PROC locks */ 88542421Syokotaint kern_giant_file = 1; /* Giant around struct file & filedesc */ 88642421Syokotaint kern_giant_ucred = 1; /* Giant around ucred */ 88742421SyokotaSYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, ""); 88842421SyokotaSYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, ""); 88942421SyokotaSYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, ""); 89042421Syokota 89142421Syokotaint 89242421Syokotamtx_lock_giant(int sysctlvar) 89342421Syokota{ 89442421Syokota if (sysctlvar || kern_giant_all) { 89542421Syokota mtx_lock(&Giant); 89642421Syokota return(1); 89742421Syokota } 89842421Syokota return(0); 89942421Syokota} 90042421Syokota 90142421Syokotavoid 90242421Syokotamtx_unlock_giant(int s) 90342421Syokota{ 90442421Syokota if (s) 90542421Syokota mtx_unlock(&Giant); 90642421Syokota} 90742421Syokota 90842421Syokota