/* kern_mutex.c revision 144806 */
119370Spst/*- 298944Sobrien * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3130803Smarcel * 498944Sobrien * Redistribution and use in source and binary forms, with or without 519370Spst * modification, are permitted provided that the following conditions 698944Sobrien * are met: 719370Spst * 1. Redistributions of source code must retain the above copyright 898944Sobrien * notice, this list of conditions and the following disclaimer. 998944Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1098944Sobrien * notice, this list of conditions and the following disclaimer in the 1198944Sobrien * documentation and/or other materials provided with the distribution. 1219370Spst * 3. Berkeley Software Design Inc's name may not be used to endorse or 1398944Sobrien * promote products derived from this software without specific prior 1498944Sobrien * written permission. 1598944Sobrien * 1698944Sobrien * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 1719370Spst * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1898944Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1998944Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 2098944Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2198944Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2219370Spst * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2319370Spst * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2446283Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2519370Spst * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2646283Sdfr * SUCH DAMAGE. 
2746283Sdfr * 2819370Spst * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 2919370Spst * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 3019370Spst */ 3119370Spst 3219370Spst/* 3319370Spst * Machine independent bits of mutex implementation. 3419370Spst */ 3546283Sdfr 3698944Sobrien#include <sys/cdefs.h> 3798944Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_mutex.c 144806 2005-04-08 14:14:09Z glebius $"); 38130803Smarcel 3919370Spst#include "opt_adaptive_mutexes.h" 4046283Sdfr#include "opt_ddb.h" 4198944Sobrien#include "opt_mprof.h" 4246283Sdfr#include "opt_mutex_wake_all.h" 4319370Spst#include "opt_sched.h" 4419370Spst 45130803Smarcel#include <sys/param.h> 46130803Smarcel#include <sys/systm.h> 47130803Smarcel#include <sys/bus.h> 48130803Smarcel#include <sys/kdb.h> 4919370Spst#include <sys/kernel.h> 5019370Spst#include <sys/ktr.h> 5119370Spst#include <sys/lock.h> 5219370Spst#include <sys/malloc.h> 5398944Sobrien#include <sys/mutex.h> 5419370Spst#include <sys/proc.h> 5546283Sdfr#include <sys/resourcevar.h> 5698944Sobrien#include <sys/sched.h> 5719370Spst#include <sys/sbuf.h> 5819370Spst#include <sys/sysctl.h> 5919370Spst#include <sys/turnstile.h> 6019370Spst#include <sys/vmmeter.h> 6119370Spst 6219370Spst#include <machine/atomic.h> 6346283Sdfr#include <machine/bus.h> 6446283Sdfr#include <machine/clock.h> 6546283Sdfr#include <machine/cpu.h> 6698944Sobrien 6746283Sdfr#include <ddb/ddb.h> 6846283Sdfr 6946283Sdfr#include <vm/vm.h> 7046283Sdfr#include <vm/vm_extern.h> 7198944Sobrien 7246283Sdfr/* 7398944Sobrien * Force MUTEX_WAKE_ALL for now. 7446283Sdfr * single thread wakeup needs fixes to avoid race conditions with 7546283Sdfr * priority inheritance. 7698944Sobrien */ 7746283Sdfr#ifndef MUTEX_WAKE_ALL 7846283Sdfr#define MUTEX_WAKE_ALL 7946283Sdfr#endif 8046283Sdfr 8146283Sdfr/* 8246283Sdfr * Internal utility macros. 
8346283Sdfr */ 8446283Sdfr#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 8546283Sdfr 8646283Sdfr#define mtx_owner(m) (mtx_unowned((m)) ? NULL \ 8746283Sdfr : (struct thread *)((m)->mtx_lock & MTX_FLAGMASK)) 8898944Sobrien 8946283Sdfr/* 9098944Sobrien * Lock classes for sleep and spin mutexes. 9146283Sdfr */ 9246283Sdfrstruct lock_class lock_class_mtx_sleep = { 9346283Sdfr "sleep mutex", 9446283Sdfr LC_SLEEPLOCK | LC_RECURSABLE 9546283Sdfr}; 9646283Sdfrstruct lock_class lock_class_mtx_spin = { 9746283Sdfr "spin mutex", 9846283Sdfr LC_SPINLOCK | LC_RECURSABLE 9946283Sdfr}; 10046283Sdfr 10146283Sdfr/* 10246283Sdfr * System-wide mutexes 10346283Sdfr */ 10446283Sdfrstruct mtx sched_lock; 10546283Sdfrstruct mtx Giant; 10646283Sdfr 10798944Sobrien#ifdef MUTEX_PROFILING 10819370SpstSYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging"); 10919370SpstSYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling"); 11019370Spststatic int mutex_prof_enable = 0; 11119370SpstSYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW, 11219370Spst &mutex_prof_enable, 0, "Enable tracing of mutex holdtime"); 11319370Spst 11419370Spststruct mutex_prof { 11519370Spst const char *name; 11619370Spst const char *file; 11719370Spst int line; 11819370Spst uintmax_t cnt_max; 11919370Spst uintmax_t cnt_tot; 12019370Spst uintmax_t cnt_cur; 12119370Spst uintmax_t cnt_contest_holding; 12219370Spst uintmax_t cnt_contest_locking; 12319370Spst struct mutex_prof *next; 12419370Spst}; 12519370Spst 12619370Spst/* 12798944Sobrien * mprof_buf is a static pool of profiling records to avoid possible 12898944Sobrien * reentrance of the memory allocation functions. 12919370Spst * 13019370Spst * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE. 
13198944Sobrien */ 13219370Spst#ifdef MPROF_BUFFERS 13319370Spst#define NUM_MPROF_BUFFERS MPROF_BUFFERS 13498944Sobrien#else 13519370Spst#define NUM_MPROF_BUFFERS 1000 13619370Spst#endif 13719370Spststatic struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS]; 138130803Smarcelstatic int first_free_mprof_buf; 139130803Smarcel#ifndef MPROF_HASH_SIZE 14019370Spst#define MPROF_HASH_SIZE 1009 14198944Sobrien#endif 14219370Spst#if NUM_MPROF_BUFFERS >= MPROF_HASH_SIZE 14319370Spst#error MPROF_BUFFERS must be larger than MPROF_HASH_SIZE 14419370Spst#endif 14519370Spststatic struct mutex_prof *mprof_hash[MPROF_HASH_SIZE]; 14619370Spst/* SWAG: sbuf size = avg stat. line size * number of locks */ 14798944Sobrien#define MPROF_SBUF_SIZE 256 * 400 14819370Spst 14919370Spststatic int mutex_prof_acquisitions; 15098944SobrienSYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD, 15119370Spst &mutex_prof_acquisitions, 0, "Number of mutex acquistions recorded"); 15298944Sobrienstatic int mutex_prof_records; 15398944SobrienSYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD, 15419370Spst &mutex_prof_records, 0, "Number of profiling records"); 15519370Spststatic int mutex_prof_maxrecords = NUM_MPROF_BUFFERS; 15619370SpstSYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD, 15798944Sobrien &mutex_prof_maxrecords, 0, "Maximum number of profiling records"); 15898944Sobrienstatic int mutex_prof_rejected; 15998944SobrienSYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD, 16098944Sobrien &mutex_prof_rejected, 0, "Number of rejected profiling records"); 16198944Sobrienstatic int mutex_prof_hashsize = MPROF_HASH_SIZE; 16298944SobrienSYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD, 16319370Spst &mutex_prof_hashsize, 0, "Hash size"); 16419370Spststatic int mutex_prof_collisions = 0; 16546283SdfrSYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD, 16646283Sdfr &mutex_prof_collisions, 0, "Number of hash collisions"); 16746283Sdfr 
16898944Sobrien/* 16998944Sobrien * mprof_mtx protects the profiling buffers and the hash. 17098944Sobrien */ 17198944Sobrienstatic struct mtx mprof_mtx; 17246283SdfrMTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET); 17319370Spst 17419370Spststatic u_int64_t 17519370Spstnanoseconds(void) 17698944Sobrien{ 17719370Spst struct timespec tv; 17819370Spst 17919370Spst nanotime(&tv); 18019370Spst return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec); 18119370Spst} 18219370Spst 18319370Spststatic int 18498944Sobriendump_mutex_prof_stats(SYSCTL_HANDLER_ARGS) 18519370Spst{ 18619370Spst struct sbuf *sb; 18719370Spst int error, i; 18819370Spst static int multiplier = 1; 18919370Spst 19019370Spst if (first_free_mprof_buf == 0) 19119370Spst return (SYSCTL_OUT(req, "No locking recorded", 19298944Sobrien sizeof("No locking recorded"))); 19319370Spst 19419370Spstretry_sbufops: 195130803Smarcel sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); 196130803Smarcel sbuf_printf(sb, "\n%6s %12s %11s %5s %12s %12s %s\n", 197130803Smarcel "max", "total", "count", "avg", "cnt_hold", "cnt_lock", "name"); 198130803Smarcel /* 199130803Smarcel * XXX this spinlock seems to be by far the largest perpetrator 200130803Smarcel * of spinlock latency (1.6 msec on an Athlon1600 was recorded 201130803Smarcel * even before I pessimized it further by moving the average 202130803Smarcel * computation here). 203130803Smarcel */ 204130803Smarcel mtx_lock_spin(&mprof_mtx); 205130803Smarcel for (i = 0; i < first_free_mprof_buf; ++i) { 206130803Smarcel sbuf_printf(sb, "%6ju %12ju %11ju %5ju %12ju %12ju %s:%d (%s)\n", 207130803Smarcel mprof_buf[i].cnt_max / 1000, 208130803Smarcel mprof_buf[i].cnt_tot / 1000, 209130803Smarcel mprof_buf[i].cnt_cur, 210130803Smarcel mprof_buf[i].cnt_cur == 0 ? 
(uintmax_t)0 : 211130803Smarcel mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000), 212130803Smarcel mprof_buf[i].cnt_contest_holding, 213130803Smarcel mprof_buf[i].cnt_contest_locking, 214130803Smarcel mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name); 215130803Smarcel if (sbuf_overflowed(sb)) { 216130803Smarcel mtx_unlock_spin(&mprof_mtx); 217130803Smarcel sbuf_delete(sb); 218130803Smarcel multiplier++; 219130803Smarcel goto retry_sbufops; 220130803Smarcel } 221130803Smarcel } 222130803Smarcel mtx_unlock_spin(&mprof_mtx); 223130803Smarcel sbuf_finish(sb); 224130803Smarcel error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 225130803Smarcel sbuf_delete(sb); 226130803Smarcel return (error); 227130803Smarcel} 228130803SmarcelSYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, 229130803Smarcel NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics"); 230130803Smarcel 23119370Spststatic int 23219370Spstreset_mutex_prof_stats(SYSCTL_HANDLER_ARGS) 23319370Spst{ 23498944Sobrien int error, v; 23519370Spst 23619370Spst if (first_free_mprof_buf == 0) 23719370Spst return (0); 23819370Spst 23919370Spst v = 0; 24019370Spst error = sysctl_handle_int(oidp, &v, 0, req); 24119370Spst if (error) 242130803Smarcel return (error); 243130803Smarcel if (req->newptr == NULL) 244130803Smarcel return (error); 245130803Smarcel if (v == 0) 246130803Smarcel return (0); 247130803Smarcel 248130803Smarcel mtx_lock_spin(&mprof_mtx); 249130803Smarcel bzero(mprof_buf, sizeof(*mprof_buf) * first_free_mprof_buf); 250130803Smarcel bzero(mprof_hash, sizeof(struct mtx *) * MPROF_HASH_SIZE); 251130803Smarcel first_free_mprof_buf = 0; 252130803Smarcel mtx_unlock_spin(&mprof_mtx); 253130803Smarcel return (0); 254130803Smarcel} 255130803SmarcelSYSCTL_PROC(_debug_mutex_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, 256130803Smarcel NULL, 0, reset_mutex_prof_stats, "I", "Reset mutex profiling statistics"); 257130803Smarcel#endif 258130803Smarcel 
259130803Smarcel/* 26019370Spst * Function versions of the inlined __mtx_* macros. These are used by 26119370Spst * modules and can also be called from assembly language if needed. 26219370Spst */ 26319370Spstvoid 26498944Sobrien_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) 26519370Spst{ 26619370Spst 26719370Spst MPASS(curthread != NULL); 26898944Sobrien KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, 26919370Spst ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, 27019370Spst file, line)); 27119370Spst WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, 27219370Spst file, line); 27398944Sobrien _get_sleep_lock(m, curthread, opts, file, line); 27498944Sobrien LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 27519370Spst line); 27619370Spst WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 27719370Spst#ifdef MUTEX_PROFILING 27819370Spst /* don't reset the timer when/if recursing */ 27919370Spst if (m->mtx_acqtime == 0) { 28019370Spst m->mtx_filename = file; 28119370Spst m->mtx_lineno = line; 28219370Spst m->mtx_acqtime = mutex_prof_enable ? 
nanoseconds() : 0; 28319370Spst ++mutex_prof_acquisitions; 28419370Spst } 28519370Spst#endif 28619370Spst} 28719370Spst 28819370Spstvoid 28919370Spst_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) 29046283Sdfr{ 29146283Sdfr 29219370Spst MPASS(curthread != NULL); 29319370Spst KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, 29419370Spst ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, 29519370Spst file, line)); 29619370Spst WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 29719370Spst LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 29819370Spst line); 29919370Spst mtx_assert(m, MA_OWNED); 30019370Spst#ifdef MUTEX_PROFILING 30119370Spst if (m->mtx_acqtime != 0) { 30219370Spst static const char *unknown = "(unknown)"; 30319370Spst struct mutex_prof *mpp; 30446283Sdfr u_int64_t acqtime, now; 305130803Smarcel const char *p, *q; 306130803Smarcel volatile u_int hash; 307130803Smarcel 308130803Smarcel now = nanoseconds(); 30919370Spst acqtime = m->mtx_acqtime; 31046283Sdfr m->mtx_acqtime = 0; 311130803Smarcel if (now <= acqtime) 312130803Smarcel goto out; 313130803Smarcel for (p = m->mtx_filename; 314130803Smarcel p != NULL && strncmp(p, "../", 3) == 0; p += 3) 315130803Smarcel /* nothing */ ; 31619370Spst if (p == NULL || *p == '\0') 31719370Spst p = unknown; 31819370Spst for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q) 31919370Spst hash = (hash * 2 + *q) % MPROF_HASH_SIZE; 32019370Spst mtx_lock_spin(&mprof_mtx); 32119370Spst for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next) 32219370Spst if (mpp->line == m->mtx_lineno && 32398944Sobrien strcmp(mpp->file, p) == 0) 32419370Spst break; 32519370Spst if (mpp == NULL) { 326130803Smarcel /* Just exit if we cannot get a trace buffer */ 327130803Smarcel if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) { 328130803Smarcel ++mutex_prof_rejected; 329130803Smarcel goto unlock; 33019370Spst } 33119370Spst mpp = 
&mprof_buf[first_free_mprof_buf++]; 33219370Spst mpp->name = mtx_name(m); 33398944Sobrien mpp->file = p; 33419370Spst mpp->line = m->mtx_lineno; 33519370Spst mpp->next = mprof_hash[hash]; 33619370Spst if (mprof_hash[hash] != NULL) 33719370Spst ++mutex_prof_collisions; 33819370Spst mprof_hash[hash] = mpp; 33919370Spst ++mutex_prof_records; 34019370Spst } 34119370Spst /* 34219370Spst * Record if the mutex has been held longer now than ever 34398944Sobrien * before. 34419370Spst */ 34598944Sobrien if (now - acqtime > mpp->cnt_max) 34698944Sobrien mpp->cnt_max = now - acqtime; 34719370Spst mpp->cnt_tot += now - acqtime; 34898944Sobrien mpp->cnt_cur++; 34998944Sobrien /* 35019370Spst * There's a small race, really we should cmpxchg 35198944Sobrien * 0 with the current value, but that would bill 35298944Sobrien * the contention to the wrong lock instance if 35398944Sobrien * it followed this also. 35419370Spst */ 35598944Sobrien mpp->cnt_contest_holding += m->mtx_contest_holding; 35698944Sobrien m->mtx_contest_holding = 0; 35798944Sobrien mpp->cnt_contest_locking += m->mtx_contest_locking; 35898944Sobrien m->mtx_contest_locking = 0; 35998944Sobrienunlock: 36098944Sobrien mtx_unlock_spin(&mprof_mtx); 36198944Sobrien } 36298944Sobrienout: 36398944Sobrien#endif 36498944Sobrien _rel_sleep_lock(m, curthread, opts, file, line); 36598944Sobrien} 366130803Smarcel 36719370Spstvoid 36819370Spst_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) 36919370Spst{ 37019370Spst 371130803Smarcel MPASS(curthread != NULL); 372130803Smarcel KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin, 373130803Smarcel ("mtx_lock_spin() of sleep mutex %s @ %s:%d", 374130803Smarcel m->mtx_object.lo_name, file, line)); 375130803Smarcel WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, 376130803Smarcel file, line); 377130803Smarcel _get_spin_lock(m, curthread, opts, file, line); 378130803Smarcel LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, 
file, 379130803Smarcel line); 380130803Smarcel WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 381130803Smarcel} 382130803Smarcel 383130803Smarcelvoid 384130803Smarcel_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) 385130803Smarcel{ 386130803Smarcel 387130803Smarcel MPASS(curthread != NULL); 388130803Smarcel KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin, 38998944Sobrien ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", 39019370Spst m->mtx_object.lo_name, file, line)); 39119370Spst WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 39219370Spst LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 39319370Spst line); 39419370Spst mtx_assert(m, MA_OWNED); 39598944Sobrien _rel_spin_lock(m); 39619370Spst} 39719370Spst 39819370Spst/* 39919370Spst * The important part of mtx_trylock{,_flags}() 40019370Spst * Tries to acquire lock `m.' If this function is called on a mutex that 40119370Spst * is already owned, it will recursively acquire the lock. 40219370Spst */ 40398944Sobrienint 40419370Spst_mtx_trylock(struct mtx *m, int opts, const char *file, int line) 40519370Spst{ 40619370Spst int rval; 40719370Spst 40819370Spst MPASS(curthread != NULL); 40919370Spst 41019370Spst if (mtx_owned(m) && (m->mtx_object.lo_flags & LO_RECURSABLE) != 0) { 41119370Spst m->mtx_recurse++; 41219370Spst atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); 41319370Spst rval = 1; 41419370Spst } else 41519370Spst rval = _obtain_lock(m, curthread); 41619370Spst 41719370Spst LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line); 41819370Spst if (rval) 41919370Spst WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, 42019370Spst file, line); 42119370Spst 42219370Spst return (rval); 42319370Spst} 42419370Spst 42519370Spst/* 42619370Spst * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. 42719370Spst * 42819370Spst * We call this if the lock is either contested (i.e. 
we need to go to 42919370Spst * sleep waiting for it), or if we need to recurse on it. 43019370Spst */ 43119370Spstvoid 43219370Spst_mtx_lock_sleep(struct mtx *m, struct thread *td, int opts, const char *file, 43398944Sobrien int line) 43419370Spst{ 43519370Spst#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) 43619370Spst struct thread *owner; 43719370Spst#endif 43819370Spst uintptr_t v; 43919370Spst#ifdef KTR 44019370Spst int cont_logged = 0; 44119370Spst#endif 44219370Spst#ifdef MUTEX_PROFILING 44319370Spst int contested; 44419370Spst#endif 44519370Spst 44619370Spst if (mtx_owned(m)) { 44719370Spst KASSERT((m->mtx_object.lo_flags & LO_RECURSABLE) != 0, 44846283Sdfr ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", 449130803Smarcel m->mtx_object.lo_name, file, line)); 450130803Smarcel m->mtx_recurse++; 451130803Smarcel atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); 452130803Smarcel if (LOCK_LOG_TEST(&m->mtx_object, opts)) 453130803Smarcel CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); 45419370Spst return; 45519370Spst } 45619370Spst 45719370Spst if (LOCK_LOG_TEST(&m->mtx_object, opts)) 45819370Spst CTR4(KTR_LOCK, 45919370Spst "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", 46019370Spst m->mtx_object.lo_name, (void *)m->mtx_lock, file, line); 46119370Spst 46219370Spst#ifdef MUTEX_PROFILING 46319370Spst contested = 0; 46419370Spst#endif 46519370Spst while (!_obtain_lock(m, td)) { 46619370Spst#ifdef MUTEX_PROFILING 46719370Spst contested = 1; 46819370Spst atomic_add_int(&m->mtx_contest_holding, 1); 46919370Spst#endif 47019370Spst turnstile_lock(&m->mtx_object); 47119370Spst v = m->mtx_lock; 47219370Spst 47398944Sobrien /* 47419370Spst * Check if the lock has been released while spinning for 47519370Spst * the turnstile chain lock. 
47619370Spst */ 47719370Spst if (v == MTX_UNOWNED) { 47819370Spst turnstile_release(&m->mtx_object); 47919370Spst cpu_spinwait(); 48019370Spst continue; 48119370Spst } 48246283Sdfr 483130803Smarcel#ifdef MUTEX_WAKE_ALL 484130803Smarcel MPASS(v != MTX_CONTESTED); 485130803Smarcel#else 486130803Smarcel /* 48746283Sdfr * The mutex was marked contested on release. This means that 48819370Spst * there are other threads blocked on it. Grab ownership of 48946283Sdfr * it and propagate its priority to the current thread if 49046283Sdfr * necessary. 491130803Smarcel */ 492130803Smarcel if (v == MTX_CONTESTED) { 493130803Smarcel m->mtx_lock = (uintptr_t)td | MTX_CONTESTED; 494130803Smarcel turnstile_claim(&m->mtx_object); 495130803Smarcel break; 49619370Spst } 49719370Spst#endif 49819370Spst 49919370Spst /* 50098944Sobrien * If the mutex isn't already contested and a failure occurs 50119370Spst * setting the contested bit, the mutex was either released 50219370Spst * or the state of the MTX_RECURSED bit changed. 50319370Spst */ 50419370Spst if ((v & MTX_CONTESTED) == 0 && 50519370Spst !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 50619370Spst (void *)(v | MTX_CONTESTED))) { 50719370Spst turnstile_release(&m->mtx_object); 50898944Sobrien cpu_spinwait(); 50919370Spst continue; 510130803Smarcel } 51119370Spst 512130803Smarcel#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) 513130803Smarcel /* 51419370Spst * If the current owner of the lock is executing on another 51519370Spst * CPU, spin instead of blocking. 
51619370Spst */ 51719370Spst owner = (struct thread *)(v & MTX_FLAGMASK); 51898944Sobrien#ifdef ADAPTIVE_GIANT 51919370Spst if (TD_IS_RUNNING(owner)) { 52019370Spst#else 52119370Spst if (m != &Giant && TD_IS_RUNNING(owner)) { 52219370Spst#endif 52319370Spst turnstile_release(&m->mtx_object); 52419370Spst while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { 52519370Spst cpu_spinwait(); 52619370Spst } 52719370Spst continue; 52819370Spst } 52919370Spst#endif /* SMP && !NO_ADAPTIVE_MUTEXES */ 53019370Spst 53119370Spst /* 53219370Spst * We definitely must sleep for this lock. 53319370Spst */ 53419370Spst mtx_assert(m, MA_NOTOWNED); 53598944Sobrien 53619370Spst#ifdef KTR 53719370Spst if (!cont_logged) { 53819370Spst CTR6(KTR_CONTENTION, 53919370Spst "contention: %p at %s:%d wants %s, taken by %s:%d", 54019370Spst td, file, line, m->mtx_object.lo_name, 54119370Spst WITNESS_FILE(&m->mtx_object), 54219370Spst WITNESS_LINE(&m->mtx_object)); 54319370Spst cont_logged = 1; 54419370Spst } 54519370Spst#endif 54619370Spst 54798944Sobrien /* 54819370Spst * Block on the turnstile. 54998944Sobrien */ 55098944Sobrien turnstile_wait(&m->mtx_object, mtx_owner(m)); 55198944Sobrien } 55298944Sobrien 55398944Sobrien#ifdef KTR 55419370Spst if (cont_logged) { 55519370Spst CTR4(KTR_CONTENTION, 556130803Smarcel "contention end: %s acquired by %p at %s:%d", 557130803Smarcel m->mtx_object.lo_name, td, file, line); 55819370Spst } 55919370Spst#endif 56019370Spst#ifdef MUTEX_PROFILING 56119370Spst if (contested) 56219370Spst m->mtx_contest_locking++; 56319370Spst m->mtx_contest_holding = 0; 564130803Smarcel#endif 565130803Smarcel return; 566130803Smarcel} 567130803Smarcel 568130803Smarcel#ifdef SMP 569130803Smarcel/* 570130803Smarcel * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock. 571130803Smarcel * 572130803Smarcel * This is only called if we need to actually spin for the lock. Recursion 573130803Smarcel * is handled inline. 
574130803Smarcel */ 575130803Smarcelvoid 576130803Smarcel_mtx_lock_spin(struct mtx *m, struct thread *td, int opts, const char *file, 577130803Smarcel int line) 578130803Smarcel{ 579130803Smarcel int i = 0; 580130803Smarcel 581130803Smarcel if (LOCK_LOG_TEST(&m->mtx_object, opts)) 582130803Smarcel CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); 583130803Smarcel 584130803Smarcel for (;;) { 585130803Smarcel if (_obtain_lock(m, td)) 586130803Smarcel break; 587130803Smarcel 588130803Smarcel /* Give interrupts a chance while we spin. */ 589130803Smarcel spinlock_exit(); 590130803Smarcel while (m->mtx_lock != MTX_UNOWNED) { 591130803Smarcel if (i++ < 10000000) { 592130803Smarcel cpu_spinwait(); 593130803Smarcel continue; 594130803Smarcel } 595130803Smarcel if (i < 60000000) 596130803Smarcel DELAY(1); 59798944Sobrien else if (!kdb_active) { 59819370Spst printf("spin lock %s held by %p for > 5 seconds\n", 59998944Sobrien m->mtx_object.lo_name, (void *)m->mtx_lock); 60019370Spst#ifdef WITNESS 60119370Spst witness_display_spinlock(&m->mtx_object, 60219370Spst mtx_owner(m)); 60398944Sobrien#endif 60498944Sobrien panic("spin lock held too long"); 60598944Sobrien } 60619370Spst cpu_spinwait(); 60798944Sobrien } 60819370Spst spinlock_enter(); 60919370Spst } 61019370Spst 61119370Spst if (LOCK_LOG_TEST(&m->mtx_object, opts)) 61298944Sobrien CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); 61398944Sobrien 61498944Sobrien return; 61598944Sobrien} 61698944Sobrien#endif /* SMP */ 61798944Sobrien 61819370Spst/* 619130803Smarcel * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. 620130803Smarcel * 621130803Smarcel * We are only called here if the lock is recursed or contested (i.e. we 622130803Smarcel * need to wake up a blocked thread). 
623130803Smarcel */ 624130803Smarcelvoid 625130803Smarcel_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) 626130803Smarcel{ 627130803Smarcel struct turnstile *ts; 62819370Spst#ifndef PREEMPTION 629130803Smarcel struct thread *td, *td1; 630130803Smarcel#endif 631130803Smarcel 632130803Smarcel if (mtx_recursed(m)) { 633130803Smarcel if (--(m->mtx_recurse) == 0) 634130803Smarcel atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); 635130803Smarcel if (LOCK_LOG_TEST(&m->mtx_object, opts)) 636130803Smarcel CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); 637130803Smarcel return; 63819370Spst } 639130803Smarcel 64019370Spst turnstile_lock(&m->mtx_object); 64119370Spst ts = turnstile_lookup(&m->mtx_object); 64219370Spst if (LOCK_LOG_TEST(&m->mtx_object, opts)) 64319370Spst CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); 64419370Spst 64519370Spst#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) 64619370Spst if (ts == NULL) { 64719370Spst _release_lock_quick(m); 64819370Spst if (LOCK_LOG_TEST(&m->mtx_object, opts)) 64919370Spst CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m); 65019370Spst turnstile_release(&m->mtx_object); 65198944Sobrien return; 65219370Spst } 65319370Spst#else 65419370Spst MPASS(ts != NULL); 65519370Spst#endif 65619370Spst#ifndef PREEMPTION 65719370Spst /* XXX */ 65819370Spst td1 = turnstile_head(ts); 65919370Spst#endif 66019370Spst#ifdef MUTEX_WAKE_ALL 66119370Spst turnstile_broadcast(ts); 66219370Spst _release_lock_quick(m); 66319370Spst#else 66419370Spst if (turnstile_signal(ts)) { 66519370Spst _release_lock_quick(m); 66619370Spst if (LOCK_LOG_TEST(&m->mtx_object, opts)) 66719370Spst CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m); 66819370Spst } else { 66919370Spst m->mtx_lock = MTX_CONTESTED; 67019370Spst if (LOCK_LOG_TEST(&m->mtx_object, opts)) 67119370Spst CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested", 67219370Spst m); 67398944Sobrien } 67498944Sobrien#endif 67519370Spst turnstile_unpend(ts); 
67646283Sdfr 67719370Spst#ifndef PREEMPTION 67819370Spst /* 67919370Spst * XXX: This is just a hack until preemption is done. However, 68019370Spst * once preemption is done we need to either wrap the 68119370Spst * turnstile_signal() and release of the actual lock in an 68219370Spst * extra critical section or change the preemption code to 68319370Spst * always just set a flag and never do instant-preempts. 68419370Spst */ 68519370Spst td = curthread; 68619370Spst if (td->td_critnest > 0 || td1->td_priority >= td->td_priority) 68719370Spst return; 68819370Spst mtx_lock_spin(&sched_lock); 68919370Spst if (!TD_IS_RUNNING(td1)) { 69019370Spst#ifdef notyet 69119370Spst if (td->td_ithd != NULL) { 69219370Spst struct ithd *it = td->td_ithd; 69319370Spst 694130803Smarcel if (it->it_interrupted) { 695130803Smarcel if (LOCK_LOG_TEST(&m->mtx_object, opts)) 696130803Smarcel CTR2(KTR_LOCK, 697130803Smarcel "_mtx_unlock_sleep: %p interrupted %p", 698130803Smarcel it, it->it_interrupted); 699130803Smarcel intr_thd_fixup(it); 700130803Smarcel } 701130803Smarcel } 702130803Smarcel#endif 703130803Smarcel if (LOCK_LOG_TEST(&m->mtx_object, opts)) 704130803Smarcel CTR2(KTR_LOCK, 705130803Smarcel "_mtx_unlock_sleep: %p switching out lock=%p", m, 706130803Smarcel (void *)m->mtx_lock); 707130803Smarcel 708130803Smarcel mi_switch(SW_INVOL, NULL); 709130803Smarcel if (LOCK_LOG_TEST(&m->mtx_object, opts)) 710130803Smarcel CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", 711130803Smarcel m, (void *)m->mtx_lock); 712130803Smarcel } 713130803Smarcel mtx_unlock_spin(&sched_lock); 714130803Smarcel#endif 715130803Smarcel 716130803Smarcel return; 717130803Smarcel} 718130803Smarcel 719130803Smarcel/* 720130803Smarcel * All the unlocking of MTX_SPIN locks is done inline. 721130803Smarcel * See the _rel_spin_lock() macro for the details. 
722130803Smarcel */ 723130803Smarcel 724130803Smarcel/* 725130803Smarcel * The backing function for the INVARIANTS-enabled mtx_assert() 726130803Smarcel */ 727130803Smarcel#ifdef INVARIANT_SUPPORT 728130803Smarcelvoid 729130803Smarcel_mtx_assert(struct mtx *m, int what, const char *file, int line) 730130803Smarcel{ 731130803Smarcel 732130803Smarcel if (panicstr != NULL) 733130803Smarcel return; 734130803Smarcel switch (what) { 735130803Smarcel case MA_OWNED: 736130803Smarcel case MA_OWNED | MA_RECURSED: 737 case MA_OWNED | MA_NOTRECURSED: 738 if (!mtx_owned(m)) 739 panic("mutex %s not owned at %s:%d", 740 m->mtx_object.lo_name, file, line); 741 if (mtx_recursed(m)) { 742 if ((what & MA_NOTRECURSED) != 0) 743 panic("mutex %s recursed at %s:%d", 744 m->mtx_object.lo_name, file, line); 745 } else if ((what & MA_RECURSED) != 0) { 746 panic("mutex %s unrecursed at %s:%d", 747 m->mtx_object.lo_name, file, line); 748 } 749 break; 750 case MA_NOTOWNED: 751 if (mtx_owned(m)) 752 panic("mutex %s owned at %s:%d", 753 m->mtx_object.lo_name, file, line); 754 break; 755 default: 756 panic("unknown mtx_assert at %s:%d", file, line); 757 } 758} 759#endif 760 761/* 762 * The MUTEX_DEBUG-enabled mtx_validate() 763 * 764 * Most of these checks have been moved off into the LO_INITIALIZED flag 765 * maintained by the witness code. 766 */ 767#ifdef MUTEX_DEBUG 768 769void mtx_validate(struct mtx *); 770 771void 772mtx_validate(struct mtx *m) 773{ 774 775/* 776 * XXX: When kernacc() does not require Giant we can reenable this check 777 */ 778#ifdef notyet 779/* 780 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 781 * we can re-enable the kernacc() checks. 782 */ 783#ifndef __alpha__ 784 /* 785 * Can't call kernacc() from early init386(), especially when 786 * initializing Giant mutex, because some stuff in kernacc() 787 * requires Giant itself. 
788 */ 789 if (!cold) 790 if (!kernacc((caddr_t)m, sizeof(m), 791 VM_PROT_READ | VM_PROT_WRITE)) 792 panic("Can't read and write to mutex %p", m); 793#endif 794#endif 795} 796#endif 797 798/* 799 * General init routine used by the MTX_SYSINIT() macro. 800 */ 801void 802mtx_sysinit(void *arg) 803{ 804 struct mtx_args *margs = arg; 805 806 mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); 807} 808 809/* 810 * Mutex initialization routine; initialize lock `m' of type contained in 811 * `opts' with options contained in `opts' and name `name.' The optional 812 * lock type `type' is used as a general lock category name for use with 813 * witness. 814 */ 815void 816mtx_init(struct mtx *m, const char *name, const char *type, int opts) 817{ 818 struct lock_object *lock; 819 820 MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | 821 MTX_NOWITNESS | MTX_DUPOK)) == 0); 822 823#ifdef MUTEX_DEBUG 824 /* Diagnostic and error correction */ 825 mtx_validate(m); 826#endif 827 828 lock = &m->mtx_object; 829 KASSERT((lock->lo_flags & LO_INITIALIZED) == 0, 830 ("mutex \"%s\" %p already initialized", name, m)); 831 bzero(m, sizeof(*m)); 832 if (opts & MTX_SPIN) 833 lock->lo_class = &lock_class_mtx_spin; 834 else 835 lock->lo_class = &lock_class_mtx_sleep; 836 lock->lo_name = name; 837 lock->lo_type = type != NULL ? type : name; 838 if (opts & MTX_QUIET) 839 lock->lo_flags = LO_QUIET; 840 if (opts & MTX_RECURSE) 841 lock->lo_flags |= LO_RECURSABLE; 842 if ((opts & MTX_NOWITNESS) == 0) 843 lock->lo_flags |= LO_WITNESS; 844 if (opts & MTX_DUPOK) 845 lock->lo_flags |= LO_DUPOK; 846 847 m->mtx_lock = MTX_UNOWNED; 848 849 LOCK_LOG_INIT(lock, opts); 850 851 WITNESS_INIT(lock); 852} 853 854/* 855 * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be 856 * passed in as a flag here because if the corresponding mtx_init() was 857 * called with MTX_QUIET set, then it will already be set in the mutex's 858 * flags. 
859 */ 860void 861mtx_destroy(struct mtx *m) 862{ 863 864 LOCK_LOG_DESTROY(&m->mtx_object, 0); 865 866 if (!mtx_owned(m)) 867 MPASS(mtx_unowned(m)); 868 else { 869 MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); 870 871 /* Tell witness this isn't locked to make it happy. */ 872 WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__, 873 __LINE__); 874 } 875 876 WITNESS_DESTROY(&m->mtx_object); 877} 878 879/* 880 * Intialize the mutex code and system mutexes. This is called from the MD 881 * startup code prior to mi_startup(). The per-CPU data space needs to be 882 * setup before this is called. 883 */ 884void 885mutex_init(void) 886{ 887 888 /* Setup thread0 so that mutexes work. */ 889 LIST_INIT(&thread0.td_contested); 890 891 /* Setup turnstiles so that sleep mutexes work. */ 892 init_turnstiles(); 893 894 /* 895 * Initialize mutexes. 896 */ 897 mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); 898 mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE); 899 mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); 900 mtx_lock(&Giant); 901} 902