/* kern_mutex.c revision 149737 */
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27250199Sgrehan * 28250199Sgrehan * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29283280Swhu * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30283280Swhu */ 31283280Swhu 32250199Sgrehan/* 33250199Sgrehan * Machine independent bits of mutex implementation. 34250199Sgrehan */ 35250199Sgrehan 36250199Sgrehan#include <sys/cdefs.h> 37250199Sgrehan__FBSDID("$FreeBSD: head/sys/kern/kern_mutex.c 149737 2005-09-02 20:21:49Z jhb $"); 38250199Sgrehan 39250199Sgrehan#include "opt_adaptive_mutexes.h" 40250199Sgrehan#include "opt_ddb.h" 41250199Sgrehan#include "opt_mprof.h" 42250199Sgrehan#include "opt_mutex_wake_all.h" 43250199Sgrehan#include "opt_sched.h" 44250199Sgrehan 45250199Sgrehan#include <sys/param.h> 46250199Sgrehan#include <sys/systm.h> 47250199Sgrehan#include <sys/bus.h> 48250199Sgrehan#include <sys/conf.h> 49250199Sgrehan#include <sys/kdb.h> 50250199Sgrehan#include <sys/kernel.h> 51283280Swhu#include <sys/ktr.h> 52283280Swhu#include <sys/lock.h> 53283280Swhu#include <sys/malloc.h> 54283280Swhu#include <sys/mutex.h> 55283280Swhu#include <sys/proc.h> 56283280Swhu#include <sys/resourcevar.h> 57283280Swhu#include <sys/sched.h> 58283280Swhu#include <sys/sbuf.h> 59283280Swhu#include <sys/sysctl.h> 60283280Swhu#include <sys/turnstile.h> 61283280Swhu#include <sys/vmmeter.h> 62283280Swhu 63283280Swhu#include <machine/atomic.h> 64283280Swhu#include <machine/bus.h> 65283280Swhu#include <machine/clock.h> 66283280Swhu#include <machine/cpu.h> 67283280Swhu 68283280Swhu#include <ddb/ddb.h> 69283280Swhu 70283280Swhu#include <vm/vm.h> 71283280Swhu#include <vm/vm_extern.h> 72250199Sgrehan 73283280Swhu/* 74283280Swhu * Force MUTEX_WAKE_ALL for now. 75283280Swhu * single thread wakeup needs fixes to avoid race conditions with 76283280Swhu * priority inheritance. 77283280Swhu */ 78283280Swhu#ifndef MUTEX_WAKE_ALL 79283280Swhu#define MUTEX_WAKE_ALL 80283280Swhu#endif 81283280Swhu 82283280Swhu/* 83283280Swhu * Internal utility macros. 
84283280Swhu */ 85283280Swhu#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 86283280Swhu 87283280Swhu#define mtx_owner(m) (mtx_unowned((m)) ? NULL \ 88283280Swhu : (struct thread *)((m)->mtx_lock & MTX_FLAGMASK)) 89283280Swhu 90283280Swhu/* 91283280Swhu * Lock classes for sleep and spin mutexes. 92283280Swhu */ 93283280Swhustruct lock_class lock_class_mtx_sleep = { 94283280Swhu "sleep mutex", 95283280Swhu LC_SLEEPLOCK | LC_RECURSABLE 96283280Swhu}; 97283280Swhustruct lock_class lock_class_mtx_spin = { 98283280Swhu "spin mutex", 99283280Swhu LC_SPINLOCK | LC_RECURSABLE 100283280Swhu}; 101283280Swhu 102283280Swhu/* 103283280Swhu * System-wide mutexes 104283280Swhu */ 105283280Swhustruct mtx sched_lock; 106283280Swhustruct mtx Giant; 107283280Swhu 108283280Swhu#ifdef MUTEX_PROFILING 109283280SwhuSYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging"); 110283280SwhuSYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling"); 111283280Swhustatic int mutex_prof_enable = 0; 112283280SwhuSYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW, 113283280Swhu &mutex_prof_enable, 0, "Enable tracing of mutex holdtime"); 114283280Swhu 115283280Swhustruct mutex_prof { 116283280Swhu const char *name; 117283280Swhu const char *file; 118283280Swhu int line; 119283280Swhu uintmax_t cnt_max; 120283280Swhu uintmax_t cnt_tot; 121283280Swhu uintmax_t cnt_cur; 122283280Swhu uintmax_t cnt_contest_holding; 123283280Swhu uintmax_t cnt_contest_locking; 124283280Swhu struct mutex_prof *next; 125283280Swhu}; 126283280Swhu 127283280Swhu/* 128283280Swhu * mprof_buf is a static pool of profiling records to avoid possible 129283280Swhu * reentrance of the memory allocation functions. 130283280Swhu * 131283280Swhu * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE. 
132283280Swhu */ 133283280Swhu#ifdef MPROF_BUFFERS 134283280Swhu#define NUM_MPROF_BUFFERS MPROF_BUFFERS 135283280Swhu#else 136283280Swhu#define NUM_MPROF_BUFFERS 1000 137283280Swhu#endif 138283280Swhustatic struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS]; 139283280Swhustatic int first_free_mprof_buf; 140283280Swhu#ifndef MPROF_HASH_SIZE 141283280Swhu#define MPROF_HASH_SIZE 1009 142283280Swhu#endif 143283280Swhu#if NUM_MPROF_BUFFERS >= MPROF_HASH_SIZE 144283280Swhu#error MPROF_BUFFERS must be larger than MPROF_HASH_SIZE 145283280Swhu#endif 146283280Swhustatic struct mutex_prof *mprof_hash[MPROF_HASH_SIZE]; 147283280Swhu/* SWAG: sbuf size = avg stat. line size * number of locks */ 148283280Swhu#define MPROF_SBUF_SIZE 256 * 400 149283280Swhu 150283280Swhustatic int mutex_prof_acquisitions; 151250199SgrehanSYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD, 152250199Sgrehan &mutex_prof_acquisitions, 0, "Number of mutex acquistions recorded"); 153250199Sgrehanstatic int mutex_prof_records; 154250199SgrehanSYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD, 155250199Sgrehan &mutex_prof_records, 0, "Number of profiling records"); 156283280Swhustatic int mutex_prof_maxrecords = NUM_MPROF_BUFFERS; 157250199SgrehanSYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD, 158250199Sgrehan &mutex_prof_maxrecords, 0, "Maximum number of profiling records"); 159250199Sgrehanstatic int mutex_prof_rejected; 160250199SgrehanSYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD, 161250199Sgrehan &mutex_prof_rejected, 0, "Number of rejected profiling records"); 162250199Sgrehanstatic int mutex_prof_hashsize = MPROF_HASH_SIZE; 163250199SgrehanSYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD, 164250199Sgrehan &mutex_prof_hashsize, 0, "Hash size"); 165250199Sgrehanstatic int mutex_prof_collisions = 0; 166250199SgrehanSYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD, 167250199Sgrehan &mutex_prof_collisions, 0, "Number of hash 
collisions"); 168250199Sgrehan 169250199Sgrehan/* 170250199Sgrehan * mprof_mtx protects the profiling buffers and the hash. 171250199Sgrehan */ 172250199Sgrehanstatic struct mtx mprof_mtx; 173250199SgrehanMTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET); 174250199Sgrehan 175250199Sgrehanstatic u_int64_t 176250199Sgrehannanoseconds(void) 177250199Sgrehan{ 178250199Sgrehan struct timespec tv; 179283280Swhu 180250199Sgrehan nanotime(&tv); 181250199Sgrehan return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec); 182250199Sgrehan} 183250199Sgrehan 184250199Sgrehanstatic int 185250199Sgrehandump_mutex_prof_stats(SYSCTL_HANDLER_ARGS) 186250199Sgrehan{ 187250199Sgrehan struct sbuf *sb; 188250199Sgrehan int error, i; 189250199Sgrehan static int multiplier = 1; 190250199Sgrehan 191250199Sgrehan if (first_free_mprof_buf == 0) 192250199Sgrehan return (SYSCTL_OUT(req, "No locking recorded", 193250199Sgrehan sizeof("No locking recorded"))); 194250199Sgrehan 195250199Sgrehanretry_sbufops: 196250199Sgrehan sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); 197250199Sgrehan sbuf_printf(sb, "\n%6s %12s %11s %5s %12s %12s %s\n", 198250199Sgrehan "max", "total", "count", "avg", "cnt_hold", "cnt_lock", "name"); 199250199Sgrehan /* 200250199Sgrehan * XXX this spinlock seems to be by far the largest perpetrator 201250199Sgrehan * of spinlock latency (1.6 msec on an Athlon1600 was recorded 202250199Sgrehan * even before I pessimized it further by moving the average 203250199Sgrehan * computation here). 204250199Sgrehan */ 205250199Sgrehan mtx_lock_spin(&mprof_mtx); 206250199Sgrehan for (i = 0; i < first_free_mprof_buf; ++i) { 207250199Sgrehan sbuf_printf(sb, "%6ju %12ju %11ju %5ju %12ju %12ju %s:%d (%s)\n", 208250199Sgrehan mprof_buf[i].cnt_max / 1000, 209250199Sgrehan mprof_buf[i].cnt_tot / 1000, 210250199Sgrehan mprof_buf[i].cnt_cur, 211250199Sgrehan mprof_buf[i].cnt_cur == 0 ? 
(uintmax_t)0 : 212250199Sgrehan mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000), 213250199Sgrehan mprof_buf[i].cnt_contest_holding, 214250199Sgrehan mprof_buf[i].cnt_contest_locking, 215250199Sgrehan mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name); 216250199Sgrehan if (sbuf_overflowed(sb)) { 217250199Sgrehan mtx_unlock_spin(&mprof_mtx); 218250199Sgrehan sbuf_delete(sb); 219250199Sgrehan multiplier++; 220250199Sgrehan goto retry_sbufops; 221250199Sgrehan } 222250199Sgrehan } 223250199Sgrehan mtx_unlock_spin(&mprof_mtx); 224250199Sgrehan sbuf_finish(sb); 225250199Sgrehan error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 226250199Sgrehan sbuf_delete(sb); 227250199Sgrehan return (error); 228250199Sgrehan} 229250199SgrehanSYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, 230250199Sgrehan NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics"); 231250199Sgrehan 232250199Sgrehanstatic int 233250199Sgrehanreset_mutex_prof_stats(SYSCTL_HANDLER_ARGS) 234250199Sgrehan{ 235283280Swhu int error, v; 236283280Swhu 237250199Sgrehan if (first_free_mprof_buf == 0) 238283280Swhu return (0); 239250199Sgrehan 240283280Swhu v = 0; 241283280Swhu error = sysctl_handle_int(oidp, &v, 0, req); 242283280Swhu if (error) 243283280Swhu return (error); 244283280Swhu if (req->newptr == NULL) 245283280Swhu return (error); 246283280Swhu if (v == 0) 247283280Swhu return (0); 248250199Sgrehan 249283280Swhu mtx_lock_spin(&mprof_mtx); 250283280Swhu bzero(mprof_buf, sizeof(*mprof_buf) * first_free_mprof_buf); 251250199Sgrehan bzero(mprof_hash, sizeof(struct mtx *) * MPROF_HASH_SIZE); 252283280Swhu first_free_mprof_buf = 0; 253283280Swhu mtx_unlock_spin(&mprof_mtx); 254250199Sgrehan return (0); 255283280Swhu} 256283280SwhuSYSCTL_PROC(_debug_mutex_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, 257283280Swhu NULL, 0, reset_mutex_prof_stats, "I", "Reset mutex profiling statistics"); 258283280Swhu#endif 259250199Sgrehan 260250199Sgrehan/* 
261250199Sgrehan * Function versions of the inlined __mtx_* macros. These are used by 262250199Sgrehan * modules and can also be called from assembly language if needed. 263250199Sgrehan */ 264250199Sgrehanvoid 265250199Sgrehan_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) 266250199Sgrehan{ 267250199Sgrehan 268250199Sgrehan MPASS(curthread != NULL); 269250199Sgrehan KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, 270250199Sgrehan ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, 271250199Sgrehan file, line)); 272250199Sgrehan WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, 273250199Sgrehan file, line); 274250199Sgrehan _get_sleep_lock(m, curthread, opts, file, line); 275250199Sgrehan LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 276250199Sgrehan line); 277250199Sgrehan WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 278250199Sgrehan#ifdef MUTEX_PROFILING 279250199Sgrehan /* don't reset the timer when/if recursing */ 280250199Sgrehan if (m->mtx_acqtime == 0) { 281250199Sgrehan m->mtx_filename = file; 282250199Sgrehan m->mtx_lineno = line; 283250199Sgrehan m->mtx_acqtime = mutex_prof_enable ? 
nanoseconds() : 0; 284250199Sgrehan ++mutex_prof_acquisitions; 285250199Sgrehan } 286250199Sgrehan#endif 287250199Sgrehan} 288250199Sgrehan 289250199Sgrehanvoid 290250199Sgrehan_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) 291250199Sgrehan{ 292250199Sgrehan 293250199Sgrehan MPASS(curthread != NULL); 294250199Sgrehan KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, 295250199Sgrehan ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, 296250199Sgrehan file, line)); 297250199Sgrehan WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 298250199Sgrehan LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 299250199Sgrehan line); 300250199Sgrehan mtx_assert(m, MA_OWNED); 301250199Sgrehan#ifdef MUTEX_PROFILING 302250199Sgrehan if (m->mtx_acqtime != 0) { 303250199Sgrehan static const char *unknown = "(unknown)"; 304250199Sgrehan struct mutex_prof *mpp; 305250199Sgrehan u_int64_t acqtime, now; 306250199Sgrehan const char *p, *q; 307250199Sgrehan volatile u_int hash; 308250199Sgrehan 309250199Sgrehan now = nanoseconds(); 310250199Sgrehan acqtime = m->mtx_acqtime; 311250199Sgrehan m->mtx_acqtime = 0; 312250199Sgrehan if (now <= acqtime) 313250199Sgrehan goto out; 314250199Sgrehan for (p = m->mtx_filename; 315250199Sgrehan p != NULL && strncmp(p, "../", 3) == 0; p += 3) 316250199Sgrehan /* nothing */ ; 317250199Sgrehan if (p == NULL || *p == '\0') 318250199Sgrehan p = unknown; 319250199Sgrehan for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q) 320250199Sgrehan hash = (hash * 2 + *q) % MPROF_HASH_SIZE; 321250199Sgrehan mtx_lock_spin(&mprof_mtx); 322250199Sgrehan for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next) 323250199Sgrehan if (mpp->line == m->mtx_lineno && 324250199Sgrehan strcmp(mpp->file, p) == 0) 325250199Sgrehan break; 326250199Sgrehan if (mpp == NULL) { 327250199Sgrehan /* Just exit if we cannot get a trace buffer */ 328250199Sgrehan if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) { 
329250199Sgrehan ++mutex_prof_rejected; 330250199Sgrehan goto unlock; 331250199Sgrehan } 332250199Sgrehan mpp = &mprof_buf[first_free_mprof_buf++]; 333250199Sgrehan mpp->name = mtx_name(m); 334250199Sgrehan mpp->file = p; 335250199Sgrehan mpp->line = m->mtx_lineno; 336250199Sgrehan mpp->next = mprof_hash[hash]; 337250199Sgrehan if (mprof_hash[hash] != NULL) 338250199Sgrehan ++mutex_prof_collisions; 339250199Sgrehan mprof_hash[hash] = mpp; 340250199Sgrehan ++mutex_prof_records; 341250199Sgrehan } 342250199Sgrehan /* 343250199Sgrehan * Record if the mutex has been held longer now than ever 344250199Sgrehan * before. 345250199Sgrehan */ 346250199Sgrehan if (now - acqtime > mpp->cnt_max) 347250199Sgrehan mpp->cnt_max = now - acqtime; 348250199Sgrehan mpp->cnt_tot += now - acqtime; 349250199Sgrehan mpp->cnt_cur++; 350283280Swhu /* 351250199Sgrehan * There's a small race, really we should cmpxchg 352250199Sgrehan * 0 with the current value, but that would bill 353250199Sgrehan * the contention to the wrong lock instance if 354250199Sgrehan * it followed this also. 
355250199Sgrehan */ 356250199Sgrehan mpp->cnt_contest_holding += m->mtx_contest_holding; 357250199Sgrehan m->mtx_contest_holding = 0; 358250199Sgrehan mpp->cnt_contest_locking += m->mtx_contest_locking; 359283280Swhu m->mtx_contest_locking = 0; 360250199Sgrehanunlock: 361250199Sgrehan mtx_unlock_spin(&mprof_mtx); 362250199Sgrehan } 363250199Sgrehanout: 364250199Sgrehan#endif 365250199Sgrehan _rel_sleep_lock(m, curthread, opts, file, line); 366250199Sgrehan} 367250199Sgrehan 368250199Sgrehanvoid 369250199Sgrehan_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) 370283280Swhu{ 371283280Swhu 372250199Sgrehan MPASS(curthread != NULL); 373283280Swhu KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin, 374250199Sgrehan ("mtx_lock_spin() of sleep mutex %s @ %s:%d", 375250199Sgrehan m->mtx_object.lo_name, file, line)); 376250199Sgrehan WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, 377250199Sgrehan file, line); 378250199Sgrehan _get_spin_lock(m, curthread, opts, file, line); 379250199Sgrehan LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, 380250199Sgrehan line); 381250199Sgrehan WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 382250199Sgrehan} 383250199Sgrehan 384250199Sgrehanvoid 385250199Sgrehan_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) 386250199Sgrehan{ 387250199Sgrehan 388250199Sgrehan MPASS(curthread != NULL); 389250199Sgrehan KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin, 390250199Sgrehan ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", 391250199Sgrehan m->mtx_object.lo_name, file, line)); 392250199Sgrehan WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); 393250199Sgrehan LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, 394283280Swhu line); 395283280Swhu mtx_assert(m, MA_OWNED); 396283280Swhu _rel_spin_lock(m); 397283280Swhu} 398283280Swhu 399283280Swhu/* 400250199Sgrehan * The important part of 
mtx_trylock{,_flags}() 401283280Swhu * Tries to acquire lock `m.' If this function is called on a mutex that 402283280Swhu * is already owned, it will recursively acquire the lock. 403283280Swhu */ 404283280Swhuint 405283280Swhu_mtx_trylock(struct mtx *m, int opts, const char *file, int line) 406283280Swhu{ 407283280Swhu int rval; 408283280Swhu 409283280Swhu MPASS(curthread != NULL); 410283280Swhu KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, 411283280Swhu ("mtx_trylock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, 412283280Swhu file, line)); 413283280Swhu 414283280Swhu if (mtx_owned(m) && (m->mtx_object.lo_flags & LO_RECURSABLE) != 0) { 415283280Swhu m->mtx_recurse++; 416283280Swhu atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); 417283280Swhu rval = 1; 418283280Swhu } else 419283280Swhu rval = _obtain_lock(m, (uintptr_t)curthread); 420283280Swhu 421283280Swhu LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line); 422283280Swhu if (rval) 423283280Swhu WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, 424283280Swhu file, line); 425250199Sgrehan 426283280Swhu return (rval); 427250199Sgrehan} 428250199Sgrehan 429283280Swhu/* 430283280Swhu * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. 431283280Swhu * 432283280Swhu * We call this if the lock is either contested (i.e. we need to go to 433283280Swhu * sleep waiting for it), or if we need to recurse on it. 
434283280Swhu */ 435250199Sgrehanvoid 436250199Sgrehan_mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file, 437250199Sgrehan int line) 438250199Sgrehan{ 439250199Sgrehan#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) 440250199Sgrehan struct thread *owner; 441283280Swhu#endif 442283280Swhu uintptr_t v; 443250199Sgrehan#ifdef KTR 444283280Swhu int cont_logged = 0; 445283280Swhu#endif 446250199Sgrehan#ifdef MUTEX_PROFILING 447283280Swhu int contested; 448283280Swhu#endif 449250199Sgrehan 450250199Sgrehan if (mtx_owned(m)) { 451283280Swhu KASSERT((m->mtx_object.lo_flags & LO_RECURSABLE) != 0, 452283280Swhu ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", 453283280Swhu m->mtx_object.lo_name, file, line)); 454250199Sgrehan m->mtx_recurse++; 455283280Swhu atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); 456283280Swhu if (LOCK_LOG_TEST(&m->mtx_object, opts)) 457283280Swhu CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); 458283280Swhu return; 459283280Swhu } 460283280Swhu 461283280Swhu if (LOCK_LOG_TEST(&m->mtx_object, opts)) 462283280Swhu CTR4(KTR_LOCK, 463283280Swhu "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", 464283280Swhu m->mtx_object.lo_name, (void *)m->mtx_lock, file, line); 465250199Sgrehan 466283280Swhu#ifdef MUTEX_PROFILING 467283280Swhu contested = 0; 468283280Swhu#endif 469283280Swhu while (!_obtain_lock(m, tid)) { 470283280Swhu#ifdef MUTEX_PROFILING 471283280Swhu contested = 1; 472283280Swhu atomic_add_int(&m->mtx_contest_holding, 1); 473283280Swhu#endif 474283280Swhu turnstile_lock(&m->mtx_object); 475283280Swhu v = m->mtx_lock; 476283280Swhu 477283280Swhu /* 478283280Swhu * Check if the lock has been released while spinning for 479283280Swhu * the turnstile chain lock. 
480283280Swhu */ 481283280Swhu if (v == MTX_UNOWNED) { 482283280Swhu turnstile_release(&m->mtx_object); 483283280Swhu cpu_spinwait(); 484283280Swhu continue; 485283280Swhu } 486283280Swhu 487250199Sgrehan#ifdef MUTEX_WAKE_ALL 488250199Sgrehan MPASS(v != MTX_CONTESTED); 489250199Sgrehan#else 490250199Sgrehan /* 491250199Sgrehan * The mutex was marked contested on release. This means that 492250199Sgrehan * there are other threads blocked on it. Grab ownership of 493250199Sgrehan * it and propagate its priority to the current thread if 494250199Sgrehan * necessary. 495250199Sgrehan */ 496250199Sgrehan if (v == MTX_CONTESTED) { 497250199Sgrehan m->mtx_lock = tid | MTX_CONTESTED; 498250199Sgrehan turnstile_claim(&m->mtx_object); 499250199Sgrehan break; 500250199Sgrehan } 501250199Sgrehan#endif 502250199Sgrehan 503250199Sgrehan /* 504250199Sgrehan * If the mutex isn't already contested and a failure occurs 505250199Sgrehan * setting the contested bit, the mutex was either released 506250199Sgrehan * or the state of the MTX_RECURSED bit changed. 507250199Sgrehan */ 508250199Sgrehan if ((v & MTX_CONTESTED) == 0 && 509250199Sgrehan !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { 510250199Sgrehan turnstile_release(&m->mtx_object); 511250199Sgrehan cpu_spinwait(); 512250199Sgrehan continue; 513250199Sgrehan } 514250199Sgrehan 515250199Sgrehan#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) 516250199Sgrehan /* 517250199Sgrehan * If the current owner of the lock is executing on another 518250199Sgrehan * CPU, spin instead of blocking. 
519250199Sgrehan */ 520250199Sgrehan owner = (struct thread *)(v & MTX_FLAGMASK); 521250199Sgrehan#ifdef ADAPTIVE_GIANT 522250199Sgrehan if (TD_IS_RUNNING(owner)) { 523250199Sgrehan#else 524250199Sgrehan if (m != &Giant && TD_IS_RUNNING(owner)) { 525250199Sgrehan#endif 526250199Sgrehan turnstile_release(&m->mtx_object); 527250199Sgrehan while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { 528250199Sgrehan cpu_spinwait(); 529250199Sgrehan } 530250199Sgrehan continue; 531250199Sgrehan } 532250199Sgrehan#endif /* SMP && !NO_ADAPTIVE_MUTEXES */ 533250199Sgrehan 534250199Sgrehan /* 535250199Sgrehan * We definitely must sleep for this lock. 536250199Sgrehan */ 537250199Sgrehan mtx_assert(m, MA_NOTOWNED); 538250199Sgrehan 539250199Sgrehan#ifdef KTR 540250199Sgrehan if (!cont_logged) { 541250199Sgrehan CTR6(KTR_CONTENTION, 542250199Sgrehan "contention: %p at %s:%d wants %s, taken by %s:%d", 543250199Sgrehan (void *)tid, file, line, m->mtx_object.lo_name, 544250199Sgrehan WITNESS_FILE(&m->mtx_object), 545250199Sgrehan WITNESS_LINE(&m->mtx_object)); 546250199Sgrehan cont_logged = 1; 547250199Sgrehan } 548250199Sgrehan#endif 549250199Sgrehan 550283280Swhu /* 551250199Sgrehan * Block on the turnstile. 552283280Swhu */ 553250199Sgrehan turnstile_wait(&m->mtx_object, mtx_owner(m)); 554250199Sgrehan } 555250199Sgrehan 556250199Sgrehan#ifdef KTR 557250199Sgrehan if (cont_logged) { 558250199Sgrehan CTR4(KTR_CONTENTION, 559283280Swhu "contention end: %s acquired by %p at %s:%d", 560250199Sgrehan m->mtx_object.lo_name, (void *)tid, file, line); 561250199Sgrehan } 562250199Sgrehan#endif 563#ifdef MUTEX_PROFILING 564 if (contested) 565 m->mtx_contest_locking++; 566 m->mtx_contest_holding = 0; 567#endif 568 return; 569} 570 571#ifdef SMP 572/* 573 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock. 574 * 575 * This is only called if we need to actually spin for the lock. Recursion 576 * is handled inline. 
577 */ 578void 579_mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, 580 int line) 581{ 582 int i = 0; 583 584 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 585 CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); 586 587 for (;;) { 588 if (_obtain_lock(m, tid)) 589 break; 590 591 /* Give interrupts a chance while we spin. */ 592 spinlock_exit(); 593 while (m->mtx_lock != MTX_UNOWNED) { 594 if (i++ < 10000000) { 595 cpu_spinwait(); 596 continue; 597 } 598 if (i < 60000000) 599 DELAY(1); 600 else if (!kdb_active && !panicstr) { 601 printf("spin lock %s held by %p for > 5 seconds\n", 602 m->mtx_object.lo_name, (void *)m->mtx_lock); 603#ifdef WITNESS 604 witness_display_spinlock(&m->mtx_object, 605 mtx_owner(m)); 606#endif 607 panic("spin lock held too long"); 608 } 609 cpu_spinwait(); 610 } 611 spinlock_enter(); 612 } 613 614 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 615 CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); 616 617 return; 618} 619#endif /* SMP */ 620 621/* 622 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. 623 * 624 * We are only called here if the lock is recursed or contested (i.e. we 625 * need to wake up a blocked thread). 
626 */ 627void 628_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) 629{ 630 struct turnstile *ts; 631#ifndef PREEMPTION 632 struct thread *td, *td1; 633#endif 634 635 if (mtx_recursed(m)) { 636 if (--(m->mtx_recurse) == 0) 637 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); 638 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 639 CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); 640 return; 641 } 642 643 turnstile_lock(&m->mtx_object); 644 ts = turnstile_lookup(&m->mtx_object); 645 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 646 CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); 647 648#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) 649 if (ts == NULL) { 650 _release_lock_quick(m); 651 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 652 CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m); 653 turnstile_release(&m->mtx_object); 654 return; 655 } 656#else 657 MPASS(ts != NULL); 658#endif 659#ifndef PREEMPTION 660 /* XXX */ 661 td1 = turnstile_head(ts); 662#endif 663#ifdef MUTEX_WAKE_ALL 664 turnstile_broadcast(ts); 665 _release_lock_quick(m); 666#else 667 if (turnstile_signal(ts)) { 668 _release_lock_quick(m); 669 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 670 CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m); 671 } else { 672 m->mtx_lock = MTX_CONTESTED; 673 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 674 CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested", 675 m); 676 } 677#endif 678 turnstile_unpend(ts); 679 680#ifndef PREEMPTION 681 /* 682 * XXX: This is just a hack until preemption is done. However, 683 * once preemption is done we need to either wrap the 684 * turnstile_signal() and release of the actual lock in an 685 * extra critical section or change the preemption code to 686 * always just set a flag and never do instant-preempts. 
687 */ 688 td = curthread; 689 if (td->td_critnest > 0 || td1->td_priority >= td->td_priority) 690 return; 691 mtx_lock_spin(&sched_lock); 692 if (!TD_IS_RUNNING(td1)) { 693#ifdef notyet 694 if (td->td_ithd != NULL) { 695 struct ithd *it = td->td_ithd; 696 697 if (it->it_interrupted) { 698 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 699 CTR2(KTR_LOCK, 700 "_mtx_unlock_sleep: %p interrupted %p", 701 it, it->it_interrupted); 702 intr_thd_fixup(it); 703 } 704 } 705#endif 706 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 707 CTR2(KTR_LOCK, 708 "_mtx_unlock_sleep: %p switching out lock=%p", m, 709 (void *)m->mtx_lock); 710 711 mi_switch(SW_INVOL, NULL); 712 if (LOCK_LOG_TEST(&m->mtx_object, opts)) 713 CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", 714 m, (void *)m->mtx_lock); 715 } 716 mtx_unlock_spin(&sched_lock); 717#endif 718 719 return; 720} 721 722/* 723 * All the unlocking of MTX_SPIN locks is done inline. 724 * See the _rel_spin_lock() macro for the details. 725 */ 726 727/* 728 * The backing function for the INVARIANTS-enabled mtx_assert() 729 */ 730#ifdef INVARIANT_SUPPORT 731void 732_mtx_assert(struct mtx *m, int what, const char *file, int line) 733{ 734 735 if (panicstr != NULL || dumping) 736 return; 737 switch (what) { 738 case MA_OWNED: 739 case MA_OWNED | MA_RECURSED: 740 case MA_OWNED | MA_NOTRECURSED: 741 if (!mtx_owned(m)) 742 panic("mutex %s not owned at %s:%d", 743 m->mtx_object.lo_name, file, line); 744 if (mtx_recursed(m)) { 745 if ((what & MA_NOTRECURSED) != 0) 746 panic("mutex %s recursed at %s:%d", 747 m->mtx_object.lo_name, file, line); 748 } else if ((what & MA_RECURSED) != 0) { 749 panic("mutex %s unrecursed at %s:%d", 750 m->mtx_object.lo_name, file, line); 751 } 752 break; 753 case MA_NOTOWNED: 754 if (mtx_owned(m)) 755 panic("mutex %s owned at %s:%d", 756 m->mtx_object.lo_name, file, line); 757 break; 758 default: 759 panic("unknown mtx_assert at %s:%d", file, line); 760 } 761} 762#endif 763 764/* 765 * The MUTEX_DEBUG-enabled 
mtx_validate() 766 * 767 * Most of these checks have been moved off into the LO_INITIALIZED flag 768 * maintained by the witness code. 769 */ 770#ifdef MUTEX_DEBUG 771 772void mtx_validate(struct mtx *); 773 774void 775mtx_validate(struct mtx *m) 776{ 777 778/* 779 * XXX: When kernacc() does not require Giant we can reenable this check 780 */ 781#ifdef notyet 782/* 783 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 784 * we can re-enable the kernacc() checks. 785 */ 786#ifndef __alpha__ 787 /* 788 * Can't call kernacc() from early init386(), especially when 789 * initializing Giant mutex, because some stuff in kernacc() 790 * requires Giant itself. 791 */ 792 if (!cold) 793 if (!kernacc((caddr_t)m, sizeof(m), 794 VM_PROT_READ | VM_PROT_WRITE)) 795 panic("Can't read and write to mutex %p", m); 796#endif 797#endif 798} 799#endif 800 801/* 802 * General init routine used by the MTX_SYSINIT() macro. 803 */ 804void 805mtx_sysinit(void *arg) 806{ 807 struct mtx_args *margs = arg; 808 809 mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); 810} 811 812/* 813 * Mutex initialization routine; initialize lock `m' of type contained in 814 * `opts' with options contained in `opts' and name `name.' The optional 815 * lock type `type' is used as a general lock category name for use with 816 * witness. 
817 */ 818void 819mtx_init(struct mtx *m, const char *name, const char *type, int opts) 820{ 821 struct lock_object *lock; 822 823 MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | 824 MTX_NOWITNESS | MTX_DUPOK)) == 0); 825 826#ifdef MUTEX_DEBUG 827 /* Diagnostic and error correction */ 828 mtx_validate(m); 829#endif 830 831 lock = &m->mtx_object; 832 KASSERT((lock->lo_flags & LO_INITIALIZED) == 0, 833 ("mutex \"%s\" %p already initialized", name, m)); 834 bzero(m, sizeof(*m)); 835 if (opts & MTX_SPIN) 836 lock->lo_class = &lock_class_mtx_spin; 837 else 838 lock->lo_class = &lock_class_mtx_sleep; 839 lock->lo_name = name; 840 lock->lo_type = type != NULL ? type : name; 841 if (opts & MTX_QUIET) 842 lock->lo_flags = LO_QUIET; 843 if (opts & MTX_RECURSE) 844 lock->lo_flags |= LO_RECURSABLE; 845 if ((opts & MTX_NOWITNESS) == 0) 846 lock->lo_flags |= LO_WITNESS; 847 if (opts & MTX_DUPOK) 848 lock->lo_flags |= LO_DUPOK; 849 850 m->mtx_lock = MTX_UNOWNED; 851 852 LOCK_LOG_INIT(lock, opts); 853 854 WITNESS_INIT(lock); 855} 856 857/* 858 * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be 859 * passed in as a flag here because if the corresponding mtx_init() was 860 * called with MTX_QUIET set, then it will already be set in the mutex's 861 * flags. 862 */ 863void 864mtx_destroy(struct mtx *m) 865{ 866 867 LOCK_LOG_DESTROY(&m->mtx_object, 0); 868 869 if (!mtx_owned(m)) 870 MPASS(mtx_unowned(m)); 871 else { 872 MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); 873 874 /* Tell witness this isn't locked to make it happy. */ 875 WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__, 876 __LINE__); 877 } 878 879 WITNESS_DESTROY(&m->mtx_object); 880} 881 882/* 883 * Intialize the mutex code and system mutexes. This is called from the MD 884 * startup code prior to mi_startup(). The per-CPU data space needs to be 885 * setup before this is called. 886 */ 887void 888mutex_init(void) 889{ 890 891 /* Setup thread0 so that mutexes work. 
*/ 892 LIST_INIT(&thread0.td_contested); 893 894 /* Setup turnstiles so that sleep mutexes work. */ 895 init_turnstiles(); 896 897 /* 898 * Initialize mutexes. 899 */ 900 mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); 901 mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE); 902 mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); 903 mtx_lock(&Giant); 904} 905