kern_mutex.c revision 161336
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_mutex.c 161336 2006-08-15 18:26:12Z jhb $");

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"
#include "opt_mprof.h"
#include "opt_mutex_wake_all.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <fs/devfs/devfs_int.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Force MUTEX_WAKE_ALL for now.
 * single thread wakeup needs fixes to avoid race conditions with
 * priority inheritance.
 */
#ifndef MUTEX_WAKE_ALL
#define	MUTEX_WAKE_ALL
#endif

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))

#ifdef DDB
static void	db_show_mtx(struct lock_object *lock);
#endif

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE,
#ifdef DDB
	db_show_mtx
#endif
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE,
#ifdef DDB
	db_show_mtx
#endif
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char	*name;
	const char	*file;
	int		line;
	uintmax_t	cnt_max;
	uintmax_t	cnt_tot;
	uintmax_t	cnt_cur;
	uintmax_t	cnt_contest_holding;
	uintmax_t	cnt_contest_locking;
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#ifdef MPROF_BUFFERS
#define	NUM_MPROF_BUFFERS	MPROF_BUFFERS
#else
#define	NUM_MPROF_BUFFERS	1000
#endif
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#ifndef MPROF_HASH_SIZE
#define	MPROF_HASH_SIZE		1009
#endif
#if NUM_MPROF_BUFFERS >= MPROF_HASH_SIZE
#error NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE
#endif
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
/* SWAG: sbuf size = avg stat. line size * number of locks */
#define	MPROF_SBUF_SIZE		256 * 400

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;
	static int multiplier = 1;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

retry_sbufops:
	sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	sbuf_printf(sb, "\n%6s %12s %11s %5s %12s %12s %s\n",
	    "max", "total", "count", "avg", "cnt_hold", "cnt_lock", "name");
	/*
	 * XXX this spinlock seems to be by far the largest perpetrator
	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
	 * even before I pessimized it further by moving the average
	 * computation here).
	 */
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i) {
		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %12ju %12ju %s:%d (%s)\n",
		    mprof_buf[i].cnt_max / 1000,
		    mprof_buf[i].cnt_tot / 1000,
		    mprof_buf[i].cnt_cur,
		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
			mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
		    mprof_buf[i].cnt_contest_holding,
		    mprof_buf[i].cnt_contest_locking,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
		if (sbuf_overflowed(sb)) {
			mtx_unlock_spin(&mprof_mtx);
			sbuf_delete(sb);
			multiplier++;
			goto retry_sbufops;
		}
	}
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");

static int
reset_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	int error, v;

	if (first_free_mprof_buf == 0)
		return (0);

	v = 0;
	error = sysctl_handle_int(oidp, &v, 0, req);
	if (error)
		return (error);
	if (req->newptr == NULL)
		return (error);
	if (v == 0)
		return (0);

	mtx_lock_spin(&mprof_mtx);
	bzero(mprof_buf, sizeof(*mprof_buf) * first_free_mprof_buf);
	bzero(mprof_hash, sizeof(struct mtx *) * MPROF_HASH_SIZE);
	first_free_mprof_buf = 0;
	mtx_unlock_spin(&mprof_mtx);
	return (0);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, reset_mutex_prof_stats, "I", "Reset mutex profiling statistics");
#endif
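
/*
 * Example (sketch): with MUTEX_PROFILING compiled in, the knobs above are
 * reached through sysctl(8); the MIB names follow from the SYSCTL_NODE,
 * SYSCTL_INT and SYSCTL_PROC declarations above.
 *
 *	sysctl debug.mutex.prof.enable=1	(start recording hold times)
 *	sysctl debug.mutex.prof.stats		(dump the per-site records)
 *	sysctl debug.mutex.prof.reset=1		(clear the buffers and hash)
 */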

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	curthread->td_locks--;
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename;
		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if (now - acqtime > mpp->cnt_max)
			mpp->cnt_max = now - acqtime;
		mpp->cnt_tot += now - acqtime;
		mpp->cnt_cur++;
		/*
		 * There's a small race, really we should cmpxchg
		 * 0 with the current value, but that would bill
		 * the contention to the wrong lock instance if
		 * it followed this also.
		 */
		mpp->cnt_contest_holding += m->mtx_contest_holding;
		m->mtx_contest_holding = 0;
		mpp->cnt_contest_locking += m->mtx_contest_locking;
		m->mtx_contest_locking = 0;
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
	_rel_spin_lock(m);
}
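
/*
 * Example (illustrative sketch only; the "foo" names are hypothetical):
 * typical consumer use of the interfaces backed by the functions above.
 * A sleep mutex (MTX_DEF) protects ordinary kernel data; a spin mutex
 * (MTX_SPIN) enters a critical section via spinlock_enter() and must only
 * be held across short, non-sleeping sections.
 *
 *	struct foo_softc {
 *		struct mtx	sc_mtx;		-- initialized MTX_DEF
 *		int		sc_count;
 *	} *sc;
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_count++;
 *	mtx_unlock(&sc->sc_mtx);
 *
 *	mtx_lock_spin(&foo_spin_mtx);		-- initialized MTX_SPIN
 *	... short, non-sleeping critical section ...
 *	mtx_unlock_spin(&foo_spin_mtx);
 */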

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  If this function is called on a mutex that
 * is already owned, it will recursively acquire the lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_sleep,
	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));

	if (mtx_owned(m) && (m->mtx_object.lo_flags & LO_RECURSABLE) != 0) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		rval = 1;
	} else
		rval = _obtain_lock(m, (uintptr_t)curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}

	return (rval);
}
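
/*
 * Example (illustrative sketch; names hypothetical): the trylock returns
 * non-zero on success and zero on failure, so a caller that must not block
 * can do:
 *
 *	if (mtx_trylock(&sc->sc_mtx) == 0)
 *		return (EBUSY);
 *	... access data protected by sc_mtx ...
 *	mtx_unlock(&sc->sc_mtx);
 */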

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
    int line)
{
#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
	volatile struct thread *owner;
#endif
	uintptr_t v;
#ifdef KTR
	int cont_logged = 0;
#endif
#ifdef MUTEX_PROFILING
	int contested;
#endif

	if (mtx_owned(m)) {
		KASSERT((m->mtx_object.lo_flags & LO_RECURSABLE) != 0,
	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
		    m->mtx_object.lo_name, file, line));
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

#ifdef MUTEX_PROFILING
	contested = 0;
#endif
	while (!_obtain_lock(m, tid)) {
#ifdef MUTEX_PROFILING
		contested = 1;
		atomic_add_int(&m->mtx_contest_holding, 1);
#endif
		turnstile_lock(&m->mtx_object);
		v = m->mtx_lock;

		/*
		 * Check if the lock has been released while spinning for
		 * the turnstile chain lock.
		 */
		if (v == MTX_UNOWNED) {
			turnstile_release(&m->mtx_object);
			cpu_spinwait();
			continue;
		}

#ifdef MUTEX_WAKE_ALL
		MPASS(v != MTX_CONTESTED);
#else
		/*
		 * The mutex was marked contested on release. This means that
		 * there are other threads blocked on it.  Grab ownership of
		 * it and propagate its priority to the current thread if
		 * necessary.
		 */
		if (v == MTX_CONTESTED) {
			m->mtx_lock = tid | MTX_CONTESTED;
			turnstile_claim(&m->mtx_object);
			break;
		}
#endif

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
			turnstile_release(&m->mtx_object);
			cpu_spinwait();
			continue;
		}

#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & ~MTX_FLAGMASK);
#ifdef ADAPTIVE_GIANT
		if (TD_IS_RUNNING(owner)) {
#else
		if (m != &Giant && TD_IS_RUNNING(owner)) {
#endif
			turnstile_release(&m->mtx_object);
			while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) {
				cpu_spinwait();
			}
			continue;
		}
#endif	/* SMP && !NO_ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    (void *)tid, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Block on the turnstile.
		 */
		turnstile_wait(&m->mtx_object, mtx_owner(m),
		    TS_EXCLUSIVE_QUEUE);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, (void *)tid, file, line);
	}
#endif
#ifdef MUTEX_PROFILING
	if (contested)
		m->mtx_contest_locking++;
	m->mtx_contest_holding = 0;
#endif
	return;
}

#ifdef SMP
/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file,
    int line)
{
	struct thread *td;
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	while (!_obtain_lock(m, tid)) {

		/* Give interrupts a chance while we spin. */
		spinlock_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
				cpu_spinwait();
				continue;
			}
			if (i < 60000000 || kdb_active || panicstr != NULL)
				DELAY(1);
			else {
				td = mtx_owner(m);

				/* If the mutex is unlocked, try again. */
				if (td == NULL)
					continue;
				printf(
			"spin lock %p (%s) held by %p (tid %d) too long\n",
				    m, m->mtx_object.lo_name, td, td->td_tid);
#ifdef WITNESS
				witness_display_spinlock(&m->mtx_object, td);
#endif
				panic("spin lock held too long");
			}
			cpu_spinwait();
		}
		spinlock_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}
#endif /* SMP */

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct turnstile *ts;
#ifndef PREEMPTION
	struct thread *td, *td1;
#endif

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	turnstile_lock(&m->mtx_object);
	ts = turnstile_lookup(&m->mtx_object);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
	if (ts == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		turnstile_release(&m->mtx_object);
		return;
	}
#else
	MPASS(ts != NULL);
#endif
#ifndef PREEMPTION
	/* XXX */
	td1 = turnstile_head(ts, TS_EXCLUSIVE_QUEUE);
#endif
#ifdef MUTEX_WAKE_ALL
	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
	_release_lock_quick(m);
#else
	if (turnstile_signal(ts, TS_EXCLUSIVE_QUEUE)) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else {
		m->mtx_lock = MTX_CONTESTED;
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested",
			    m);
	}
#endif
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);

#ifndef PREEMPTION
	/*
	 * XXX: This is just a hack until preemption is done.  However,
	 * once preemption is done we need to either wrap the
	 * turnstile_signal() and release of the actual lock in an
	 * extra critical section or change the preemption code to
	 * always just set a flag and never do instant-preempts.
	 */
	td = curthread;
	if (td->td_critnest > 0 || td1->td_priority >= td->td_priority)
		return;
	mtx_lock_spin(&sched_lock);
	if (!TD_IS_RUNNING(td1)) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		mi_switch(SW_INVOL, NULL);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}
	mtx_unlock_spin(&sched_lock);
#endif

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL || dumping)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
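
/*
 * Example (illustrative sketch; names hypothetical): internal routines that
 * rely on the caller holding a lock commonly assert that on entry:
 *
 *	static void
 *	foo_update_locked(struct foo_softc *sc)
 *	{
 *
 *		mtx_assert(&sc->sc_mtx, MA_OWNED);
 *		sc->sc_count++;
 *	}
 *
 * MA_OWNED may be combined with MA_RECURSED or MA_NOTRECURSED as handled
 * above; the assertion is only active in INVARIANTS kernels.
 */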

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
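
/*
 * Example (illustrative sketch; the "foo" names are hypothetical): a
 * subsystem may call mtx_init() from its own initialization code, or use
 * MTX_SYSINIT() to have mtx_sysinit() above perform the call during boot:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo, &foo_mtx, "foo lock", MTX_DEF);
 *
 * which is equivalent to calling, at the right time:
 *
 *	mtx_init(&foo_mtx, "foo lock", NULL, MTX_DEF);
 */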

/*
 * Mutex initialization routine; initialize lock `m' of type contained in
 * `opts' with options contained in `opts' and name `name.'  The optional
 * lock type `type' is used as a general lock category name for use with
 * witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_class *class;
	int flags;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
		MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	/* Determine lock class and lock flags. */
	if (opts & MTX_SPIN)
		class = &lock_class_mtx_spin;
	else
		class = &lock_class_mtx_sleep;
	flags = 0;
	if (opts & MTX_QUIET)
		flags |= LO_QUIET;
	if (opts & MTX_RECURSE)
		flags |= LO_RECURSABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		flags |= LO_DUPOK;

	/* Initialize mutex. */
	m->mtx_lock = MTX_UNOWNED;
	m->mtx_recurse = 0;
#ifdef MUTEX_PROFILING
	m->mtx_acqtime = 0;
	m->mtx_filename = NULL;
	m->mtx_lineno = 0;
	m->mtx_contest_holding = 0;
	m->mtx_contest_locking = 0;
#endif

	lock_init(&m->mtx_object, class, name, type, flags);
}

/*
 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
 * passed in as a flag here because if the corresponding mtx_init() was
 * called with MTX_QUIET set, then it will already be set in the mutex's
 * flags.
 */
void
mtx_destroy(struct mtx *m)
{

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Perform the non-mtx related part of mtx_unlock_spin(). */
		if (LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_spin)
			spinlock_exit();
		else
			curthread->td_locks--;

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	m->mtx_lock = MTX_DESTROYED;
	lock_destroy(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * setup before this is called.
 */
void
mutex_init(void)
{

	/* Setup turnstiles so that sleep mutexes work. */
	init_turnstiles();

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
	mtx_lock(&Giant);
}

#ifdef DDB
void
db_show_mtx(struct lock_object *lock)
{
	struct thread *td;
	struct mtx *m;

	m = (struct mtx *)lock;

	db_printf(" flags: {");
	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
		db_printf("SPIN");
	else
		db_printf("DEF");
	if (m->mtx_object.lo_flags & LO_RECURSABLE)
		db_printf(", RECURSE");
	if (m->mtx_object.lo_flags & LO_DUPOK)
		db_printf(", DUPOK");
	db_printf("}\n");
	db_printf(" state: {");
	if (mtx_unowned(m))
		db_printf("UNOWNED");
	else {
		db_printf("OWNED");
		if (m->mtx_lock & MTX_CONTESTED)
			db_printf(", CONTESTED");
		if (m->mtx_lock & MTX_RECURSED)
			db_printf(", RECURSED");
	}
	db_printf("}\n");
	if (!mtx_unowned(m)) {
		td = mtx_owner(m);
		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
		if (mtx_recursed(m))
			db_printf(" recursed: %d\n", m->mtx_recurse);
	}
}
#endif