/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/kern_mutex.c 104964 2002-10-12 05:32:24Z jeff $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define	mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/* XXXKSE This test will change. */
#define	thread_running(td)						\
	((td)->td_kse != NULL && (td)->td_kse->ke_oncpu != NOCPU)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;
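
/*
 * Example (illustrative sketch only, not exercised by this file): the
 * canonical consumer-side pattern for the sleep mutex API implemented
 * below; see the mutex(9) manual page for the authoritative reference.
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	... access data protected by foo_mtx ...
 *	mtx_unlock(&foo_mtx);
 *	mtx_destroy(&foo_mtx);
 */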

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(!TD_IS_SLEEPING(td), ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this shouldn't
		 * ever happen however as it would mean we are in a deadlock.)
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			sched_prio(td, pri);
			return;
		}
		/*
		 * Adjust for any other cases.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(TD_ON_LOCK(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}
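
/*
 * Example scenario (illustrative, added for clarity): if a low-priority
 * thread L holds a mutex that a high-priority thread H blocks on,
 * propagate_priority() lends H's priority to L (and, transitively, to
 * whatever L is itself blocked on), so that a medium-priority thread
 * cannot keep preempting L and thereby starve H.  This is the standard
 * priority-inheritance remedy for priority inversion.
 */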

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char	*name;
	const char	*file;
	int		line;
#define	MPROF_MAX	0
#define	MPROF_TOT	1
#define	MPROF_CNT	2
#define	MPROF_AVG	3
	uintmax_t	counter[4];
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define	NUM_MPROF_BUFFERS	1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define	MPROF_HASH_SIZE		1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded"));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
	    "max", "total", "count", "average", "name");
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%12ju %12ju %12ju %12ju %s:%d (%s)\n",
		    mprof_buf[i].counter[MPROF_MAX] / 1000,
		    mprof_buf[i].counter[MPROF_TOT] / 1000,
		    mprof_buf[i].counter[MPROF_CNT],
		    mprof_buf[i].counter[MPROF_AVG] / 1000,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
#endif
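
/*
 * Example (illustrative): on a kernel built with MUTEX_PROFILING, the
 * statistics above can be enabled and read from userland via sysctl(8):
 *
 *	# sysctl debug.mutex.prof.enable=1
 *	# sysctl debug.mutex.prof.stats
 *
 * Hold times are reported in microseconds: the counters are kept in
 * nanoseconds and divided by 1000 when dumped.
 */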

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		/* Strip leading "../" components; guard against NULL. */
		for (p = m->mtx_filename;
		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if ((now - acqtime) > mpp->counter[MPROF_MAX])
			mpp->counter[MPROF_MAX] = now - acqtime;
		mpp->counter[MPROF_TOT] += now - acqtime;
		mpp->counter[MPROF_CNT] += 1;
		mpp->counter[MPROF_AVG] =
		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}

/*
 * The important part of mtx_trylock{,_flags}().
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
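
/*
 * Example (illustrative sketch only): mtx_trylock() lets a caller attempt
 * an acquisition without risking a sleep, e.g.:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... fast path with foo_mtx held ...
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		... back off without blocking ...
 *	}
 */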

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
#ifdef KTR
	int cont_logged = 0;
#endif

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
		    (void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && thread_running(owner)) {
			mtx_unlock_spin(&sched_lock);
			while (mtx_owner(m) == owner && thread_running(owner)) {
#ifdef __i386__
				ia32_pause();
#endif
			}
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_lockq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		}
#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_lockname = m->mtx_object.lo_name;
		TD_SET_LOCK(td);
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			    td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
	return;
}
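
/*
 * Note (added for clarity): the mtx_blocked queue manipulated above is
 * kept sorted by thread priority, which is why _mtx_unlock_sleep() below
 * can simply wake TAILQ_FIRST() and know it is handing the lock to the
 * highest-priority waiter.
 */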

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}
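
/*
 * Example (illustrative sketch only): spin mutexes are intended for code
 * that must not sleep, such as primary interrupt context:
 *
 *	static struct mtx foo_spin;
 *
 *	mtx_init(&foo_spin, "foo spin", NULL, MTX_SPIN);
 *	mtx_lock_spin(&foo_spin);
 *	... short critical section ...
 *	mtx_unlock_spin(&foo_spin);
 */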

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	TD_CLR_LOCK(td1);
	if (!TD_CAN_RUN(td1)) {
		mtx_unlock_spin(&sched_lock);
		return;
	}
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}
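
/*
 * Note (added for clarity): the PRI_MAX loop above undoes priority
 * lending on release: the releasing thread's priority is recomputed from
 * the best waiter on each mutex it still holds contested, then clamped to
 * its own base priority, so borrowed priority is given back as soon as it
 * is no longer needed.
 */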

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}

/*
 * Mutex initialization routine; initialize lock `m' with options contained
 * in `opts' and name `name.'  The optional lock type `type' is used as a
 * general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}
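
/*
 * Example (illustrative): mutexes that must exist before a subsystem's
 * first use are commonly declared with MTX_SYSINIT(), which arranges for
 * mtx_sysinit() above to run during boot, as done for mprof_mtx earlier
 * in this file:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo, &foo_mtx, "foo lock", MTX_DEF);
 */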

/*
 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
 * passed in as a flag here because if the corresponding mtx_init() was
 * called with MTX_QUIET set, then it will already be set in the mutex's
 * flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Set up thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
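
/*
 * Example (illustrative sketch only): a subsystem call wrapped with the
 * Giant encapsulation routines above:
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_proc);
 *	... code that may still require Giant ...
 *	mtx_unlock_giant(s);
 */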