subr_turnstile.c revision 76272
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 76272 2001-05-04 17:15:16Z jhb $
 */

/*
 * Machine independent bits of mutex implementation and implementation of
 * `witness' structure & related debugging routines.
 */

/*
 * Main Entry: witness
 * Pronunciation: 'wit-n&s
 * Function: noun
 * Etymology: Middle English witnesse, from Old English witnes knowledge,
 *    testimony, witness, from 2wit
 * Date: before 12th century
 * 1 : attestation of a fact or event : TESTIMONY
 * 2 : one that gives evidence; specifically : one who testifies in
 *    a cause or before a judicial tribunal
 * 3 : one asked to be present at a transaction so as to be able to
 *    testify to its having taken place
 * 4 : one who has personal knowledge of something
 * 5 a : something serving as evidence or proof : SIGN
 *   b : public affirmation by word or example of usually
 *    religious faith or conviction <the heroic witness to divine
 *    life -- Pilot>
 * 6 capitalized : a member of the Jehovah's Witnesses
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <sys/ktr.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct proc *);

static void
propagate_priority(struct proc *p)
{
	int pri = p->p_pri.pri_level;
	struct mtx *m = p->p_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct proc *p1;

		p = mtx_owner(m);

		if (p == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of process that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(p->p_magic == P_MAGIC);
		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
		if (p->p_pri.pri_level <= pri)
			return;

		/*
		 * Bump this process' priority.
		 */
		SET_PRIO(p, pri);

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (p->p_oncpu != NOCPU) {
			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB || p->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if p is curproc (this shouldn't
		 * ever happen however as it would mean we are in a deadlock.)
		 */
		KASSERT(p != curproc, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and
		 * quit.
		 */
		if (p->p_stat == SRUN) {
			MPASS(p->p_blocked == NULL);
			remrunqueue(p);
			setrunqueue(p);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(p->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    p->p_pid, p->p_comm, p->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that p is blocked on.
		 */
		m = p->p_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the proc needs to be moved up on
		 * the blocked chain
		 */
		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		p1 = TAILQ_PREV(p, procqueue, p_procq);
		if (p1->p_pri.pri_level <= pri) {
			continue;
		}

		/*
		 * Remove proc from blocked chain and determine where
		 * it should be moved up to.  Since we know that p1 has
		 * a lower priority than p, we know that at least one
		 * process in the chain has a lower priority and that
		 * p1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
			MPASS(p1->p_magic == P_MAGIC);
			if (p1->p_pri.pri_level > pri)
				break;
		}

		MPASS(p1 != NULL);
		TAILQ_INSERT_BEFORE(p1, p, p_procq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    p, p1, m, m->mtx_object.lo_name);
	}
}
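
/*
 * Illustrative example (hypothetical priority values, not taken from the
 * code above): priority propagation implements priority inheritance.  If a
 * pri_level 40 thread blocks on a mutex owned by a pri_level 120 thread
 * (lower values run first), the owner is boosted to pri_level 40 so that
 * unrelated medium-priority threads cannot starve it and, indirectly, the
 * high-priority waiter.  propagate_priority() walks the whole chain of
 * owners (A blocked on a mutex owned by B, B blocked on a mutex owned by
 * C, ...), boosting each owner and re-sorting each mutex's mtx_blocked
 * queue as it goes; the owner's priority is recomputed again in
 * _mtx_unlock_sleep() when it releases a contested mutex.
 */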

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	__mtx_lock_flags(m, opts, file, line);
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	__mtx_unlock_flags(m, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	__mtx_lock_spin_flags(m, opts, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	__mtx_unlock_spin_flags(m, opts, file, line);
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curproc != NULL);

	/*
	 * _mtx_trylock does not accept MTX_NOSWITCH option.
	 */
	KASSERT((opts & MTX_NOSWITCH) == 0,
	    ("mtx_trylock() called with invalid option flag(s) %d", opts));

	rval = _obtain_lock(m, curproc);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct proc *p = curproc;

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, p)) {
		uintptr_t v;
		struct proc *p1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are processes blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			p1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(p1 != NULL);
			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;

			if (p1->p_pri.pri_level < p->p_pri.pri_level)
				SET_PRIO(p, p1->p_pri.pri_level);
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
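		/*
		 * From this point on we are committed to blocking.  The code
		 * below queues us on the mutex's list of waiters, kept sorted
		 * so the highest-priority waiter sits at the head, records
		 * which mutex we are blocked on, propagates our priority to
		 * the current owner and then context switches via
		 * mi_switch().  We resume after the owner releases the mutex
		 * and _mtx_unlock_sleep() puts us back on the run queue, at
		 * which point the outer loop retries the acquire.
		 */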
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (p->p_ithd != NULL) {
			struct ithd *it = p->p_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
		} else {
			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
				if (p1->p_pri.pri_level > p->p_pri.pri_level)
					break;
			if (p1)
				TAILQ_INSERT_BEFORE(p1, p, p_procq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
		}

		/*
		 * Save who we're blocked on.
		 */
		p->p_blocked = m;
		p->p_mtxname = m->mtx_object.lo_name;
		p->p_stat = SMTX;
		propagate_priority(p);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
			    m->mtx_object.lo_name);

		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			    p, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, critical_t mtx_crit, const char *file,
    int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curproc))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit(mtx_crit);
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 1000000)
				continue;
			if (i++ < 6000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
		}
		mtx_crit = critical_enter();
	}

	m->mtx_savecrit = mtx_crit;
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}
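
/*
 * Usage sketch (hypothetical driver code, for illustration only): spin
 * mutexes busy-wait inside a critical section instead of blocking, so they
 * are meant for very short hold times, typically protecting data shared
 * with interrupt code:
 *
 *	static struct mtx foo_intr_mtx;
 *
 *	mtx_init(&foo_intr_mtx, "foo intr", MTX_SPIN);
 *	...
 *	mtx_lock_spin(&foo_intr_mtx);
 *	(update state shared with the interrupt handler)
 *	mtx_unlock_spin(&foo_intr_mtx);
 */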

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct proc *p, *p1;
	struct mtx *m1;
	int pri;

	p = curproc;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	p1 = TAILQ_FIRST(&m->mtx_blocked);
	MPASS(p->p_magic == P_MAGIC);
	MPASS(p1->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
		if (cp < pri)
			pri = cp;
	}

	if (pri > p->p_pri.pri_native)
		pri = p->p_pri.pri_native;
	SET_PRIO(p, pri);

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, p1);

	p1->p_blocked = NULL;
	p1->p_stat = SRUN;
	setrunqueue(p1);

	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
#ifdef notyet
		if (p->p_ithd != NULL) {
			struct ithd *it = p->p_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(p);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
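
/*
 * Usage sketch (hypothetical, for illustration only): a routine that
 * requires its caller to hold a lock can document and enforce that with
 * mtx_assert(), which is only compiled into kernels built with INVARIANTS:
 *
 *	static void
 *	foo_kick(struct foo_softc *sc)
 *	{
 *
 *		mtx_assert(&sc->sc_mtx, MA_OWNED);
 *		...
 *	}
 */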

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate __P((struct mtx *));

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	if (!kernacc((caddr_t)m, sizeof(m), VM_PROT_READ | VM_PROT_WRITE))
		panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * Mutex initialization routine; initialize lock `m' of type contained in
 * `opts' with options contained in `opts' and description `description.'
 */
void
mtx_init(struct mtx *m, const char *description, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	bzero(m, sizeof(*m));
	lock = &m->mtx_object;
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = description;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
 * passed in as a flag here because if the corresponding mtx_init() was
 * called with MTX_QUIET set, then it will already be set in the mutex's
 * flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE | LOP_NOSWITCH,
		    __FILE__, __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}
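
/*
 * Usage sketch (hypothetical driver code, for illustration only): the
 * typical life cycle of a sleep mutex created by the routines above is
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo driver", MTX_DEF);
 *
 *	mtx_lock(&foo_mtx);
 *	(touch the data protected by foo_mtx)
 *	mtx_unlock(&foo_mtx);
 *
 *	mtx_destroy(&foo_mtx);
 *
 * mtx_lock() expands to the inline fast path and only calls
 * _mtx_lock_sleep() above when the mutex is contested or being recursed on.
 */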