/* subr_turnstile.c revision 67352 */
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 * $FreeBSD: head/sys/kern/subr_turnstile.c 67352 2000-10-20 07:26:37Z jhb $ 31 */ 32 33/* 34 * Main Entry: witness 35 * Pronunciation: 'wit-n&s 36 * Function: noun 37 * Etymology: Middle English witnesse, from Old English witnes knowledge, 38 * testimony, witness, from 2wit 39 * Date: before 12th century 40 * 1 : attestation of a fact or event : TESTIMONY 41 * 2 : one that gives evidence; specifically : one who testifies in 42 * a cause or before a judicial tribunal 43 * 3 : one asked to be present at a transaction so as to be able to 44 * testify to its having taken place 45 * 4 : one who has personal knowledge of something 46 * 5 a : something serving as evidence or proof : SIGN 47 * b : public affirmation by word or example of usually 48 * religious faith or conviction <the heroic witness to divine 49 * life -- Pilot> 50 * 6 capitalized : a member of the Jehovah's Witnesses 51 */ 52 53#include <sys/param.h> 54#include <sys/bus.h> 55#include <sys/kernel.h> 56#include <sys/malloc.h> 57#include <sys/proc.h> 58#include <sys/systm.h> 59#include <sys/vmmeter.h> 60#include <sys/ktr.h> 61 62#include <machine/atomic.h> 63#include <machine/bus.h> 64#include <machine/clock.h> 65#include <machine/cpu.h> 66 67#include <vm/vm.h> 68#include <vm/vm_extern.h> 69 70#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. 
*/ 71#include <sys/mutex.h> 72 73/* 74 * Machine independent bits of the mutex implementation 75 */ 76/* All mutexes in system (used for debug/panic) */ 77#ifdef MUTEX_DEBUG 78static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0, 79 "All mutexes queue head" }; 80static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug, 81 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 82 { NULL, NULL }, &all_mtx, &all_mtx }; 83#else /* MUTEX_DEBUG */ 84static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head", 85 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 86 { NULL, NULL }, &all_mtx, &all_mtx }; 87#endif /* MUTEX_DEBUG */ 88 89static int mtx_cur_cnt; 90static int mtx_max_cnt; 91 92void _mtx_enter_giant_def(void); 93void _mtx_exit_giant_def(void); 94static void propagate_priority(struct proc *) __unused; 95 96#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 97#define mtx_owner(m) (mtx_unowned(m) ? NULL \ 98 : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) 99 100#define RETIP(x) *(((uintptr_t *)(&x)) - 1) 101#define SET_PRIO(p, pri) (p)->p_priority = (pri) 102 103/* 104 * XXX Temporary, for use from assembly language 105 */ 106 107void 108_mtx_enter_giant_def(void) 109{ 110 111 mtx_enter(&Giant, MTX_DEF); 112} 113 114void 115_mtx_exit_giant_def(void) 116{ 117 118 mtx_exit(&Giant, MTX_DEF); 119} 120 121static void 122propagate_priority(struct proc *p) 123{ 124 int pri = p->p_priority; 125 struct mtx *m = p->p_blocked; 126 127 for (;;) { 128 struct proc *p1; 129 130 p = mtx_owner(m); 131 132 if (p == NULL) { 133 /* 134 * This really isn't quite right. Really 135 * ought to bump priority of process that 136 * next acquires the mutex. 137 */ 138 MPASS(m->mtx_lock == MTX_CONTESTED); 139 return; 140 } 141 MPASS(p->p_magic == P_MAGIC); 142 if (p->p_priority <= pri) 143 return; 144 /* 145 * If lock holder is actually running, just bump priority. 
146 */ 147 if (TAILQ_NEXT(p, p_procq) == NULL) { 148 MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); 149 SET_PRIO(p, pri); 150 return; 151 } 152 /* 153 * If on run queue move to new run queue, and 154 * quit. 155 */ 156 if (p->p_stat == SRUN) { 157 MPASS(p->p_blocked == NULL); 158 remrunqueue(p); 159 SET_PRIO(p, pri); 160 setrunqueue(p); 161 return; 162 } 163 164 /* 165 * If we aren't blocked on a mutex, give up and quit. 166 */ 167 if (p->p_stat != SMTX) { 168 printf( 169 "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n", 170 p->p_pid, p->p_comm, p->p_stat, m->mtx_description); 171 return; 172 } 173 174 /* 175 * Pick up the mutex that p is blocked on. 176 */ 177 m = p->p_blocked; 178 MPASS(m != NULL); 179 180 printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, 181 p->p_comm, m->mtx_description); 182 /* 183 * Check if the proc needs to be moved up on 184 * the blocked chain 185 */ 186 if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL || 187 p1->p_priority <= pri) { 188 if (p1) 189 printf( 190 "XXX: previous process %d(%s) has higher priority\n", 191 p->p_pid, p->p_comm); 192 else 193 printf("XXX: process at head of run queue\n"); 194 continue; 195 } 196 197 /* 198 * Remove proc from blocked chain 199 */ 200 TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); 201 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { 202 MPASS(p1->p_magic == P_MAGIC); 203 if (p1->p_priority > pri) 204 break; 205 } 206 if (p1) 207 TAILQ_INSERT_BEFORE(p1, p, p_procq); 208 else 209 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 210 CTR4(KTR_LOCK, 211 "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s", 212 p, p1, m, m->mtx_description); 213 } 214} 215 216void 217mtx_enter_hard(struct mtx *m, int type, int saveintr) 218{ 219 struct proc *p = CURPROC; 220 struct timeval new_switchtime; 221 222 KASSERT(p != NULL, ("curproc is NULL in mutex")); 223 224 switch (type) { 225 case MTX_DEF: 226 if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) { 227 m->mtx_recurse++; 228 
atomic_set_ptr(&m->mtx_lock, MTX_RECURSE); 229 CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); 230 return; 231 } 232 CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]", 233 m, m->mtx_lock, RETIP(m)); 234 while (!_obtain_lock(m, p)) { 235 int v; 236 struct proc *p1; 237 238 mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); 239 /* 240 * check if the lock has been released while 241 * waiting for the schedlock. 242 */ 243 if ((v = m->mtx_lock) == MTX_UNOWNED) { 244 mtx_exit(&sched_lock, MTX_SPIN); 245 continue; 246 } 247 /* 248 * The mutex was marked contested on release. This 249 * means that there are processes blocked on it. 250 */ 251 if (v == MTX_CONTESTED) { 252 p1 = TAILQ_FIRST(&m->mtx_blocked); 253 KASSERT(p1 != NULL, ("contested mutex has no contesters")); 254 KASSERT(p != NULL, ("curproc is NULL for contested mutex")); 255 m->mtx_lock = (uintptr_t)p | MTX_CONTESTED; 256 if (p1->p_priority < p->p_priority) { 257 SET_PRIO(p, p1->p_priority); 258 } 259 mtx_exit(&sched_lock, MTX_SPIN); 260 return; 261 } 262 /* 263 * If the mutex isn't already contested and 264 * a failure occurs setting the contested bit the 265 * mutex was either release or the 266 * state of the RECURSION bit changed. 267 */ 268 if ((v & MTX_CONTESTED) == 0 && 269 !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 270 (void *)(v | MTX_CONTESTED))) { 271 mtx_exit(&sched_lock, MTX_SPIN); 272 continue; 273 } 274 275 /* We definitely have to sleep for this lock */ 276 mtx_assert(m, MA_NOTOWNED); 277 278#ifdef notyet 279 /* 280 * If we're borrowing an interrupted thread's VM 281 * context must clean up before going to sleep. 
282 */ 283 if (p->p_flag & (P_ITHD | P_SITHD)) { 284 ithd_t *it = (ithd_t *)p; 285 286 if (it->it_interrupted) { 287 CTR2(KTR_LOCK, 288 "mtx_enter: 0x%x interrupted 0x%x", 289 it, it->it_interrupted); 290 intr_thd_fixup(it); 291 } 292 } 293#endif 294 295 /* Put us on the list of procs blocked on this mutex */ 296 if (TAILQ_EMPTY(&m->mtx_blocked)) { 297 p1 = (struct proc *)(m->mtx_lock & 298 MTX_FLAGMASK); 299 LIST_INSERT_HEAD(&p1->p_contested, m, 300 mtx_contested); 301 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 302 } else { 303 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) 304 if (p1->p_priority > p->p_priority) 305 break; 306 if (p1) 307 TAILQ_INSERT_BEFORE(p1, p, p_procq); 308 else 309 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, 310 p_procq); 311 } 312 313 p->p_blocked = m; /* Who we're blocked on */ 314 p->p_stat = SMTX; 315#if 0 316 propagate_priority(p); 317#endif 318 CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s", 319 p, m, m->mtx_description); 320 /* 321 * Blatantly copied from mi_switch nearly verbatim. 322 * When Giant goes away and we stop dinking with it 323 * in mi_switch, we can go back to calling mi_switch 324 * directly here. 325 */ 326 327 /* 328 * Compute the amount of time during which the current 329 * process was running, and add that to its total so 330 * far. 331 */ 332 microuptime(&new_switchtime); 333 if (timevalcmp(&new_switchtime, &switchtime, <)) { 334 printf( 335 "microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n", 336 switchtime.tv_sec, switchtime.tv_usec, 337 new_switchtime.tv_sec, 338 new_switchtime.tv_usec); 339 new_switchtime = switchtime; 340 } else { 341 p->p_runtime += (new_switchtime.tv_usec - 342 switchtime.tv_usec) + 343 (new_switchtime.tv_sec - switchtime.tv_sec) * 344 (int64_t)1000000; 345 } 346 347 /* 348 * Pick a new current process and record its start time. 
349 */ 350 cnt.v_swtch++; 351 switchtime = new_switchtime; 352 cpu_switch(); 353 if (switchtime.tv_sec == 0) 354 microuptime(&switchtime); 355 switchticks = ticks; 356 CTR3(KTR_LOCK, 357 "mtx_enter: p 0x%p free from blocked on [0x%p] %s", 358 p, m, m->mtx_description); 359 mtx_exit(&sched_lock, MTX_SPIN); 360 } 361 return; 362 case MTX_SPIN: 363 case MTX_SPIN | MTX_FIRST: 364 case MTX_SPIN | MTX_TOPHALF: 365 { 366 int i = 0; 367 368 if (m->mtx_lock == (uintptr_t)p) { 369 m->mtx_recurse++; 370 return; 371 } 372 CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); 373 for (;;) { 374 if (_obtain_lock(m, p)) 375 break; 376 while (m->mtx_lock != MTX_UNOWNED) { 377 if (i++ < 1000000) 378 continue; 379 if (i++ < 6000000) 380 DELAY (1); 381#ifdef DDB 382 else if (!db_active) 383#else 384 else 385#endif 386 panic( 387 "spin lock %s held by 0x%p for > 5 seconds", 388 m->mtx_description, 389 (void *)m->mtx_lock); 390 } 391 } 392 393#ifdef MUTEX_DEBUG 394 if (type != MTX_SPIN) 395 m->mtx_saveintr = 0xbeefface; 396 else 397#endif 398 m->mtx_saveintr = saveintr; 399 CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); 400 return; 401 } 402 } 403} 404 405void 406mtx_exit_hard(struct mtx *m, int type) 407{ 408 struct proc *p, *p1; 409 struct mtx *m1; 410 int pri; 411 412 p = CURPROC; 413 switch (type) { 414 case MTX_DEF: 415 case MTX_DEF | MTX_NOSWITCH: 416 if (m->mtx_recurse != 0) { 417 if (--(m->mtx_recurse) == 0) 418 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE); 419 CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); 420 return; 421 } 422 mtx_enter(&sched_lock, MTX_SPIN); 423 CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); 424 p1 = TAILQ_FIRST(&m->mtx_blocked); 425 MPASS(p->p_magic == P_MAGIC); 426 MPASS(p1->p_magic == P_MAGIC); 427 TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); 428 if (TAILQ_EMPTY(&m->mtx_blocked)) { 429 LIST_REMOVE(m, mtx_contested); 430 _release_lock_quick(m); 431 CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); 432 } else 433 m->mtx_lock = MTX_CONTESTED; 434 pri = MAXPRI; 435 
LIST_FOREACH(m1, &p->p_contested, mtx_contested) { 436 int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; 437 if (cp < pri) 438 pri = cp; 439 } 440 if (pri > p->p_nativepri) 441 pri = p->p_nativepri; 442 SET_PRIO(p, pri); 443 CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p", 444 m, p1); 445 p1->p_blocked = NULL; 446 p1->p_stat = SRUN; 447 setrunqueue(p1); 448 if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { 449#ifdef notyet 450 if (p->p_flag & (P_ITHD | P_SITHD)) { 451 ithd_t *it = (ithd_t *)p; 452 453 if (it->it_interrupted) { 454 CTR2(KTR_LOCK, 455 "mtx_exit: 0x%x interruped 0x%x", 456 it, it->it_interrupted); 457 intr_thd_fixup(it); 458 } 459 } 460#endif 461 setrunqueue(p); 462 CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p", 463 m, m->mtx_lock); 464 mi_switch(); 465 CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p", 466 m, m->mtx_lock); 467 } 468 mtx_exit(&sched_lock, MTX_SPIN); 469 break; 470 case MTX_SPIN: 471 case MTX_SPIN | MTX_FIRST: 472 if (m->mtx_recurse != 0) { 473 m->mtx_recurse--; 474 return; 475 } 476 MPASS(mtx_owned(m)); 477 _release_lock_quick(m); 478 if (type & MTX_FIRST) 479 enable_intr(); /* XXX is this kosher? 
*/ 480 else { 481 MPASS(m->mtx_saveintr != 0xbeefface); 482 restore_intr(m->mtx_saveintr); 483 } 484 break; 485 case MTX_SPIN | MTX_TOPHALF: 486 if (m->mtx_recurse != 0) { 487 m->mtx_recurse--; 488 return; 489 } 490 MPASS(mtx_owned(m)); 491 _release_lock_quick(m); 492 break; 493 default: 494 panic("mtx_exit_hard: unsupported type 0x%x\n", type); 495 } 496} 497 498#define MV_DESTROY 0 /* validate before destory */ 499#define MV_INIT 1 /* validate before init */ 500 501#ifdef MUTEX_DEBUG 502 503int mtx_validate __P((struct mtx *, int)); 504 505int 506mtx_validate(struct mtx *m, int when) 507{ 508 struct mtx *mp; 509 int i; 510 int retval = 0; 511 512 if (m == &all_mtx || cold) 513 return 0; 514 515 mtx_enter(&all_mtx, MTX_DEF); 516/* 517 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 518 * we can re-enable the kernacc() checks. 519 */ 520#ifndef __alpha__ 521 MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t), 522 VM_PROT_READ) == 1); 523#endif 524 MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx); 525 for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { 526#ifndef __alpha__ 527 if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t), 528 VM_PROT_READ) != 1) { 529 panic("mtx_validate: mp=%p mp->mtx_next=%p", 530 mp, mp->mtx_next); 531 } 532#endif 533 i++; 534 if (i > mtx_cur_cnt) { 535 panic("mtx_validate: too many in chain, known=%d\n", 536 mtx_cur_cnt); 537 } 538 } 539 MPASS(i == mtx_cur_cnt); 540 switch (when) { 541 case MV_DESTROY: 542 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 543 if (mp == m) 544 break; 545 MPASS(mp == m); 546 break; 547 case MV_INIT: 548 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 549 if (mp == m) { 550 /* 551 * Not good. This mutex already exists. 
552 */ 553 printf("re-initing existing mutex %s\n", 554 m->mtx_description); 555 MPASS(m->mtx_lock == MTX_UNOWNED); 556 retval = 1; 557 } 558 } 559 mtx_exit(&all_mtx, MTX_DEF); 560 return (retval); 561} 562#endif 563 564void 565mtx_init(struct mtx *m, const char *t, int flag) 566{ 567#ifdef MUTEX_DEBUG 568 struct mtx_debug *debug; 569#endif 570 571 CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); 572#ifdef MUTEX_DEBUG 573 if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ 574 return; 575 if (flag & MTX_COLD) 576 debug = m->mtx_debug; 577 else 578 debug = NULL; 579 if (debug == NULL) { 580#ifdef DIAGNOSTIC 581 if(cold && bootverbose) 582 printf("malloc'ing mtx_debug while cold for %s\n", t); 583#endif 584 585 /* XXX - should not use DEVBUF */ 586 debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT); 587 MPASS(debug != NULL); 588 bzero(debug, sizeof(struct mtx_debug)); 589 } 590#endif 591 bzero((void *)m, sizeof *m); 592 TAILQ_INIT(&m->mtx_blocked); 593#ifdef MUTEX_DEBUG 594 m->mtx_debug = debug; 595#endif 596 m->mtx_description = t; 597 m->mtx_lock = MTX_UNOWNED; 598 /* Put on all mutex queue */ 599 mtx_enter(&all_mtx, MTX_DEF); 600 m->mtx_next = &all_mtx; 601 m->mtx_prev = all_mtx.mtx_prev; 602 m->mtx_prev->mtx_next = m; 603 all_mtx.mtx_prev = m; 604 if (++mtx_cur_cnt > mtx_max_cnt) 605 mtx_max_cnt = mtx_cur_cnt; 606 mtx_exit(&all_mtx, MTX_DEF); 607 witness_init(m, flag); 608} 609 610void 611mtx_destroy(struct mtx *m) 612{ 613 614 CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); 615#ifdef MUTEX_DEBUG 616 if (m->mtx_next == NULL) 617 panic("mtx_destroy: %p (%s) already destroyed", 618 m, m->mtx_description); 619 620 if (!mtx_owned(m)) { 621 MPASS(m->mtx_lock == MTX_UNOWNED); 622 } else { 623 MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); 624 } 625 mtx_validate(m, MV_DESTROY); /* diagnostic */ 626#endif 627 628#ifdef WITNESS 629 if (m->mtx_witness) 630 witness_destroy(m); 631#endif /* WITNESS */ 632 633 /* Remove from the 
all mutex queue */ 634 mtx_enter(&all_mtx, MTX_DEF); 635 m->mtx_next->mtx_prev = m->mtx_prev; 636 m->mtx_prev->mtx_next = m->mtx_next; 637#ifdef MUTEX_DEBUG 638 m->mtx_next = m->mtx_prev = NULL; 639 free(m->mtx_debug, M_DEVBUF); 640 m->mtx_debug = NULL; 641#endif 642 mtx_cur_cnt--; 643 mtx_exit(&all_mtx, MTX_DEF); 644} 645 646/* 647 * The non-inlined versions of the mtx_*() functions are always built (above), 648 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options 649 * being specified. 650 */ 651#if (defined(MUTEX_DEBUG) && defined(WITNESS)) 652 653#define WITNESS_COUNT 200 654#define WITNESS_NCHILDREN 2 655 656#ifndef WITNESS 657#define WITNESS 0 /* default off */ 658#endif 659 660#ifndef SMP 661extern int witness_spin_check; 662#endif 663 664int witness_watch; 665 666struct witness { 667 struct witness *w_next; 668 char *w_description; 669 const char *w_file; 670 int w_line; 671 struct witness *w_morechildren; 672 u_char w_childcnt; 673 u_char w_Giant_squawked:1; 674 u_char w_other_squawked:1; 675 u_char w_same_squawked:1; 676 u_char w_sleep:1; 677 u_char w_spin:1; /* this is a spin mutex */ 678 u_int w_level; 679 struct witness *w_children[WITNESS_NCHILDREN]; 680}; 681 682struct witness_blessed { 683 char *b_lock1; 684 char *b_lock2; 685}; 686 687#ifdef KDEBUG 688/* 689 * When WITNESS_KDEBUG is set to 1, it will cause the system to 690 * drop into kdebug() when: 691 * - a lock heirarchy violation occurs 692 * - locks are held when going to sleep. 
693 */ 694#ifndef WITNESS_KDEBUG 695#define WITNESS_KDEBUG 0 696#endif 697int witness_kdebug = WITNESS_KDEBUG; 698#endif /* KDEBUG */ 699 700#ifndef WITNESS_SKIPSPIN 701#define WITNESS_SKIPSPIN 0 702#endif 703int witness_skipspin = WITNESS_SKIPSPIN; 704 705 706static struct mtx w_mtx; 707static struct witness *w_free; 708static struct witness *w_all; 709static int w_inited; 710static int witness_dead; /* fatal error, probably no memory */ 711 712static struct witness w_data[WITNESS_COUNT]; 713 714static struct witness *enroll __P((char *description, int flag)); 715static int itismychild __P((struct witness *parent, struct witness *child)); 716static void removechild __P((struct witness *parent, struct witness *child)); 717static int isitmychild __P((struct witness *parent, struct witness *child)); 718static int isitmydescendant __P((struct witness *parent, struct witness *child)); 719static int dup_ok __P((struct witness *)); 720static int blessed __P((struct witness *, struct witness *)); 721static void witness_displaydescendants 722 __P((void(*)(const char *fmt, ...), struct witness *)); 723static void witness_leveldescendents __P((struct witness *parent, int level)); 724static void witness_levelall __P((void)); 725static struct witness * witness_get __P((void)); 726static void witness_free __P((struct witness *m)); 727 728 729static char *ignore_list[] = { 730 "witness lock", 731 "Kdebug", /* breaks rules and may or may not work */ 732 "Page Alias", /* sparc only, witness lock won't block intr */ 733 NULL 734}; 735 736static char *spin_order_list[] = { 737 "sched lock", 738 "log mtx", 739 "zslock", /* sparc only above log, this one is a real hack */ 740 "time lock", /* above callout */ 741 "callout mtx", /* above wayout */ 742 /* 743 * leaf locks 744 */ 745 "wayout mtx", 746 "kernel_pmap", /* sparc only, logically equal "pmap" below */ 747 "pmap", /* sparc only */ 748 NULL 749}; 750 751static char *order_list[] = { 752 "tcb", "inp", "so_snd", "so_rcv", "Giant 
lock", NULL, 753 "udb", "inp", NULL, 754 "unp head", "unp", "so_snd", NULL, 755 "de0", "Giant lock", NULL, 756 "ifnet", "Giant lock", NULL, 757 "fifo", "so_snd", NULL, 758 "hme0", "Giant lock", NULL, 759 "esp0", "Giant lock", NULL, 760 "hfa0", "Giant lock", NULL, 761 "so_rcv", "atm_global", NULL, 762 "so_snd", "atm_global", NULL, 763 "NFS", "Giant lock", NULL, 764 NULL 765}; 766 767static char *dup_list[] = { 768 "inp", 769 "process group", 770 "session", 771 "unp", 772 "rtentry", 773 "rawcb", 774 NULL 775}; 776 777static char *sleep_list[] = { 778 "Giant lock", 779 NULL 780}; 781 782/* 783 * Pairs of locks which have been blessed 784 * Don't complain about order problems with blessed locks 785 */ 786static struct witness_blessed blessed_list[] = { 787}; 788static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); 789 790void 791witness_init(struct mtx *m, int flag) 792{ 793 m->mtx_witness = enroll(m->mtx_description, flag); 794} 795 796void 797witness_destroy(struct mtx *m) 798{ 799 struct mtx *m1; 800 struct proc *p; 801 p = CURPROC; 802 for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL; 803 m1 = LIST_NEXT(m1, mtx_held)) { 804 if (m1 == m) { 805 LIST_REMOVE(m, mtx_held); 806 break; 807 } 808 } 809 return; 810 811} 812 813void 814witness_enter(struct mtx *m, int flags, const char *file, int line) 815{ 816 struct witness *w, *w1; 817 struct mtx *m1; 818 struct proc *p; 819 int i; 820#ifdef KDEBUG 821 int go_into_kdebug = 0; 822#endif /* KDEBUG */ 823 824 w = m->mtx_witness; 825 p = CURPROC; 826 827 if (flags & MTX_SPIN) { 828 if (!w->w_spin) 829 panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @" 830 " %s:%d", m->mtx_description, file, line); 831 if (m->mtx_recurse != 0) 832 return; 833 mtx_enter(&w_mtx, MTX_SPIN); 834 i = witness_spin_check; 835 if (i != 0 && w->w_level < i) { 836 mtx_exit(&w_mtx, MTX_SPIN); 837 panic("mutex_enter(%s:%x, MTX_SPIN) out of order @" 838 " %s:%d already holding %s:%x", 839 m->mtx_description, w->w_level, file, 
line, 840 spin_order_list[ffs(i)-1], i); 841 } 842 PCPU_SET(witness_spin_check, i | w->w_level); 843 mtx_exit(&w_mtx, MTX_SPIN); 844 return; 845 } 846 if (w->w_spin) 847 panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 848 m->mtx_description, file, line); 849 850 if (m->mtx_recurse != 0) 851 return; 852 if (witness_dead) 853 goto out; 854 if (cold) 855 goto out; 856 857 if (!mtx_legal2block()) 858 panic("blockable mtx_enter() of %s when not legal @ %s:%d", 859 m->mtx_description, file, line); 860 /* 861 * Is this the first mutex acquired 862 */ 863 if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL) 864 goto out; 865 866 if ((w1 = m1->mtx_witness) == w) { 867 if (w->w_same_squawked || dup_ok(w)) 868 goto out; 869 w->w_same_squawked = 1; 870 printf("acquring duplicate lock of same type: \"%s\"\n", 871 m->mtx_description); 872 printf(" 1st @ %s:%d\n", w->w_file, w->w_line); 873 printf(" 2nd @ %s:%d\n", file, line); 874#ifdef KDEBUG 875 go_into_kdebug = 1; 876#endif /* KDEBUG */ 877 goto out; 878 } 879 MPASS(!mtx_owned(&w_mtx)); 880 mtx_enter(&w_mtx, MTX_SPIN); 881 /* 882 * If we have a known higher number just say ok 883 */ 884 if (witness_watch > 1 && w->w_level > w1->w_level) { 885 mtx_exit(&w_mtx, MTX_SPIN); 886 goto out; 887 } 888 if (isitmydescendant(m1->mtx_witness, w)) { 889 mtx_exit(&w_mtx, MTX_SPIN); 890 goto out; 891 } 892 for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) { 893 894 MPASS(i < 200); 895 w1 = m1->mtx_witness; 896 if (isitmydescendant(w, w1)) { 897 mtx_exit(&w_mtx, MTX_SPIN); 898 if (blessed(w, w1)) 899 goto out; 900 if (m1 == &Giant) { 901 if (w1->w_Giant_squawked) 902 goto out; 903 else 904 w1->w_Giant_squawked = 1; 905 } else { 906 if (w1->w_other_squawked) 907 goto out; 908 else 909 w1->w_other_squawked = 1; 910 } 911 printf("lock order reversal\n"); 912 printf(" 1st %s last acquired @ %s:%d\n", 913 w->w_description, w->w_file, w->w_line); 914 printf(" 2nd %p %s @ %s:%d\n", 915 m1, w1->w_description, w1->w_file, w1->w_line); 
916 printf(" 3rd %p %s @ %s:%d\n", 917 m, w->w_description, file, line); 918#ifdef KDEBUG 919 go_into_kdebug = 1; 920#endif /* KDEBUG */ 921 goto out; 922 } 923 } 924 m1 = LIST_FIRST(&p->p_heldmtx); 925 if (!itismychild(m1->mtx_witness, w)) 926 mtx_exit(&w_mtx, MTX_SPIN); 927 928out: 929#ifdef KDEBUG 930 if (witness_kdebug && go_into_kdebug) 931 kdebug(); 932#endif /* KDEBUG */ 933 w->w_file = file; 934 w->w_line = line; 935 m->mtx_line = line; 936 m->mtx_file = file; 937 938 /* 939 * If this pays off it likely means that a mutex being witnessed 940 * is acquired in hardclock. Put it in the ignore list. It is 941 * likely not the mutex this assert fails on. 942 */ 943 MPASS(m->mtx_held.le_prev == NULL); 944 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 945} 946 947void 948witness_exit(struct mtx *m, int flags, const char *file, int line) 949{ 950 struct witness *w; 951 952 w = m->mtx_witness; 953 954 if (flags & MTX_SPIN) { 955 if (!w->w_spin) 956 panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @" 957 " %s:%d", m->mtx_description, file, line); 958 if (m->mtx_recurse != 0) 959 return; 960 mtx_enter(&w_mtx, MTX_SPIN); 961 PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level); 962 mtx_exit(&w_mtx, MTX_SPIN); 963 return; 964 } 965 if (w->w_spin) 966 panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 967 m->mtx_description, file, line); 968 969 if (m->mtx_recurse != 0) 970 return; 971 972 if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold) 973 panic("switchable mtx_exit() of %s when not legal @ %s:%d", 974 m->mtx_description, file, line); 975 LIST_REMOVE(m, mtx_held); 976 m->mtx_held.le_prev = NULL; 977} 978 979void 980witness_try_enter(struct mtx *m, int flags, const char *file, int line) 981{ 982 struct proc *p; 983 struct witness *w = m->mtx_witness; 984 985 if (flags & MTX_SPIN) { 986 if (!w->w_spin) 987 panic("mutex_try_enter: " 988 "MTX_SPIN on MTX_DEF mutex %s @ %s:%d", 989 m->mtx_description, file, line); 990 if 
(m->mtx_recurse != 0) 991 return; 992 mtx_enter(&w_mtx, MTX_SPIN); 993 PCPU_SET(witness_spin_check, witness_spin_check | w->w_level); 994 mtx_exit(&w_mtx, MTX_SPIN); 995 return; 996 } 997 998 if (w->w_spin) 999 panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 1000 m->mtx_description, file, line); 1001 1002 if (m->mtx_recurse != 0) 1003 return; 1004 1005 w->w_file = file; 1006 w->w_line = line; 1007 m->mtx_line = line; 1008 m->mtx_file = file; 1009 p = CURPROC; 1010 MPASS(m->mtx_held.le_prev == NULL); 1011 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 1012} 1013 1014void 1015witness_display(void(*prnt)(const char *fmt, ...)) 1016{ 1017 struct witness *w, *w1; 1018 1019 witness_levelall(); 1020 1021 for (w = w_all; w; w = w->w_next) { 1022 if (w->w_file == NULL) 1023 continue; 1024 for (w1 = w_all; w1; w1 = w1->w_next) { 1025 if (isitmychild(w1, w)) 1026 break; 1027 } 1028 if (w1 != NULL) 1029 continue; 1030 /* 1031 * This lock has no anscestors, display its descendants. 1032 */ 1033 witness_displaydescendants(prnt, w); 1034 } 1035 prnt("\nMutex which were never acquired\n"); 1036 for (w = w_all; w; w = w->w_next) { 1037 if (w->w_file != NULL) 1038 continue; 1039 prnt("%s\n", w->w_description); 1040 } 1041} 1042 1043int 1044witness_sleep(int check_only, struct mtx *mtx, const char *file, int line) 1045{ 1046 struct mtx *m; 1047 struct proc *p; 1048 char **sleep; 1049 int n = 0; 1050 1051 p = CURPROC; 1052 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1053 m = LIST_NEXT(m, mtx_held)) { 1054 if (m == mtx) 1055 continue; 1056 for (sleep = sleep_list; *sleep!= NULL; sleep++) 1057 if (strcmp(m->mtx_description, *sleep) == 0) 1058 goto next; 1059 printf("%s:%d: %s with \"%s\" locked from %s:%d\n", 1060 file, line, check_only ? 
"could sleep" : "sleeping", 1061 m->mtx_description, 1062 m->mtx_witness->w_file, m->mtx_witness->w_line); 1063 n++; 1064 next: 1065 } 1066#ifdef KDEBUG 1067 if (witness_kdebug && n) 1068 kdebug(); 1069#endif /* KDEBUG */ 1070 return (n); 1071} 1072 1073static struct witness * 1074enroll(char *description, int flag) 1075{ 1076 int i; 1077 struct witness *w, *w1; 1078 char **ignore; 1079 char **order; 1080 1081 if (!witness_watch) 1082 return (NULL); 1083 for (ignore = ignore_list; *ignore != NULL; ignore++) 1084 if (strcmp(description, *ignore) == 0) 1085 return (NULL); 1086 1087 if (w_inited == 0) { 1088 mtx_init(&w_mtx, "witness lock", MTX_DEF); 1089 for (i = 0; i < WITNESS_COUNT; i++) { 1090 w = &w_data[i]; 1091 witness_free(w); 1092 } 1093 w_inited = 1; 1094 for (order = order_list; *order != NULL; order++) { 1095 w = enroll(*order, MTX_DEF); 1096 w->w_file = "order list"; 1097 for (order++; *order != NULL; order++) { 1098 w1 = enroll(*order, MTX_DEF); 1099 w1->w_file = "order list"; 1100 itismychild(w, w1); 1101 w = w1; 1102 } 1103 } 1104 } 1105 if ((flag & MTX_SPIN) && witness_skipspin) 1106 return (NULL); 1107 mtx_enter(&w_mtx, MTX_SPIN); 1108 for (w = w_all; w; w = w->w_next) { 1109 if (strcmp(description, w->w_description) == 0) { 1110 mtx_exit(&w_mtx, MTX_SPIN); 1111 return (w); 1112 } 1113 } 1114 if ((w = witness_get()) == NULL) 1115 return (NULL); 1116 w->w_next = w_all; 1117 w_all = w; 1118 w->w_description = description; 1119 mtx_exit(&w_mtx, MTX_SPIN); 1120 if (flag & MTX_SPIN) { 1121 w->w_spin = 1; 1122 1123 i = 1; 1124 for (order = spin_order_list; *order != NULL; order++) { 1125 if (strcmp(description, *order) == 0) 1126 break; 1127 i <<= 1; 1128 } 1129 if (*order == NULL) 1130 panic("spin lock %s not in order list", description); 1131 w->w_level = i; 1132 } 1133 return (w); 1134} 1135 1136static int 1137itismychild(struct witness *parent, struct witness *child) 1138{ 1139 static int recursed; 1140 1141 /* 1142 * Insert "child" after "parent" 1143 
*/ 1144 while (parent->w_morechildren) 1145 parent = parent->w_morechildren; 1146 1147 if (parent->w_childcnt == WITNESS_NCHILDREN) { 1148 if ((parent->w_morechildren = witness_get()) == NULL) 1149 return (1); 1150 parent = parent->w_morechildren; 1151 } 1152 MPASS(child != NULL); 1153 parent->w_children[parent->w_childcnt++] = child; 1154 /* 1155 * now prune whole tree 1156 */ 1157 if (recursed) 1158 return (0); 1159 recursed = 1; 1160 for (child = w_all; child != NULL; child = child->w_next) { 1161 for (parent = w_all; parent != NULL; 1162 parent = parent->w_next) { 1163 if (!isitmychild(parent, child)) 1164 continue; 1165 removechild(parent, child); 1166 if (isitmydescendant(parent, child)) 1167 continue; 1168 itismychild(parent, child); 1169 } 1170 } 1171 recursed = 0; 1172 witness_levelall(); 1173 return (0); 1174} 1175 1176static void 1177removechild(struct witness *parent, struct witness *child) 1178{ 1179 struct witness *w, *w1; 1180 int i; 1181 1182 for (w = parent; w != NULL; w = w->w_morechildren) 1183 for (i = 0; i < w->w_childcnt; i++) 1184 if (w->w_children[i] == child) 1185 goto found; 1186 return; 1187found: 1188 for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren) 1189 continue; 1190 w->w_children[i] = w1->w_children[--w1->w_childcnt]; 1191 MPASS(w->w_children[i] != NULL); 1192 1193 if (w1->w_childcnt != 0) 1194 return; 1195 1196 if (w1 == parent) 1197 return; 1198 for (w = parent; w->w_morechildren != w1; w = w->w_morechildren) 1199 continue; 1200 w->w_morechildren = 0; 1201 witness_free(w1); 1202} 1203 1204static int 1205isitmychild(struct witness *parent, struct witness *child) 1206{ 1207 struct witness *w; 1208 int i; 1209 1210 for (w = parent; w != NULL; w = w->w_morechildren) { 1211 for (i = 0; i < w->w_childcnt; i++) { 1212 if (w->w_children[i] == child) 1213 return (1); 1214 } 1215 } 1216 return (0); 1217} 1218 1219static int 1220isitmydescendant(struct witness *parent, struct witness *child) 1221{ 1222 struct witness *w; 1223 
int i; 1224 int j; 1225 1226 for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) { 1227 MPASS(j < 1000); 1228 for (i = 0; i < w->w_childcnt; i++) { 1229 if (w->w_children[i] == child) 1230 return (1); 1231 } 1232 for (i = 0; i < w->w_childcnt; i++) { 1233 if (isitmydescendant(w->w_children[i], child)) 1234 return (1); 1235 } 1236 } 1237 return (0); 1238} 1239 1240void 1241witness_levelall (void) 1242{ 1243 struct witness *w, *w1; 1244 1245 for (w = w_all; w; w = w->w_next) 1246 if (!w->w_spin) 1247 w->w_level = 0; 1248 for (w = w_all; w; w = w->w_next) { 1249 if (w->w_spin) 1250 continue; 1251 for (w1 = w_all; w1; w1 = w1->w_next) { 1252 if (isitmychild(w1, w)) 1253 break; 1254 } 1255 if (w1 != NULL) 1256 continue; 1257 witness_leveldescendents(w, 0); 1258 } 1259} 1260 1261static void 1262witness_leveldescendents(struct witness *parent, int level) 1263{ 1264 int i; 1265 struct witness *w; 1266 1267 if (parent->w_level < level) 1268 parent->w_level = level; 1269 level++; 1270 for (w = parent; w != NULL; w = w->w_morechildren) 1271 for (i = 0; i < w->w_childcnt; i++) 1272 witness_leveldescendents(w->w_children[i], level); 1273} 1274 1275static void 1276witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1277 struct witness *parent) 1278{ 1279 struct witness *w; 1280 int i; 1281 int level = parent->w_level; 1282 1283 prnt("%d", level); 1284 if (level < 10) 1285 prnt(" "); 1286 for (i = 0; i < level; i++) 1287 prnt(" "); 1288 prnt("%s", parent->w_description); 1289 if (parent->w_file != NULL) { 1290 prnt(" -- last acquired @ %s", parent->w_file); 1291#ifndef W_USE_WHERE 1292 prnt(":%d", parent->w_line); 1293#endif 1294 prnt("\n"); 1295 } 1296 1297 for (w = parent; w != NULL; w = w->w_morechildren) 1298 for (i = 0; i < w->w_childcnt; i++) 1299 witness_displaydescendants(prnt, w->w_children[i]); 1300 } 1301 1302static int 1303dup_ok(struct witness *w) 1304{ 1305 char **dup; 1306 1307 for (dup = dup_list; *dup!= NULL; dup++) 1308 if 
(strcmp(w->w_description, *dup) == 0) 1309 return (1); 1310 return (0); 1311} 1312 1313static int 1314blessed(struct witness *w1, struct witness *w2) 1315{ 1316 int i; 1317 struct witness_blessed *b; 1318 1319 for (i = 0; i < blessed_count; i++) { 1320 b = &blessed_list[i]; 1321 if (strcmp(w1->w_description, b->b_lock1) == 0) { 1322 if (strcmp(w2->w_description, b->b_lock2) == 0) 1323 return (1); 1324 continue; 1325 } 1326 if (strcmp(w1->w_description, b->b_lock2) == 0) 1327 if (strcmp(w2->w_description, b->b_lock1) == 0) 1328 return (1); 1329 } 1330 return (0); 1331} 1332 1333static struct witness * 1334witness_get() 1335{ 1336 struct witness *w; 1337 1338 if ((w = w_free) == NULL) { 1339 witness_dead = 1; 1340 mtx_exit(&w_mtx, MTX_SPIN); 1341 printf("witness exhausted\n"); 1342 return (NULL); 1343 } 1344 w_free = w->w_next; 1345 bzero(w, sizeof(*w)); 1346 return (w); 1347} 1348 1349static void 1350witness_free(struct witness *w) 1351{ 1352 w->w_next = w_free; 1353 w_free = w; 1354} 1355 1356void 1357witness_list(struct proc *p) 1358{ 1359 struct mtx *m; 1360 1361 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1362 m = LIST_NEXT(m, mtx_held)) { 1363 printf("\t\"%s\" (%p) locked at %s:%d\n", 1364 m->mtx_description, m, 1365 m->mtx_witness->w_file, m->mtx_witness->w_line); 1366 } 1367} 1368 1369void 1370witness_save(struct mtx *m, const char **filep, int *linep) 1371{ 1372 *filep = m->mtx_witness->w_file; 1373 *linep = m->mtx_witness->w_line; 1374} 1375 1376void 1377witness_restore(struct mtx *m, const char *file, int line) 1378{ 1379 m->mtx_witness->w_file = file; 1380 m->mtx_witness->w_line = line; 1381} 1382 1383#endif /* (defined(MUTEX_DEBUG) && defined(WITNESS)) */ 1384