subr_witness.c revision 68785
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 * $FreeBSD: head/sys/kern/subr_witness.c 68785 2000-11-15 22:00:31Z jhb $ 31 */ 32 33/* 34 * Main Entry: witness 35 * Pronunciation: 'wit-n&s 36 * Function: noun 37 * Etymology: Middle English witnesse, from Old English witnes knowledge, 38 * testimony, witness, from 2wit 39 * Date: before 12th century 40 * 1 : attestation of a fact or event : TESTIMONY 41 * 2 : one that gives evidence; specifically : one who testifies in 42 * a cause or before a judicial tribunal 43 * 3 : one asked to be present at a transaction so as to be able to 44 * testify to its having taken place 45 * 4 : one who has personal knowledge of something 46 * 5 a : something serving as evidence or proof : SIGN 47 * b : public affirmation by word or example of usually 48 * religious faith or conviction <the heroic witness to divine 49 * life -- Pilot> 50 * 6 capitalized : a member of the Jehovah's Witnesses 51 */ 52 53#include "opt_witness.h" 54 55#include <sys/param.h> 56#include <sys/bus.h> 57#include <sys/kernel.h> 58#include <sys/malloc.h> 59#include <sys/proc.h> 60#include <sys/sysctl.h> 61#include <sys/systm.h> 62#include <sys/vmmeter.h> 63#include <sys/ktr.h> 64 65#include <machine/atomic.h> 66#include <machine/bus.h> 67#include <machine/clock.h> 68#include <machine/cpu.h> 69 70#include <vm/vm.h> 71#include <vm/vm_extern.h> 72 73#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. */ 74#include <sys/mutex.h> 75 76/* 77 * Machine independent bits of the mutex implementation 78 */ 79/* All mutexes in system (used for debug/panic) */ 80#ifdef MUTEX_DEBUG 81static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0, 82 "All mutexes queue head" }; 83static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug, 84 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 85 { NULL, NULL }, &all_mtx, &all_mtx }; 86#else /* MUTEX_DEBUG */ 87static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head", 88 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 89 { NULL, NULL }, &all_mtx, &all_mtx }; 90#endif /* MUTEX_DEBUG */ 91 92static int mtx_cur_cnt; 93static int mtx_max_cnt; 94 95void _mtx_enter_giant_def(void); 96void _mtx_exit_giant_def(void); 97static void propagate_priority(struct proc *) __unused; 98 99#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 100#define mtx_owner(m) (mtx_unowned(m) ? NULL \ 101 : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) 102 103#define RETIP(x) *(((uintptr_t *)(&x)) - 1) 104#define SET_PRIO(p, pri) (p)->p_priority = (pri) 105 106/* 107 * XXX Temporary, for use from assembly language 108 */ 109 110void 111_mtx_enter_giant_def(void) 112{ 113 114 mtx_enter(&Giant, MTX_DEF); 115} 116 117void 118_mtx_exit_giant_def(void) 119{ 120 121 mtx_exit(&Giant, MTX_DEF); 122} 123 124static void 125propagate_priority(struct proc *p) 126{ 127 int pri = p->p_priority; 128 struct mtx *m = p->p_blocked; 129 130 for (;;) { 131 struct proc *p1; 132 133 p = mtx_owner(m); 134 135 if (p == NULL) { 136 /* 137 * This really isn't quite right. Really 138 * ought to bump priority of process that 139 * next acquires the mutex. 140 */ 141 MPASS(m->mtx_lock == MTX_CONTESTED); 142 return; 143 } 144 MPASS(p->p_magic == P_MAGIC); 145 if (p->p_priority <= pri) 146 return; 147 /* 148 * If lock holder is actually running, just bump priority. 149 */ 150 if (TAILQ_NEXT(p, p_procq) == NULL) { 151 MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); 152 SET_PRIO(p, pri); 153 return; 154 } 155 /* 156 * If on run queue move to new run queue, and 157 * quit. 158 */ 159 if (p->p_stat == SRUN) { 160 MPASS(p->p_blocked == NULL); 161 remrunqueue(p); 162 SET_PRIO(p, pri); 163 setrunqueue(p); 164 return; 165 } 166 167 /* 168 * If we aren't blocked on a mutex, give up and quit. 169 */ 170 if (p->p_stat != SMTX) { 171 printf( 172 "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n", 173 p->p_pid, p->p_comm, p->p_stat, m->mtx_description); 174 return; 175 } 176 177 /* 178 * Pick up the mutex that p is blocked on. 179 */ 180 m = p->p_blocked; 181 MPASS(m != NULL); 182 183 printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, 184 p->p_comm, m->mtx_description); 185 /* 186 * Check if the proc needs to be moved up on 187 * the blocked chain 188 */ 189 if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL || 190 p1->p_priority <= pri) { 191 if (p1) 192 printf( 193 "XXX: previous process %d(%s) has higher priority\n", 194 p->p_pid, p->p_comm); 195 else 196 printf("XXX: process at head of run queue\n"); 197 continue; 198 } 199 200 /* 201 * Remove proc from blocked chain 202 */ 203 TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); 204 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { 205 MPASS(p1->p_magic == P_MAGIC); 206 if (p1->p_priority > pri) 207 break; 208 } 209 if (p1) 210 TAILQ_INSERT_BEFORE(p1, p, p_procq); 211 else 212 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 213 CTR4(KTR_LOCK, 214 "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s", 215 p, p1, m, m->mtx_description); 216 } 217} 218 219void 220mtx_enter_hard(struct mtx *m, int type, int saveintr) 221{ 222 struct proc *p = CURPROC; 223 struct timeval new_switchtime; 224 225 KASSERT(p != NULL, ("curproc is NULL in mutex")); 226 227 switch (type) { 228 case MTX_DEF: 229 if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) { 230 m->mtx_recurse++; 231 atomic_set_ptr(&m->mtx_lock, MTX_RECURSE); 232 CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); 233 return; 234 } 235 CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]", 236 m, (void *)m->mtx_lock, (void *)RETIP(m)); 237 while (!_obtain_lock(m, p)) { 238 uintptr_t v; 239 struct proc *p1; 240 241 mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); 242 /* 243 * check if the lock has been released while 244 * waiting for the schedlock. 245 */ 246 if ((v = m->mtx_lock) == MTX_UNOWNED) { 247 mtx_exit(&sched_lock, MTX_SPIN); 248 continue; 249 } 250 /* 251 * The mutex was marked contested on release. This 252 * means that there are processes blocked on it. 253 */ 254 if (v == MTX_CONTESTED) { 255 p1 = TAILQ_FIRST(&m->mtx_blocked); 256 KASSERT(p1 != NULL, ("contested mutex has no contesters")); 257 KASSERT(p != NULL, ("curproc is NULL for contested mutex")); 258 m->mtx_lock = (uintptr_t)p | MTX_CONTESTED; 259 if (p1->p_priority < p->p_priority) { 260 SET_PRIO(p, p1->p_priority); 261 } 262 mtx_exit(&sched_lock, MTX_SPIN); 263 return; 264 } 265 /* 266 * If the mutex isn't already contested and 267 * a failure occurs setting the contested bit the 268 * mutex was either release or the 269 * state of the RECURSION bit changed. 270 */ 271 if ((v & MTX_CONTESTED) == 0 && 272 !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 273 (void *)(v | MTX_CONTESTED))) { 274 mtx_exit(&sched_lock, MTX_SPIN); 275 continue; 276 } 277 278 /* We definitely have to sleep for this lock */ 279 mtx_assert(m, MA_NOTOWNED); 280 281#ifdef notyet 282 /* 283 * If we're borrowing an interrupted thread's VM 284 * context must clean up before going to sleep. 285 */ 286 if (p->p_flag & (P_ITHD | P_SITHD)) { 287 ithd_t *it = (ithd_t *)p; 288 289 if (it->it_interrupted) { 290 CTR2(KTR_LOCK, 291 "mtx_enter: 0x%x interrupted 0x%x", 292 it, it->it_interrupted); 293 intr_thd_fixup(it); 294 } 295 } 296#endif 297 298 /* Put us on the list of procs blocked on this mutex */ 299 if (TAILQ_EMPTY(&m->mtx_blocked)) { 300 p1 = (struct proc *)(m->mtx_lock & 301 MTX_FLAGMASK); 302 LIST_INSERT_HEAD(&p1->p_contested, m, 303 mtx_contested); 304 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 305 } else { 306 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) 307 if (p1->p_priority > p->p_priority) 308 break; 309 if (p1) 310 TAILQ_INSERT_BEFORE(p1, p, p_procq); 311 else 312 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, 313 p_procq); 314 } 315 316 p->p_blocked = m; /* Who we're blocked on */ 317 p->p_stat = SMTX; 318#if 0 319 propagate_priority(p); 320#endif 321 CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s", 322 p, m, m->mtx_description); 323 /* 324 * Blatantly copied from mi_switch nearly verbatim. 325 * When Giant goes away and we stop dinking with it 326 * in mi_switch, we can go back to calling mi_switch 327 * directly here. 328 */ 329 330 /* 331 * Compute the amount of time during which the current 332 * process was running, and add that to its total so 333 * far. 334 */ 335 microuptime(&new_switchtime); 336 if (timevalcmp(&new_switchtime, &switchtime, <)) { 337 printf( 338 "microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n", 339 switchtime.tv_sec, switchtime.tv_usec, 340 new_switchtime.tv_sec, 341 new_switchtime.tv_usec); 342 new_switchtime = switchtime; 343 } else { 344 p->p_runtime += (new_switchtime.tv_usec - 345 switchtime.tv_usec) + 346 (new_switchtime.tv_sec - switchtime.tv_sec) * 347 (int64_t)1000000; 348 } 349 350 /* 351 * Pick a new current process and record its start time. 352 */ 353 cnt.v_swtch++; 354 switchtime = new_switchtime; 355 cpu_switch(); 356 if (switchtime.tv_sec == 0) 357 microuptime(&switchtime); 358 switchticks = ticks; 359 CTR3(KTR_LOCK, 360 "mtx_enter: p 0x%p free from blocked on [0x%p] %s", 361 p, m, m->mtx_description); 362 mtx_exit(&sched_lock, MTX_SPIN); 363 } 364 return; 365 case MTX_SPIN: 366 case MTX_SPIN | MTX_FIRST: 367 case MTX_SPIN | MTX_TOPHALF: 368 { 369 int i = 0; 370 371 if (m->mtx_lock == (uintptr_t)p) { 372 m->mtx_recurse++; 373 return; 374 } 375 CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); 376 for (;;) { 377 if (_obtain_lock(m, p)) 378 break; 379 while (m->mtx_lock != MTX_UNOWNED) { 380 if (i++ < 1000000) 381 continue; 382 if (i++ < 6000000) 383 DELAY (1); 384#ifdef DDB 385 else if (!db_active) 386#else 387 else 388#endif 389 panic( 390 "spin lock %s held by 0x%p for > 5 seconds", 391 m->mtx_description, 392 (void *)m->mtx_lock); 393 } 394 } 395 396#ifdef MUTEX_DEBUG 397 if (type != MTX_SPIN) 398 m->mtx_saveintr = 0xbeefface; 399 else 400#endif 401 m->mtx_saveintr = saveintr; 402 CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); 403 return; 404 } 405 } 406} 407 408void 409mtx_exit_hard(struct mtx *m, int type) 410{ 411 struct proc *p, *p1; 412 struct mtx *m1; 413 int pri; 414 415 p = CURPROC; 416 switch (type) { 417 case MTX_DEF: 418 case MTX_DEF | MTX_NOSWITCH: 419 if (m->mtx_recurse != 0) { 420 if (--(m->mtx_recurse) == 0) 421 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE); 422 CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); 423 return; 424 } 425 mtx_enter(&sched_lock, MTX_SPIN); 426 CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); 427 p1 = TAILQ_FIRST(&m->mtx_blocked); 428 MPASS(p->p_magic == P_MAGIC); 429 MPASS(p1->p_magic == P_MAGIC); 430 TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); 431 if (TAILQ_EMPTY(&m->mtx_blocked)) { 432 LIST_REMOVE(m, mtx_contested); 433 _release_lock_quick(m); 434 CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); 435 } else 436 m->mtx_lock = MTX_CONTESTED; 437 pri = MAXPRI; 438 LIST_FOREACH(m1, &p->p_contested, mtx_contested) { 439 int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; 440 if (cp < pri) 441 pri = cp; 442 } 443 if (pri > p->p_nativepri) 444 pri = p->p_nativepri; 445 SET_PRIO(p, pri); 446 CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p", 447 m, p1); 448 p1->p_blocked = NULL; 449 p1->p_stat = SRUN; 450 setrunqueue(p1); 451 if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { 452#ifdef notyet 453 if (p->p_flag & (P_ITHD | P_SITHD)) { 454 ithd_t *it = (ithd_t *)p; 455 456 if (it->it_interrupted) { 457 CTR2(KTR_LOCK, 458 "mtx_exit: 0x%x interruped 0x%x", 459 it, it->it_interrupted); 460 intr_thd_fixup(it); 461 } 462 } 463#endif 464 setrunqueue(p); 465 CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p", 466 m, (void *)m->mtx_lock); 467 mi_switch(); 468 CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p", 469 m, (void *)m->mtx_lock); 470 } 471 mtx_exit(&sched_lock, MTX_SPIN); 472 break; 473 case MTX_SPIN: 474 case MTX_SPIN | MTX_FIRST: 475 if (m->mtx_recurse != 0) { 476 m->mtx_recurse--; 477 return; 478 } 479 MPASS(mtx_owned(m)); 480 _release_lock_quick(m); 481 if (type & MTX_FIRST) 482 enable_intr(); /* XXX is this kosher? */ 483 else { 484 MPASS(m->mtx_saveintr != 0xbeefface); 485 restore_intr(m->mtx_saveintr); 486 } 487 break; 488 case MTX_SPIN | MTX_TOPHALF: 489 if (m->mtx_recurse != 0) { 490 m->mtx_recurse--; 491 return; 492 } 493 MPASS(mtx_owned(m)); 494 _release_lock_quick(m); 495 break; 496 default: 497 panic("mtx_exit_hard: unsupported type 0x%x\n", type); 498 } 499} 500 501#define MV_DESTROY 0 /* validate before destory */ 502#define MV_INIT 1 /* validate before init */ 503 504#ifdef MUTEX_DEBUG 505 506int mtx_validate __P((struct mtx *, int)); 507 508int 509mtx_validate(struct mtx *m, int when) 510{ 511 struct mtx *mp; 512 int i; 513 int retval = 0; 514 515 if (m == &all_mtx || cold) 516 return 0; 517 518 mtx_enter(&all_mtx, MTX_DEF); 519/* 520 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 521 * we can re-enable the kernacc() checks. 522 */ 523#ifndef __alpha__ 524 MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t), 525 VM_PROT_READ) == 1); 526#endif 527 MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx); 528 for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { 529#ifndef __alpha__ 530 if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t), 531 VM_PROT_READ) != 1) { 532 panic("mtx_validate: mp=%p mp->mtx_next=%p", 533 mp, mp->mtx_next); 534 } 535#endif 536 i++; 537 if (i > mtx_cur_cnt) { 538 panic("mtx_validate: too many in chain, known=%d\n", 539 mtx_cur_cnt); 540 } 541 } 542 MPASS(i == mtx_cur_cnt); 543 switch (when) { 544 case MV_DESTROY: 545 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 546 if (mp == m) 547 break; 548 MPASS(mp == m); 549 break; 550 case MV_INIT: 551 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 552 if (mp == m) { 553 /* 554 * Not good. This mutex already exists. 555 */ 556 printf("re-initing existing mutex %s\n", 557 m->mtx_description); 558 MPASS(m->mtx_lock == MTX_UNOWNED); 559 retval = 1; 560 } 561 } 562 mtx_exit(&all_mtx, MTX_DEF); 563 return (retval); 564} 565#endif 566 567void 568mtx_init(struct mtx *m, const char *t, int flag) 569{ 570#ifdef MUTEX_DEBUG 571 struct mtx_debug *debug; 572#endif 573 574 CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); 575#ifdef MUTEX_DEBUG 576 if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ 577 return; 578 if (flag & MTX_COLD) 579 debug = m->mtx_debug; 580 else 581 debug = NULL; 582 if (debug == NULL) { 583#ifdef DIAGNOSTIC 584 if(cold && bootverbose) 585 printf("malloc'ing mtx_debug while cold for %s\n", t); 586#endif 587 588 /* XXX - should not use DEVBUF */ 589 debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT); 590 MPASS(debug != NULL); 591 bzero(debug, sizeof(struct mtx_debug)); 592 } 593#endif 594 bzero((void *)m, sizeof *m); 595 TAILQ_INIT(&m->mtx_blocked); 596#ifdef MUTEX_DEBUG 597 m->mtx_debug = debug; 598#endif 599 m->mtx_description = t; 600 m->mtx_lock = MTX_UNOWNED; 601 /* Put on all mutex queue */ 602 mtx_enter(&all_mtx, MTX_DEF); 603 m->mtx_next = &all_mtx; 604 m->mtx_prev = all_mtx.mtx_prev; 605 m->mtx_prev->mtx_next = m; 606 all_mtx.mtx_prev = m; 607 if (++mtx_cur_cnt > mtx_max_cnt) 608 mtx_max_cnt = mtx_cur_cnt; 609 mtx_exit(&all_mtx, MTX_DEF); 610 witness_init(m, flag); 611} 612 613void 614mtx_destroy(struct mtx *m) 615{ 616 617 CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); 618#ifdef MUTEX_DEBUG 619 if (m->mtx_next == NULL) 620 panic("mtx_destroy: %p (%s) already destroyed", 621 m, m->mtx_description); 622 623 if (!mtx_owned(m)) { 624 MPASS(m->mtx_lock == MTX_UNOWNED); 625 } else { 626 MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); 627 } 628 mtx_validate(m, MV_DESTROY); /* diagnostic */ 629#endif 630 631#ifdef WITNESS 632 if (m->mtx_witness) 633 witness_destroy(m); 634#endif /* WITNESS */ 635 636 /* Remove from the all mutex queue */ 637 mtx_enter(&all_mtx, MTX_DEF); 638 m->mtx_next->mtx_prev = m->mtx_prev; 639 m->mtx_prev->mtx_next = m->mtx_next; 640#ifdef MUTEX_DEBUG 641 m->mtx_next = m->mtx_prev = NULL; 642 free(m->mtx_debug, M_DEVBUF); 643 m->mtx_debug = NULL; 644#endif 645 mtx_cur_cnt--; 646 mtx_exit(&all_mtx, MTX_DEF); 647} 648 649/* 650 * The non-inlined versions of the mtx_*() functions are always built (above), 651 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options 652 * being specified. 653 */ 654#if (defined(MUTEX_DEBUG) && defined(WITNESS)) 655 656#define WITNESS_COUNT 200 657#define WITNESS_NCHILDREN 2 658 659#ifndef SMP 660extern int witness_spin_check; 661#endif 662 663int witness_watch = 1; 664 665struct witness { 666 struct witness *w_next; 667 const char *w_description; 668 const char *w_file; 669 int w_line; 670 struct witness *w_morechildren; 671 u_char w_childcnt; 672 u_char w_Giant_squawked:1; 673 u_char w_other_squawked:1; 674 u_char w_same_squawked:1; 675 u_char w_sleep:1; 676 u_char w_spin:1; /* this is a spin mutex */ 677 u_int w_level; 678 struct witness *w_children[WITNESS_NCHILDREN]; 679}; 680 681struct witness_blessed { 682 char *b_lock1; 683 char *b_lock2; 684}; 685 686#ifdef DDB 687/* 688 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 689 * drop into kdebug() when: 690 * - a lock heirarchy violation occurs 691 * - locks are held when going to sleep. 692 */ 693#ifdef WITNESS_DDB 694int witness_ddb = 1; 695#else 696int witness_ddb = 0; 697#endif 698SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, ""); 699#endif /* DDB */ 700 701#ifdef WITNESS_SKIPSPIN 702int witness_skipspin = 1; 703#else 704int witness_skipspin = 0; 705#endif 706SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0, 707 ""); 708 709MUTEX_DECLARE(static,w_mtx); 710static struct witness *w_free; 711static struct witness *w_all; 712static int w_inited; 713static int witness_dead; /* fatal error, probably no memory */ 714 715static struct witness w_data[WITNESS_COUNT]; 716 717static struct witness *enroll __P((const char *description, int flag)); 718static int itismychild __P((struct witness *parent, struct witness *child)); 719static void removechild __P((struct witness *parent, struct witness *child)); 720static int isitmychild __P((struct witness *parent, struct witness *child)); 721static int isitmydescendant __P((struct witness *parent, struct witness *child)); 722static int dup_ok __P((struct witness *)); 723static int blessed __P((struct witness *, struct witness *)); 724static void witness_displaydescendants 725 __P((void(*)(const char *fmt, ...), struct witness *)); 726static void witness_leveldescendents __P((struct witness *parent, int level)); 727static void witness_levelall __P((void)); 728static struct witness * witness_get __P((void)); 729static void witness_free __P((struct witness *m)); 730 731 732static char *ignore_list[] = { 733 "witness lock", 734 NULL 735}; 736 737static char *spin_order_list[] = { 738 "sched lock", 739 "clk", 740 "sio", 741 /* 742 * leaf locks 743 */ 744 NULL 745}; 746 747static char *order_list[] = { 748 NULL 749}; 750 751static char *dup_list[] = { 752 NULL 753}; 754 755static char *sleep_list[] = { 756 "Giant lock", 757 NULL 758}; 759 760/* 761 * Pairs of locks which have been blessed 762 * Don't complain about order problems with blessed locks 763 */ 764static struct witness_blessed blessed_list[] = { 765}; 766static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); 767 768void 769witness_init(struct mtx *m, int flag) 770{ 771 m->mtx_witness = enroll(m->mtx_description, flag); 772} 773 774void 775witness_destroy(struct mtx *m) 776{ 777 struct mtx *m1; 778 struct proc *p; 779 p = CURPROC; 780 for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL; 781 m1 = LIST_NEXT(m1, mtx_held)) { 782 if (m1 == m) { 783 LIST_REMOVE(m, mtx_held); 784 break; 785 } 786 } 787 return; 788 789} 790 791void 792witness_enter(struct mtx *m, int flags, const char *file, int line) 793{ 794 struct witness *w, *w1; 795 struct mtx *m1; 796 struct proc *p; 797 int i; 798#ifdef DDB 799 int go_into_ddb = 0; 800#endif /* DDB */ 801 802 w = m->mtx_witness; 803 p = CURPROC; 804 805 if (flags & MTX_SPIN) { 806 if (!w->w_spin) 807 panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @" 808 " %s:%d", m->mtx_description, file, line); 809 if (m->mtx_recurse != 0) 810 return; 811 mtx_enter(&w_mtx, MTX_SPIN); 812 i = witness_spin_check; 813 if (i != 0 && w->w_level < i) { 814 mtx_exit(&w_mtx, MTX_SPIN); 815 panic("mutex_enter(%s:%x, MTX_SPIN) out of order @" 816 " %s:%d already holding %s:%x", 817 m->mtx_description, w->w_level, file, line, 818 spin_order_list[ffs(i)-1], i); 819 } 820 PCPU_SET(witness_spin_check, i | w->w_level); 821 mtx_exit(&w_mtx, MTX_SPIN); 822 return; 823 } 824 if (w->w_spin) 825 panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 826 m->mtx_description, file, line); 827 828 if (m->mtx_recurse != 0) 829 return; 830 if (witness_dead) 831 goto out; 832 if (cold || panicstr) 833 goto out; 834 835 if (!mtx_legal2block()) 836 panic("blockable mtx_enter() of %s when not legal @ %s:%d", 837 m->mtx_description, file, line); 838 /* 839 * Is this the first mutex acquired 840 */ 841 if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL) 842 goto out; 843 844 if ((w1 = m1->mtx_witness) == w) { 845 if (w->w_same_squawked || dup_ok(w)) 846 goto out; 847 w->w_same_squawked = 1; 848 printf("acquring duplicate lock of same type: \"%s\"\n", 849 m->mtx_description); 850 printf(" 1st @ %s:%d\n", w->w_file, w->w_line); 851 printf(" 2nd @ %s:%d\n", file, line); 852#ifdef DDB 853 go_into_ddb = 1; 854#endif /* DDB */ 855 goto out; 856 } 857 MPASS(!mtx_owned(&w_mtx)); 858 mtx_enter(&w_mtx, MTX_SPIN); 859 /* 860 * If we have a known higher number just say ok 861 */ 862 if (witness_watch > 1 && w->w_level > w1->w_level) { 863 mtx_exit(&w_mtx, MTX_SPIN); 864 goto out; 865 } 866 if (isitmydescendant(m1->mtx_witness, w)) { 867 mtx_exit(&w_mtx, MTX_SPIN); 868 goto out; 869 } 870 for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) { 871 872 MPASS(i < 200); 873 w1 = m1->mtx_witness; 874 if (isitmydescendant(w, w1)) { 875 mtx_exit(&w_mtx, MTX_SPIN); 876 if (blessed(w, w1)) 877 goto out; 878 if (m1 == &Giant) { 879 if (w1->w_Giant_squawked) 880 goto out; 881 else 882 w1->w_Giant_squawked = 1; 883 } else { 884 if (w1->w_other_squawked) 885 goto out; 886 else 887 w1->w_other_squawked = 1; 888 } 889 printf("lock order reversal\n"); 890 printf(" 1st %s last acquired @ %s:%d\n", 891 w->w_description, w->w_file, w->w_line); 892 printf(" 2nd %p %s @ %s:%d\n", 893 m1, w1->w_description, w1->w_file, w1->w_line); 894 printf(" 3rd %p %s @ %s:%d\n", 895 m, w->w_description, file, line); 896#ifdef DDB 897 go_into_ddb = 1; 898#endif /* DDB */ 899 goto out; 900 } 901 } 902 m1 = LIST_FIRST(&p->p_heldmtx); 903 if (!itismychild(m1->mtx_witness, w)) 904 mtx_exit(&w_mtx, MTX_SPIN); 905 906out: 907#ifdef DDB 908 if (witness_ddb && go_into_ddb) 909 Debugger("witness_enter"); 910#endif /* DDB */ 911 w->w_file = file; 912 w->w_line = line; 913 m->mtx_line = line; 914 m->mtx_file = file; 915 916 /* 917 * If this pays off it likely means that a mutex being witnessed 918 * is acquired in hardclock. Put it in the ignore list. It is 919 * likely not the mutex this assert fails on. 920 */ 921 MPASS(m->mtx_held.le_prev == NULL); 922 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 923} 924 925void 926witness_exit(struct mtx *m, int flags, const char *file, int line) 927{ 928 struct witness *w; 929 930 w = m->mtx_witness; 931 932 if (flags & MTX_SPIN) { 933 if (!w->w_spin) 934 panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @" 935 " %s:%d", m->mtx_description, file, line); 936 if (m->mtx_recurse != 0) 937 return; 938 mtx_enter(&w_mtx, MTX_SPIN); 939 PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level); 940 mtx_exit(&w_mtx, MTX_SPIN); 941 return; 942 } 943 if (w->w_spin) 944 panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 945 m->mtx_description, file, line); 946 947 if (m->mtx_recurse != 0) 948 return; 949 950 if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold) 951 panic("switchable mtx_exit() of %s when not legal @ %s:%d", 952 m->mtx_description, file, line); 953 LIST_REMOVE(m, mtx_held); 954 m->mtx_held.le_prev = NULL; 955} 956 957void 958witness_try_enter(struct mtx *m, int flags, const char *file, int line) 959{ 960 struct proc *p; 961 struct witness *w = m->mtx_witness; 962 963 if (flags & MTX_SPIN) { 964 if (!w->w_spin) 965 panic("mutex_try_enter: " 966 "MTX_SPIN on MTX_DEF mutex %s @ %s:%d", 967 m->mtx_description, file, line); 968 if (m->mtx_recurse != 0) 969 return; 970 mtx_enter(&w_mtx, MTX_SPIN); 971 PCPU_SET(witness_spin_check, witness_spin_check | w->w_level); 972 mtx_exit(&w_mtx, MTX_SPIN); 973 return; 974 } 975 976 if (w->w_spin) 977 panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 978 m->mtx_description, file, line); 979 980 if (m->mtx_recurse != 0) 981 return; 982 983 w->w_file = file; 984 w->w_line = line; 985 m->mtx_line = line; 986 m->mtx_file = file; 987 p = CURPROC; 988 MPASS(m->mtx_held.le_prev == NULL); 989 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 990} 991 992void 993witness_display(void(*prnt)(const char *fmt, ...)) 994{ 995 struct witness *w, *w1; 996 997 witness_levelall(); 998 999 for (w = w_all; w; w = w->w_next) { 1000 if (w->w_file == NULL) 1001 continue; 1002 for (w1 = w_all; w1; w1 = w1->w_next) { 1003 if (isitmychild(w1, w)) 1004 break; 1005 } 1006 if (w1 != NULL) 1007 continue; 1008 /* 1009 * This lock has no anscestors, display its descendants. 1010 */ 1011 witness_displaydescendants(prnt, w); 1012 } 1013 prnt("\nMutex which were never acquired\n"); 1014 for (w = w_all; w; w = w->w_next) { 1015 if (w->w_file != NULL) 1016 continue; 1017 prnt("%s\n", w->w_description); 1018 } 1019} 1020 1021int 1022witness_sleep(int check_only, struct mtx *mtx, const char *file, int line) 1023{ 1024 struct mtx *m; 1025 struct proc *p; 1026 char **sleep; 1027 int n = 0; 1028 1029 p = CURPROC; 1030 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1031 m = LIST_NEXT(m, mtx_held)) { 1032 if (m == mtx) 1033 continue; 1034 for (sleep = sleep_list; *sleep!= NULL; sleep++) 1035 if (strcmp(m->mtx_description, *sleep) == 0) 1036 goto next; 1037 printf("%s:%d: %s with \"%s\" locked from %s:%d\n", 1038 file, line, check_only ? "could sleep" : "sleeping", 1039 m->mtx_description, 1040 m->mtx_witness->w_file, m->mtx_witness->w_line); 1041 n++; 1042 next: 1043 } 1044#ifdef DDB 1045 if (witness_ddb && n) 1046 Debugger("witness_sleep"); 1047#endif /* DDB */ 1048 return (n); 1049} 1050 1051static struct witness * 1052enroll(const char *description, int flag) 1053{ 1054 int i; 1055 struct witness *w, *w1; 1056 char **ignore; 1057 char **order; 1058 1059 if (!witness_watch) 1060 return (NULL); 1061 for (ignore = ignore_list; *ignore != NULL; ignore++) 1062 if (strcmp(description, *ignore) == 0) 1063 return (NULL); 1064 1065 if (w_inited == 0) { 1066 mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF); 1067 for (i = 0; i < WITNESS_COUNT; i++) { 1068 w = &w_data[i]; 1069 witness_free(w); 1070 } 1071 w_inited = 1; 1072 for (order = order_list; *order != NULL; order++) { 1073 w = enroll(*order, MTX_DEF); 1074 w->w_file = "order list"; 1075 for (order++; *order != NULL; order++) { 1076 w1 = enroll(*order, MTX_DEF); 1077 w1->w_file = "order list"; 1078 itismychild(w, w1); 1079 w = w1; 1080 } 1081 } 1082 } 1083 if ((flag & MTX_SPIN) && witness_skipspin) 1084 return (NULL); 1085 mtx_enter(&w_mtx, MTX_SPIN); 1086 for (w = w_all; w; w = w->w_next) { 1087 if (strcmp(description, w->w_description) == 0) { 1088 mtx_exit(&w_mtx, MTX_SPIN); 1089 return (w); 1090 } 1091 } 1092 if ((w = witness_get()) == NULL) 1093 return (NULL); 1094 w->w_next = w_all; 1095 w_all = w; 1096 w->w_description = description; 1097 mtx_exit(&w_mtx, MTX_SPIN); 1098 if (flag & MTX_SPIN) { 1099 w->w_spin = 1; 1100 1101 i = 1; 1102 for (order = spin_order_list; *order != NULL; order++) { 1103 if (strcmp(description, *order) == 0) 1104 break; 1105 i <<= 1; 1106 } 1107 if (*order == NULL) 1108 panic("spin lock %s not in order list", description); 1109 w->w_level = i; 1110 } 1111 return (w); 1112} 1113 1114static int 1115itismychild(struct witness *parent, struct witness *child) 1116{ 1117 static int recursed; 1118 1119 /* 1120 * Insert "child" after "parent" 1121 */ 1122 while (parent->w_morechildren) 1123 parent = parent->w_morechildren; 1124 1125 if (parent->w_childcnt == WITNESS_NCHILDREN) { 1126 if ((parent->w_morechildren = witness_get()) == NULL) 1127 return (1); 1128 parent = parent->w_morechildren; 1129 } 1130 MPASS(child != NULL); 1131 parent->w_children[parent->w_childcnt++] = child; 1132 /* 1133 * now prune whole tree 1134 */ 1135 if (recursed) 1136 return (0); 1137 recursed = 1; 1138 for (child = w_all; child != NULL; child = child->w_next) { 1139 for (parent = w_all; parent != NULL; 1140 parent = parent->w_next) { 1141 if (!isitmychild(parent, child)) 1142 continue; 1143 removechild(parent, child); 1144 if (isitmydescendant(parent, child)) 1145 continue; 1146 itismychild(parent, child); 1147 } 1148 } 1149 recursed = 0; 1150 witness_levelall(); 1151 return (0); 1152} 1153 1154static void 1155removechild(struct witness *parent, struct witness *child) 1156{ 1157 struct witness *w, *w1; 1158 int i; 1159 1160 for (w = parent; w != NULL; w = w->w_morechildren) 1161 for (i = 0; i < w->w_childcnt; i++) 1162 if (w->w_children[i] == child) 1163 goto found; 1164 return; 1165found: 1166 for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren) 1167 continue; 1168 w->w_children[i] = w1->w_children[--w1->w_childcnt]; 1169 MPASS(w->w_children[i] != NULL); 1170 1171 if (w1->w_childcnt != 0) 1172 return; 1173 1174 if (w1 == parent) 1175 return; 1176 for (w = parent; w->w_morechildren != w1; w = w->w_morechildren) 1177 continue; 1178 w->w_morechildren = 0; 1179 witness_free(w1); 1180} 1181 1182static int 1183isitmychild(struct witness *parent, struct witness *child) 1184{ 1185 struct witness *w; 1186 int i; 1187 1188 for (w = parent; w != NULL; w = w->w_morechildren) { 1189 for (i = 0; i < w->w_childcnt; i++) { 1190 if (w->w_children[i] == child) 1191 return (1); 1192 } 1193 } 1194 return (0); 1195} 1196 1197static int 1198isitmydescendant(struct witness *parent, struct witness *child) 1199{ 1200 struct witness *w; 1201 int i; 1202 int j; 1203 1204 for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) { 1205 MPASS(j < 1000); 1206 for (i = 0; i < w->w_childcnt; i++) { 1207 if (w->w_children[i] == child) 1208 return (1); 1209 } 1210 for (i = 0; i < w->w_childcnt; i++) { 1211 if (isitmydescendant(w->w_children[i], child)) 1212 return (1); 1213 } 1214 } 1215 return (0); 1216} 1217 1218void 1219witness_levelall (void) 1220{ 1221 struct witness *w, *w1; 1222 1223 for (w = w_all; w; w = w->w_next) 1224 if (!w->w_spin) 1225 w->w_level = 0; 1226 for (w = w_all; w; w = w->w_next) { 1227 if (w->w_spin) 1228 continue; 1229 for (w1 = w_all; w1; w1 = w1->w_next) { 1230 if (isitmychild(w1, w)) 1231 break; 1232 } 1233 if (w1 != NULL) 1234 continue; 1235 witness_leveldescendents(w, 0); 1236 } 1237} 1238 1239static void 1240witness_leveldescendents(struct witness *parent, int level) 1241{ 1242 int i; 1243 struct witness *w; 1244 1245 if (parent->w_level < level) 1246 parent->w_level = level; 1247 level++; 1248 for (w = parent; w != NULL; w = w->w_morechildren) 1249 for (i = 0; i < w->w_childcnt; i++) 1250 witness_leveldescendents(w->w_children[i], level); 1251} 1252 1253static void 1254witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1255 struct witness *parent) 1256{ 1257 struct witness *w; 1258 int i; 1259 int level = parent->w_level; 1260 1261 prnt("%d", level); 1262 if (level < 10) 1263 prnt(" "); 1264 for (i = 0; i < level; i++) 1265 prnt(" "); 1266 prnt("%s", parent->w_description); 1267 if (parent->w_file != NULL) { 1268 prnt(" -- last acquired @ %s", parent->w_file); 1269#ifndef W_USE_WHERE 1270 prnt(":%d", parent->w_line); 1271#endif 1272 prnt("\n"); 1273 } 1274 1275 for (w = parent; w != NULL; w = w->w_morechildren) 1276 for (i = 0; i < w->w_childcnt; i++) 1277 witness_displaydescendants(prnt, w->w_children[i]); 1278 } 1279 1280static int 1281dup_ok(struct witness *w) 1282{ 1283 char **dup; 1284 1285 for (dup = dup_list; *dup!= NULL; dup++) 1286 if (strcmp(w->w_description, *dup) == 0) 1287 return (1); 1288 return (0); 1289} 1290 1291static int 1292blessed(struct witness *w1, struct witness *w2) 1293{ 1294 int i; 1295 struct witness_blessed *b; 1296 1297 for (i = 0; i < blessed_count; i++) { 1298 b = &blessed_list[i]; 1299 if (strcmp(w1->w_description, b->b_lock1) == 0) { 1300 if (strcmp(w2->w_description, b->b_lock2) == 0) 1301 return (1); 1302 continue; 1303 } 1304 if (strcmp(w1->w_description, b->b_lock2) == 0) 1305 if (strcmp(w2->w_description, b->b_lock1) == 0) 1306 return (1); 1307 } 1308 return (0); 1309} 1310 1311static struct witness * 1312witness_get() 1313{ 1314 struct witness *w; 1315 1316 if ((w = w_free) == NULL) { 1317 witness_dead = 1; 1318 mtx_exit(&w_mtx, MTX_SPIN); 1319 printf("witness exhausted\n"); 1320 return (NULL); 1321 } 1322 w_free = w->w_next; 1323 bzero(w, sizeof(*w)); 1324 return (w); 1325} 1326 1327static void 1328witness_free(struct witness *w) 1329{ 1330 w->w_next = w_free; 1331 w_free = w; 1332} 1333 1334void 1335witness_list(struct proc *p) 1336{ 1337 struct mtx *m; 1338 1339 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1340 m = LIST_NEXT(m, mtx_held)) { 1341 printf("\t\"%s\" (%p) locked at %s:%d\n", 1342 m->mtx_description, m, 1343 m->mtx_witness->w_file, m->mtx_witness->w_line); 1344 } 1345} 1346 1347void 1348witness_save(struct mtx *m, const char **filep, int *linep) 1349{ 1350 *filep = m->mtx_witness->w_file; 1351 *linep = m->mtx_witness->w_line; 1352} 1353 1354void 1355witness_restore(struct mtx *m, const char *file, int line) 1356{ 1357 m->mtx_witness->w_file = file; 1358 m->mtx_witness->w_line = line; 1359} 1360 1361#endif /* (defined(MUTEX_DEBUG) && defined(WITNESS)) */ 1362