subr_witness.c revision 70861
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 * $FreeBSD: head/sys/kern/subr_witness.c 70861 2001-01-10 04:43:51Z jake $ 31 */ 32 33/* 34 * Main Entry: witness 35 * Pronunciation: 'wit-n&s 36 * Function: noun 37 * Etymology: Middle English witnesse, from Old English witnes knowledge, 38 * testimony, witness, from 2wit 39 * Date: before 12th century 40 * 1 : attestation of a fact or event : TESTIMONY 41 * 2 : one that gives evidence; specifically : one who testifies in 42 * a cause or before a judicial tribunal 43 * 3 : one asked to be present at a transaction so as to be able to 44 * testify to its having taken place 45 * 4 : one who has personal knowledge of something 46 * 5 a : something serving as evidence or proof : SIGN 47 * b : public affirmation by word or example of usually 48 * religious faith or conviction <the heroic witness to divine 49 * life -- Pilot> 50 * 6 capitalized : a member of the Jehovah's Witnesses 51 */ 52 53#include "opt_ddb.h" 54#include "opt_witness.h" 55 56/* 57 * Cause non-inlined mtx_*() to be compiled. 58 * Must be defined early because other system headers may include mutex.h. 59 */ 60#define _KERN_MUTEX_C_ 61 62#include <sys/param.h> 63#include <sys/bus.h> 64#include <sys/kernel.h> 65#include <sys/malloc.h> 66#include <sys/proc.h> 67#include <sys/sysctl.h> 68#include <sys/systm.h> 69#include <sys/vmmeter.h> 70#include <sys/ktr.h> 71 72#include <machine/atomic.h> 73#include <machine/bus.h> 74#include <machine/clock.h> 75#include <machine/cpu.h> 76 77#include <ddb/ddb.h> 78 79#include <vm/vm.h> 80#include <vm/vm_extern.h> 81 82#include <sys/mutex.h> 83 84/* 85 * Machine independent bits of the mutex implementation 86 */ 87/* All mutexes in system (used for debug/panic) */ 88#ifdef WITNESS 89static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0, 90 "All mutexes queue head" }; 91static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug, 92 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 93 { NULL, NULL }, &all_mtx, &all_mtx }; 94#else /* WITNESS */ 95static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head", 96 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 97 { NULL, NULL }, &all_mtx, &all_mtx }; 98#endif /* WITNESS */ 99 100static int mtx_cur_cnt; 101static int mtx_max_cnt; 102 103void _mtx_enter_giant_def(void); 104void _mtx_exit_giant_def(void); 105static void propagate_priority(struct proc *); 106 107#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 108#define mtx_owner(m) (mtx_unowned(m) ? NULL \ 109 : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) 110 111#define RETIP(x) *(((uintptr_t *)(&x)) - 1) 112#define SET_PRIO(p, pri) (p)->p_priority = (pri) 113 114/* 115 * XXX Temporary, for use from assembly language 116 */ 117 118void 119_mtx_enter_giant_def(void) 120{ 121 122 mtx_enter(&Giant, MTX_DEF); 123} 124 125void 126_mtx_exit_giant_def(void) 127{ 128 129 mtx_exit(&Giant, MTX_DEF); 130} 131 132static void 133propagate_priority(struct proc *p) 134{ 135 int pri = p->p_priority; 136 struct mtx *m = p->p_blocked; 137 138 mtx_assert(&sched_lock, MA_OWNED); 139 for (;;) { 140 struct proc *p1; 141 142 p = mtx_owner(m); 143 144 if (p == NULL) { 145 /* 146 * This really isn't quite right. Really 147 * ought to bump priority of process that 148 * next acquires the mutex. 149 */ 150 MPASS(m->mtx_lock == MTX_CONTESTED); 151 return; 152 } 153 MPASS(p->p_magic == P_MAGIC); 154 KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex")); 155 if (p->p_priority <= pri) 156 return; 157 158 /* 159 * Bump this process' priority. 160 */ 161 SET_PRIO(p, pri); 162 163 /* 164 * If lock holder is actually running, just bump priority. 165 */ 166#ifdef SMP 167 /* 168 * For SMP, we can check the p_oncpu field to see if we are 169 * running. 170 */ 171 if (p->p_oncpu != 0xff) { 172 MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); 173 return; 174 } 175#else 176 /* 177 * For UP, we check to see if p is curproc (this shouldn't 178 * ever happen however as it would mean we are in a deadlock.) 179 */ 180 if (p == curproc) { 181 panic("Deadlock detected"); 182 return; 183 } 184#endif 185 /* 186 * If on run queue move to new run queue, and 187 * quit. 188 */ 189 if (p->p_stat == SRUN) { 190 printf("XXX: moving process %d(%s) to a new run queue\n", 191 p->p_pid, p->p_comm); 192 MPASS(p->p_blocked == NULL); 193 remrunqueue(p); 194 setrunqueue(p); 195 return; 196 } 197 198 /* 199 * If we aren't blocked on a mutex, we should be. 200 */ 201 KASSERT(p->p_stat == SMTX, ( 202 "process %d(%s):%d holds %s but isn't blocked on a mutex\n", 203 p->p_pid, p->p_comm, p->p_stat, 204 m->mtx_description)); 205 206 /* 207 * Pick up the mutex that p is blocked on. 208 */ 209 m = p->p_blocked; 210 MPASS(m != NULL); 211 212 printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, 213 p->p_comm, m->mtx_description); 214 /* 215 * Check if the proc needs to be moved up on 216 * the blocked chain 217 */ 218 if (p == TAILQ_FIRST(&m->mtx_blocked)) { 219 printf("XXX: process at head of run queue\n"); 220 continue; 221 } 222 p1 = TAILQ_PREV(p, rq, p_procq); 223 if (p1->p_priority <= pri) { 224 printf( 225 "XXX: previous process %d(%s) has higher priority\n", 226 p->p_pid, p->p_comm); 227 continue; 228 } 229 230 /* 231 * Remove proc from blocked chain and determine where 232 * it should be moved up to. Since we know that p1 has 233 * a lower priority than p, we know that at least one 234 * process in the chain has a lower priority and that 235 * p1 will thus not be NULL after the loop. 236 */ 237 TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); 238 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { 239 MPASS(p1->p_magic == P_MAGIC); 240 if (p1->p_priority > pri) 241 break; 242 } 243 MPASS(p1 != NULL); 244 TAILQ_INSERT_BEFORE(p1, p, p_procq); 245 CTR4(KTR_LOCK, 246 "propagate_priority: p 0x%p moved before 0x%p on [0x%p] %s", 247 p, p1, m, m->mtx_description); 248 } 249} 250 251void 252mtx_enter_hard(struct mtx *m, int type, int saveintr) 253{ 254 struct proc *p = CURPROC; 255 256 KASSERT(p != NULL, ("curproc is NULL in mutex")); 257 258 switch (type) { 259 case MTX_DEF: 260 if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) { 261 m->mtx_recurse++; 262 atomic_set_ptr(&m->mtx_lock, MTX_RECURSE); 263 if ((type & MTX_QUIET) == 0) 264 CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); 265 return; 266 } 267 if ((type & MTX_QUIET) == 0) 268 CTR3(KTR_LOCK, 269 "mtx_enter: 0x%p contested (lock=%p) [0x%p]", 270 m, (void *)m->mtx_lock, (void *)RETIP(m)); 271 272 /* 273 * Save our priority. Even though p_nativepri is protected 274 * by sched_lock, we don't obtain it here as it can be 275 * expensive. Since this is the only place p_nativepri is 276 * set, and since two CPUs will not be executing the same 277 * process concurrently, we know that no other CPU is going 278 * to be messing with this. Also, p_nativepri is only read 279 * when we are blocked on a mutex, so that can't be happening 280 * right now either. 281 */ 282 p->p_nativepri = p->p_priority; 283 while (!_obtain_lock(m, p)) { 284 uintptr_t v; 285 struct proc *p1; 286 287 mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); 288 /* 289 * check if the lock has been released while 290 * waiting for the schedlock. 291 */ 292 if ((v = m->mtx_lock) == MTX_UNOWNED) { 293 mtx_exit(&sched_lock, MTX_SPIN); 294 continue; 295 } 296 /* 297 * The mutex was marked contested on release. This 298 * means that there are processes blocked on it. 299 */ 300 if (v == MTX_CONTESTED) { 301 p1 = TAILQ_FIRST(&m->mtx_blocked); 302 KASSERT(p1 != NULL, ("contested mutex has no contesters")); 303 KASSERT(p != NULL, ("curproc is NULL for contested mutex")); 304 m->mtx_lock = (uintptr_t)p | MTX_CONTESTED; 305 if (p1->p_priority < p->p_priority) { 306 SET_PRIO(p, p1->p_priority); 307 } 308 mtx_exit(&sched_lock, MTX_SPIN); 309 return; 310 } 311 /* 312 * If the mutex isn't already contested and 313 * a failure occurs setting the contested bit the 314 * mutex was either release or the 315 * state of the RECURSION bit changed. 316 */ 317 if ((v & MTX_CONTESTED) == 0 && 318 !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 319 (void *)(v | MTX_CONTESTED))) { 320 mtx_exit(&sched_lock, MTX_SPIN); 321 continue; 322 } 323 324 /* We definitely have to sleep for this lock */ 325 mtx_assert(m, MA_NOTOWNED); 326 327#ifdef notyet 328 /* 329 * If we're borrowing an interrupted thread's VM 330 * context must clean up before going to sleep. 331 */ 332 if (p->p_flag & (P_ITHD | P_SITHD)) { 333 ithd_t *it = (ithd_t *)p; 334 335 if (it->it_interrupted) { 336 if ((type & MTX_QUIET) == 0) 337 CTR2(KTR_LOCK, 338 "mtx_enter: 0x%x interrupted 0x%x", 339 it, it->it_interrupted); 340 intr_thd_fixup(it); 341 } 342 } 343#endif 344 345 /* Put us on the list of procs blocked on this mutex */ 346 if (TAILQ_EMPTY(&m->mtx_blocked)) { 347 p1 = (struct proc *)(m->mtx_lock & 348 MTX_FLAGMASK); 349 LIST_INSERT_HEAD(&p1->p_contested, m, 350 mtx_contested); 351 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 352 } else { 353 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) 354 if (p1->p_priority > p->p_priority) 355 break; 356 if (p1) 357 TAILQ_INSERT_BEFORE(p1, p, p_procq); 358 else 359 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, 360 p_procq); 361 } 362 363 p->p_blocked = m; /* Who we're blocked on */ 364 p->p_mtxname = m->mtx_description; 365 p->p_stat = SMTX; 366#if 0 367 propagate_priority(p); 368#endif 369 if ((type & MTX_QUIET) == 0) 370 CTR3(KTR_LOCK, 371 "mtx_enter: p 0x%p blocked on [0x%p] %s", 372 p, m, m->mtx_description); 373 mi_switch(); 374 if ((type & MTX_QUIET) == 0) 375 CTR3(KTR_LOCK, 376 "mtx_enter: p 0x%p free from blocked on [0x%p] %s", 377 p, m, m->mtx_description); 378 mtx_exit(&sched_lock, MTX_SPIN); 379 } 380 return; 381 case MTX_SPIN: 382 case MTX_SPIN | MTX_FIRST: 383 case MTX_SPIN | MTX_TOPHALF: 384 { 385 int i = 0; 386 387 if (m->mtx_lock == (uintptr_t)p) { 388 m->mtx_recurse++; 389 return; 390 } 391 if ((type & MTX_QUIET) == 0) 392 CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); 393 for (;;) { 394 if (_obtain_lock(m, p)) 395 break; 396 while (m->mtx_lock != MTX_UNOWNED) { 397 if (i++ < 1000000) 398 continue; 399 if (i++ < 6000000) 400 DELAY (1); 401#ifdef DDB 402 else if (!db_active) 403#else 404 else 405#endif 406 panic( 407 "spin lock %s held by 0x%p for > 5 seconds", 408 m->mtx_description, 409 (void *)m->mtx_lock); 410 } 411 } 412 413#ifdef MUTEX_DEBUG 414 if (type != MTX_SPIN) 415 m->mtx_saveintr = 0xbeefface; 416 else 417#endif 418 m->mtx_saveintr = saveintr; 419 if ((type & MTX_QUIET) == 0) 420 CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); 421 return; 422 } 423 } 424} 425 426void 427mtx_exit_hard(struct mtx *m, int type) 428{ 429 struct proc *p, *p1; 430 struct mtx *m1; 431 int pri; 432 433 p = CURPROC; 434 switch (type) { 435 case MTX_DEF: 436 case MTX_DEF | MTX_NOSWITCH: 437 if (m->mtx_recurse != 0) { 438 if (--(m->mtx_recurse) == 0) 439 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE); 440 if ((type & MTX_QUIET) == 0) 441 CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); 442 return; 443 } 444 mtx_enter(&sched_lock, MTX_SPIN); 445 if ((type & MTX_QUIET) == 0) 446 CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); 447 p1 = TAILQ_FIRST(&m->mtx_blocked); 448 MPASS(p->p_magic == P_MAGIC); 449 MPASS(p1->p_magic == P_MAGIC); 450 TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); 451 if (TAILQ_EMPTY(&m->mtx_blocked)) { 452 LIST_REMOVE(m, mtx_contested); 453 _release_lock_quick(m); 454 if ((type & MTX_QUIET) == 0) 455 CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); 456 } else 457 atomic_store_rel_ptr(&m->mtx_lock, 458 (void *)MTX_CONTESTED); 459 pri = MAXPRI; 460 LIST_FOREACH(m1, &p->p_contested, mtx_contested) { 461 int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; 462 if (cp < pri) 463 pri = cp; 464 } 465 if (pri > p->p_nativepri) 466 pri = p->p_nativepri; 467 SET_PRIO(p, pri); 468 if ((type & MTX_QUIET) == 0) 469 CTR2(KTR_LOCK, 470 "mtx_exit: 0x%p contested setrunqueue 0x%p", m, p1); 471 p1->p_blocked = NULL; 472 p1->p_mtxname = NULL; 473 p1->p_stat = SRUN; 474 setrunqueue(p1); 475 if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { 476#ifdef notyet 477 if (p->p_flag & (P_ITHD | P_SITHD)) { 478 ithd_t *it = (ithd_t *)p; 479 480 if (it->it_interrupted) { 481 if ((type & MTX_QUIET) == 0) 482 CTR2(KTR_LOCK, 483 "mtx_exit: 0x%x interruped 0x%x", 484 it, it->it_interrupted); 485 intr_thd_fixup(it); 486 } 487 } 488#endif 489 setrunqueue(p); 490 if ((type & MTX_QUIET) == 0) 491 CTR2(KTR_LOCK, 492 "mtx_exit: 0x%p switching out lock=0x%p", 493 m, (void *)m->mtx_lock); 494 mi_switch(); 495 if ((type & MTX_QUIET) == 0) 496 CTR2(KTR_LOCK, 497 "mtx_exit: 0x%p resuming lock=0x%p", 498 m, (void *)m->mtx_lock); 499 } 500 mtx_exit(&sched_lock, MTX_SPIN); 501 break; 502 case MTX_SPIN: 503 case MTX_SPIN | MTX_FIRST: 504 if (m->mtx_recurse != 0) { 505 m->mtx_recurse--; 506 return; 507 } 508 MPASS(mtx_owned(m)); 509 _release_lock_quick(m); 510 if (type & MTX_FIRST) 511 enable_intr(); /* XXX is this kosher? */ 512 else { 513 MPASS(m->mtx_saveintr != 0xbeefface); 514 restore_intr(m->mtx_saveintr); 515 } 516 break; 517 case MTX_SPIN | MTX_TOPHALF: 518 if (m->mtx_recurse != 0) { 519 m->mtx_recurse--; 520 return; 521 } 522 MPASS(mtx_owned(m)); 523 _release_lock_quick(m); 524 break; 525 default: 526 panic("mtx_exit_hard: unsupported type 0x%x\n", type); 527 } 528} 529 530#define MV_DESTROY 0 /* validate before destory */ 531#define MV_INIT 1 /* validate before init */ 532 533#ifdef MUTEX_DEBUG 534 535int mtx_validate __P((struct mtx *, int)); 536 537int 538mtx_validate(struct mtx *m, int when) 539{ 540 struct mtx *mp; 541 int i; 542 int retval = 0; 543 544 if (m == &all_mtx || cold) 545 return 0; 546 547 mtx_enter(&all_mtx, MTX_DEF); 548/* 549 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 550 * we can re-enable the kernacc() checks. 551 */ 552#ifndef __alpha__ 553 MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t), 554 VM_PROT_READ) == 1); 555#endif 556 MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx); 557 for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { 558#ifndef __alpha__ 559 if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t), 560 VM_PROT_READ) != 1) { 561 panic("mtx_validate: mp=%p mp->mtx_next=%p", 562 mp, mp->mtx_next); 563 } 564#endif 565 i++; 566 if (i > mtx_cur_cnt) { 567 panic("mtx_validate: too many in chain, known=%d\n", 568 mtx_cur_cnt); 569 } 570 } 571 MPASS(i == mtx_cur_cnt); 572 switch (when) { 573 case MV_DESTROY: 574 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 575 if (mp == m) 576 break; 577 MPASS(mp == m); 578 break; 579 case MV_INIT: 580 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 581 if (mp == m) { 582 /* 583 * Not good. This mutex already exists. 584 */ 585 printf("re-initing existing mutex %s\n", 586 m->mtx_description); 587 MPASS(m->mtx_lock == MTX_UNOWNED); 588 retval = 1; 589 } 590 } 591 mtx_exit(&all_mtx, MTX_DEF); 592 return (retval); 593} 594#endif 595 596void 597mtx_init(struct mtx *m, const char *t, int flag) 598{ 599#ifdef WITNESS 600 struct mtx_debug *debug; 601#endif 602 603 if ((flag & MTX_QUIET) == 0) 604 CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); 605#ifdef MUTEX_DEBUG 606 if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ 607 return; 608#endif 609#ifdef WITNESS 610 if (flag & MTX_COLD) 611 debug = m->mtx_debug; 612 else 613 debug = NULL; 614 if (debug == NULL) { 615#ifdef DIAGNOSTIC 616 if(cold && bootverbose) 617 printf("malloc'ing mtx_debug while cold for %s\n", t); 618#endif 619 620 /* XXX - should not use DEVBUF */ 621 debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, 622 M_NOWAIT | M_ZERO); 623 MPASS(debug != NULL); 624 } 625#endif 626 bzero((void *)m, sizeof *m); 627 TAILQ_INIT(&m->mtx_blocked); 628#ifdef WITNESS 629 m->mtx_debug = debug; 630#endif 631 m->mtx_description = t; 632 m->mtx_lock = MTX_UNOWNED; 633 /* Put on all mutex queue */ 634 mtx_enter(&all_mtx, MTX_DEF); 635 m->mtx_next = &all_mtx; 636 m->mtx_prev = all_mtx.mtx_prev; 637 m->mtx_prev->mtx_next = m; 638 all_mtx.mtx_prev = m; 639 if (++mtx_cur_cnt > mtx_max_cnt) 640 mtx_max_cnt = mtx_cur_cnt; 641 mtx_exit(&all_mtx, MTX_DEF); 642 witness_init(m, flag); 643} 644 645void 646mtx_destroy(struct mtx *m) 647{ 648 649 CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); 650#ifdef MUTEX_DEBUG 651 if (m->mtx_next == NULL) 652 panic("mtx_destroy: %p (%s) already destroyed", 653 m, m->mtx_description); 654 655 if (!mtx_owned(m)) { 656 MPASS(m->mtx_lock == MTX_UNOWNED); 657 } else { 658 MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); 659 } 660 mtx_validate(m, MV_DESTROY); /* diagnostic */ 661#endif 662 663#ifdef WITNESS 664 if (m->mtx_witness) 665 witness_destroy(m); 666#endif /* WITNESS */ 667 668 /* Remove from the all mutex queue */ 669 mtx_enter(&all_mtx, MTX_DEF); 670 m->mtx_next->mtx_prev = m->mtx_prev; 671 m->mtx_prev->mtx_next = m->mtx_next; 672#ifdef MUTEX_DEBUG 673 m->mtx_next = m->mtx_prev = NULL; 674#endif 675#ifdef WITNESS 676 free(m->mtx_debug, M_DEVBUF); 677 m->mtx_debug = NULL; 678#endif 679 mtx_cur_cnt--; 680 mtx_exit(&all_mtx, MTX_DEF); 681} 682 683/* 684 * The non-inlined versions of the mtx_*() functions are always built (above), 685 * but the witness code depends on the WITNESS kernel option being specified. 686 */ 687#ifdef WITNESS 688 689#define WITNESS_COUNT 200 690#define WITNESS_NCHILDREN 2 691 692int witness_watch = 1; 693 694struct witness { 695 struct witness *w_next; 696 const char *w_description; 697 const char *w_file; 698 int w_line; 699 struct witness *w_morechildren; 700 u_char w_childcnt; 701 u_char w_Giant_squawked:1; 702 u_char w_other_squawked:1; 703 u_char w_same_squawked:1; 704 u_char w_sleep:1; 705 u_char w_spin:1; /* this is a spin mutex */ 706 u_int w_level; 707 struct witness *w_children[WITNESS_NCHILDREN]; 708}; 709 710struct witness_blessed { 711 char *b_lock1; 712 char *b_lock2; 713}; 714 715#ifdef DDB 716/* 717 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 718 * drop into kdebug() when: 719 * - a lock heirarchy violation occurs 720 * - locks are held when going to sleep. 721 */ 722#ifdef WITNESS_DDB 723int witness_ddb = 1; 724#else 725int witness_ddb = 0; 726#endif 727SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, ""); 728#endif /* DDB */ 729 730#ifdef WITNESS_SKIPSPIN 731int witness_skipspin = 1; 732#else 733int witness_skipspin = 0; 734#endif 735SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0, 736 ""); 737 738MUTEX_DECLARE(static,w_mtx); 739static struct witness *w_free; 740static struct witness *w_all; 741static int w_inited; 742static int witness_dead; /* fatal error, probably no memory */ 743 744static struct witness w_data[WITNESS_COUNT]; 745 746static struct witness *enroll __P((const char *description, int flag)); 747static int itismychild __P((struct witness *parent, struct witness *child)); 748static void removechild __P((struct witness *parent, struct witness *child)); 749static int isitmychild __P((struct witness *parent, struct witness *child)); 750static int isitmydescendant __P((struct witness *parent, struct witness *child)); 751static int dup_ok __P((struct witness *)); 752static int blessed __P((struct witness *, struct witness *)); 753static void witness_displaydescendants 754 __P((void(*)(const char *fmt, ...), struct witness *)); 755static void witness_leveldescendents __P((struct witness *parent, int level)); 756static void witness_levelall __P((void)); 757static struct witness * witness_get __P((void)); 758static void witness_free __P((struct witness *m)); 759 760 761static char *ignore_list[] = { 762 "witness lock", 763 NULL 764}; 765 766static char *spin_order_list[] = { 767 "sio", 768 "sched lock", 769#ifdef __i386__ 770 "clk", 771#endif 772 "callout", 773 /* 774 * leaf locks 775 */ 776 NULL 777}; 778 779static char *order_list[] = { 780 "uidinfo hash", "uidinfo struct", NULL, 781 NULL 782}; 783 784static char *dup_list[] = { 785 NULL 786}; 787 788static char *sleep_list[] = { 789 "Giant", 790 NULL 791}; 792 793/* 794 * Pairs of locks which have been blessed 795 * Don't complain about order problems with blessed locks 796 */ 797static struct witness_blessed blessed_list[] = { 798}; 799static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); 800 801void 802witness_init(struct mtx *m, int flag) 803{ 804 m->mtx_witness = enroll(m->mtx_description, flag); 805} 806 807void 808witness_destroy(struct mtx *m) 809{ 810 struct mtx *m1; 811 struct proc *p; 812 p = CURPROC; 813 for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL; 814 m1 = LIST_NEXT(m1, mtx_held)) { 815 if (m1 == m) { 816 LIST_REMOVE(m, mtx_held); 817 break; 818 } 819 } 820 return; 821 822} 823 824void 825witness_enter(struct mtx *m, int flags, const char *file, int line) 826{ 827 struct witness *w, *w1; 828 struct mtx *m1; 829 struct proc *p; 830 int i; 831#ifdef DDB 832 int go_into_ddb = 0; 833#endif /* DDB */ 834 835 if (panicstr) 836 return; 837 w = m->mtx_witness; 838 p = CURPROC; 839 840 if (flags & MTX_SPIN) { 841 if (!w->w_spin) 842 panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @" 843 " %s:%d", m->mtx_description, file, line); 844 if (m->mtx_recurse != 0) 845 return; 846 mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET); 847 i = PCPU_GET(witness_spin_check); 848 if (i != 0 && w->w_level < i) { 849 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 850 panic("mutex_enter(%s:%x, MTX_SPIN) out of order @" 851 " %s:%d already holding %s:%x", 852 m->mtx_description, w->w_level, file, line, 853 spin_order_list[ffs(i)-1], i); 854 } 855 PCPU_SET(witness_spin_check, i | w->w_level); 856 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 857 w->w_file = file; 858 w->w_line = line; 859 m->mtx_line = line; 860 m->mtx_file = file; 861 return; 862 } 863 if (w->w_spin) 864 panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 865 m->mtx_description, file, line); 866 867 if (m->mtx_recurse != 0) 868 return; 869 if (witness_dead) 870 goto out; 871 if (cold) 872 goto out; 873 874 if (!mtx_legal2block()) 875 panic("blockable mtx_enter() of %s when not legal @ %s:%d", 876 m->mtx_description, file, line); 877 /* 878 * Is this the first mutex acquired 879 */ 880 if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL) 881 goto out; 882 883 if ((w1 = m1->mtx_witness) == w) { 884 if (w->w_same_squawked || dup_ok(w)) 885 goto out; 886 w->w_same_squawked = 1; 887 printf("acquring duplicate lock of same type: \"%s\"\n", 888 m->mtx_description); 889 printf(" 1st @ %s:%d\n", w->w_file, w->w_line); 890 printf(" 2nd @ %s:%d\n", file, line); 891#ifdef DDB 892 go_into_ddb = 1; 893#endif /* DDB */ 894 goto out; 895 } 896 MPASS(!mtx_owned(&w_mtx)); 897 mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET); 898 /* 899 * If we have a known higher number just say ok 900 */ 901 if (witness_watch > 1 && w->w_level > w1->w_level) { 902 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 903 goto out; 904 } 905 if (isitmydescendant(m1->mtx_witness, w)) { 906 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 907 goto out; 908 } 909 for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) { 910 911 MPASS(i < 200); 912 w1 = m1->mtx_witness; 913 if (isitmydescendant(w, w1)) { 914 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 915 if (blessed(w, w1)) 916 goto out; 917 if (m1 == &Giant) { 918 if (w1->w_Giant_squawked) 919 goto out; 920 else 921 w1->w_Giant_squawked = 1; 922 } else { 923 if (w1->w_other_squawked) 924 goto out; 925 else 926 w1->w_other_squawked = 1; 927 } 928 printf("lock order reversal\n"); 929 printf(" 1st %s last acquired @ %s:%d\n", 930 w->w_description, w->w_file, w->w_line); 931 printf(" 2nd %p %s @ %s:%d\n", 932 m1, w1->w_description, w1->w_file, w1->w_line); 933 printf(" 3rd %p %s @ %s:%d\n", 934 m, w->w_description, file, line); 935#ifdef DDB 936 go_into_ddb = 1; 937#endif /* DDB */ 938 goto out; 939 } 940 } 941 m1 = LIST_FIRST(&p->p_heldmtx); 942 if (!itismychild(m1->mtx_witness, w)) 943 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 944 945out: 946#ifdef DDB 947 if (witness_ddb && go_into_ddb) 948 Debugger("witness_enter"); 949#endif /* DDB */ 950 w->w_file = file; 951 w->w_line = line; 952 m->mtx_line = line; 953 m->mtx_file = file; 954 955 /* 956 * If this pays off it likely means that a mutex being witnessed 957 * is acquired in hardclock. Put it in the ignore list. It is 958 * likely not the mutex this assert fails on. 959 */ 960 MPASS(m->mtx_held.le_prev == NULL); 961 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 962} 963 964void 965witness_exit(struct mtx *m, int flags, const char *file, int line) 966{ 967 struct witness *w; 968 969 if (panicstr) 970 return; 971 w = m->mtx_witness; 972 973 if (flags & MTX_SPIN) { 974 if (!w->w_spin) 975 panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @" 976 " %s:%d", m->mtx_description, file, line); 977 if (m->mtx_recurse != 0) 978 return; 979 mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET); 980 PCPU_SET(witness_spin_check, 981 PCPU_GET(witness_spin_check) & ~w->w_level); 982 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 983 return; 984 } 985 if (w->w_spin) 986 panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 987 m->mtx_description, file, line); 988 989 if (m->mtx_recurse != 0) 990 return; 991 992 if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold) 993 panic("switchable mtx_exit() of %s when not legal @ %s:%d", 994 m->mtx_description, file, line); 995 LIST_REMOVE(m, mtx_held); 996 m->mtx_held.le_prev = NULL; 997} 998 999void 1000witness_try_enter(struct mtx *m, int flags, const char *file, int line) 1001{ 1002 struct proc *p; 1003 struct witness *w = m->mtx_witness; 1004 1005 if (panicstr) 1006 return; 1007 if (flags & MTX_SPIN) { 1008 if (!w->w_spin) 1009 panic("mutex_try_enter: " 1010 "MTX_SPIN on MTX_DEF mutex %s @ %s:%d", 1011 m->mtx_description, file, line); 1012 if (m->mtx_recurse != 0) 1013 return; 1014 mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET); 1015 PCPU_SET(witness_spin_check, 1016 PCPU_GET(witness_spin_check) | w->w_level); 1017 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 1018 w->w_file = file; 1019 w->w_line = line; 1020 m->mtx_line = line; 1021 m->mtx_file = file; 1022 return; 1023 } 1024 1025 if (w->w_spin) 1026 panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 1027 m->mtx_description, file, line); 1028 1029 if (m->mtx_recurse != 0) 1030 return; 1031 1032 w->w_file = file; 1033 w->w_line = line; 1034 m->mtx_line = line; 1035 m->mtx_file = file; 1036 p = CURPROC; 1037 MPASS(m->mtx_held.le_prev == NULL); 1038 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 1039} 1040 1041void 1042witness_display(void(*prnt)(const char *fmt, ...)) 1043{ 1044 struct witness *w, *w1; 1045 1046 witness_levelall(); 1047 1048 for (w = w_all; w; w = w->w_next) { 1049 if (w->w_file == NULL) 1050 continue; 1051 for (w1 = w_all; w1; w1 = w1->w_next) { 1052 if (isitmychild(w1, w)) 1053 break; 1054 } 1055 if (w1 != NULL) 1056 continue; 1057 /* 1058 * This lock has no anscestors, display its descendants. 1059 */ 1060 witness_displaydescendants(prnt, w); 1061 } 1062 prnt("\nMutex which were never acquired\n"); 1063 for (w = w_all; w; w = w->w_next) { 1064 if (w->w_file != NULL) 1065 continue; 1066 prnt("%s\n", w->w_description); 1067 } 1068} 1069 1070int 1071witness_sleep(int check_only, struct mtx *mtx, const char *file, int line) 1072{ 1073 struct mtx *m; 1074 struct proc *p; 1075 char **sleep; 1076 int n = 0; 1077 1078 p = CURPROC; 1079 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1080 m = LIST_NEXT(m, mtx_held)) { 1081 if (m == mtx) 1082 continue; 1083 for (sleep = sleep_list; *sleep!= NULL; sleep++) 1084 if (strcmp(m->mtx_description, *sleep) == 0) 1085 goto next; 1086 printf("%s:%d: %s with \"%s\" locked from %s:%d\n", 1087 file, line, check_only ? "could sleep" : "sleeping", 1088 m->mtx_description, 1089 m->mtx_witness->w_file, m->mtx_witness->w_line); 1090 n++; 1091 next: 1092 } 1093#ifdef DDB 1094 if (witness_ddb && n) 1095 Debugger("witness_sleep"); 1096#endif /* DDB */ 1097 return (n); 1098} 1099 1100static struct witness * 1101enroll(const char *description, int flag) 1102{ 1103 int i; 1104 struct witness *w, *w1; 1105 char **ignore; 1106 char **order; 1107 1108 if (!witness_watch) 1109 return (NULL); 1110 for (ignore = ignore_list; *ignore != NULL; ignore++) 1111 if (strcmp(description, *ignore) == 0) 1112 return (NULL); 1113 1114 if (w_inited == 0) { 1115 mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_SPIN); 1116 for (i = 0; i < WITNESS_COUNT; i++) { 1117 w = &w_data[i]; 1118 witness_free(w); 1119 } 1120 w_inited = 1; 1121 for (order = order_list; *order != NULL; order++) { 1122 w = enroll(*order, MTX_DEF); 1123 w->w_file = "order list"; 1124 for (order++; *order != NULL; order++) { 1125 w1 = enroll(*order, MTX_DEF); 1126 w1->w_file = "order list"; 1127 itismychild(w, w1); 1128 w = w1; 1129 } 1130 } 1131 } 1132 if ((flag & MTX_SPIN) && witness_skipspin) 1133 return (NULL); 1134 mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET); 1135 for (w = w_all; w; w = w->w_next) { 1136 if (strcmp(description, w->w_description) == 0) { 1137 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 1138 return (w); 1139 } 1140 } 1141 if ((w = witness_get()) == NULL) 1142 return (NULL); 1143 w->w_next = w_all; 1144 w_all = w; 1145 w->w_description = description; 1146 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 1147 if (flag & MTX_SPIN) { 1148 w->w_spin = 1; 1149 1150 i = 1; 1151 for (order = spin_order_list; *order != NULL; order++) { 1152 if (strcmp(description, *order) == 0) 1153 break; 1154 i <<= 1; 1155 } 1156 if (*order == NULL) 1157 panic("spin lock %s not in order list", description); 1158 w->w_level = i; 1159 } 1160 return (w); 1161} 1162 1163static int 1164itismychild(struct witness *parent, struct witness *child) 1165{ 1166 static int recursed; 1167 1168 /* 1169 * Insert "child" after "parent" 1170 */ 1171 while (parent->w_morechildren) 1172 parent = parent->w_morechildren; 1173 1174 if (parent->w_childcnt == WITNESS_NCHILDREN) { 1175 if ((parent->w_morechildren = witness_get()) == NULL) 1176 return (1); 1177 parent = parent->w_morechildren; 1178 } 1179 MPASS(child != NULL); 1180 parent->w_children[parent->w_childcnt++] = child; 1181 /* 1182 * now prune whole tree 1183 */ 1184 if (recursed) 1185 return (0); 1186 recursed = 1; 1187 for (child = w_all; child != NULL; child = child->w_next) { 1188 for (parent = w_all; parent != NULL; 1189 parent = parent->w_next) { 1190 if (!isitmychild(parent, child)) 1191 continue; 1192 removechild(parent, child); 1193 if (isitmydescendant(parent, child)) 1194 continue; 1195 itismychild(parent, child); 1196 } 1197 } 1198 recursed = 0; 1199 witness_levelall(); 1200 return (0); 1201} 1202 1203static void 1204removechild(struct witness *parent, struct witness *child) 1205{ 1206 struct witness *w, *w1; 1207 int i; 1208 1209 for (w = parent; w != NULL; w = w->w_morechildren) 1210 for (i = 0; i < w->w_childcnt; i++) 1211 if (w->w_children[i] == child) 1212 goto found; 1213 return; 1214found: 1215 for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren) 1216 continue; 1217 w->w_children[i] = w1->w_children[--w1->w_childcnt]; 1218 MPASS(w->w_children[i] != NULL); 1219 1220 if (w1->w_childcnt != 0) 1221 return; 1222 1223 if (w1 == parent) 1224 return; 1225 for (w = parent; w->w_morechildren != w1; w = w->w_morechildren) 1226 continue; 1227 w->w_morechildren = 0; 1228 witness_free(w1); 1229} 1230 1231static int 1232isitmychild(struct witness *parent, struct witness *child) 1233{ 1234 struct witness *w; 1235 int i; 1236 1237 for (w = parent; w != NULL; w = w->w_morechildren) { 1238 for (i = 0; i < w->w_childcnt; i++) { 1239 if (w->w_children[i] == child) 1240 return (1); 1241 } 1242 } 1243 return (0); 1244} 1245 1246static int 1247isitmydescendant(struct witness *parent, struct witness *child) 1248{ 1249 struct witness *w; 1250 int i; 1251 int j; 1252 1253 for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) { 1254 MPASS(j < 1000); 1255 for (i = 0; i < w->w_childcnt; i++) { 1256 if (w->w_children[i] == child) 1257 return (1); 1258 } 1259 for (i = 0; i < w->w_childcnt; i++) { 1260 if (isitmydescendant(w->w_children[i], child)) 1261 return (1); 1262 } 1263 } 1264 return (0); 1265} 1266 1267void 1268witness_levelall (void) 1269{ 1270 struct witness *w, *w1; 1271 1272 for (w = w_all; w; w = w->w_next) 1273 if (!w->w_spin) 1274 w->w_level = 0; 1275 for (w = w_all; w; w = w->w_next) { 1276 if (w->w_spin) 1277 continue; 1278 for (w1 = w_all; w1; w1 = w1->w_next) { 1279 if (isitmychild(w1, w)) 1280 break; 1281 } 1282 if (w1 != NULL) 1283 continue; 1284 witness_leveldescendents(w, 0); 1285 } 1286} 1287 1288static void 1289witness_leveldescendents(struct witness *parent, int level) 1290{ 1291 int i; 1292 struct witness *w; 1293 1294 if (parent->w_level < level) 1295 parent->w_level = level; 1296 level++; 1297 for (w = parent; w != NULL; w = w->w_morechildren) 1298 for (i = 0; i < w->w_childcnt; i++) 1299 witness_leveldescendents(w->w_children[i], level); 1300} 1301 1302static void 1303witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1304 struct witness *parent) 1305{ 1306 struct witness *w; 1307 int i; 1308 int level = parent->w_level; 1309 1310 prnt("%d", level); 1311 if (level < 10) 1312 prnt(" "); 1313 for (i = 0; i < level; i++) 1314 prnt(" "); 1315 prnt("%s", parent->w_description); 1316 if (parent->w_file != NULL) { 1317 prnt(" -- last acquired @ %s", parent->w_file); 1318#ifndef W_USE_WHERE 1319 prnt(":%d", parent->w_line); 1320#endif 1321 prnt("\n"); 1322 } 1323 1324 for (w = parent; w != NULL; w = w->w_morechildren) 1325 for (i = 0; i < w->w_childcnt; i++) 1326 witness_displaydescendants(prnt, w->w_children[i]); 1327 } 1328 1329static int 1330dup_ok(struct witness *w) 1331{ 1332 char **dup; 1333 1334 for (dup = dup_list; *dup!= NULL; dup++) 1335 if (strcmp(w->w_description, *dup) == 0) 1336 return (1); 1337 return (0); 1338} 1339 1340static int 1341blessed(struct witness *w1, struct witness *w2) 1342{ 1343 int i; 1344 struct witness_blessed *b; 1345 1346 for (i = 0; i < blessed_count; i++) { 1347 b = &blessed_list[i]; 1348 if (strcmp(w1->w_description, b->b_lock1) == 0) { 1349 if (strcmp(w2->w_description, b->b_lock2) == 0) 1350 return (1); 1351 continue; 1352 } 1353 if (strcmp(w1->w_description, b->b_lock2) == 0) 1354 if (strcmp(w2->w_description, b->b_lock1) == 0) 1355 return (1); 1356 } 1357 return (0); 1358} 1359 1360static struct witness * 1361witness_get() 1362{ 1363 struct witness *w; 1364 1365 if ((w = w_free) == NULL) { 1366 witness_dead = 1; 1367 mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET); 1368 printf("witness exhausted\n"); 1369 return (NULL); 1370 } 1371 w_free = w->w_next; 1372 bzero(w, sizeof(*w)); 1373 return (w); 1374} 1375 1376static void 1377witness_free(struct witness *w) 1378{ 1379 w->w_next = w_free; 1380 w_free = w; 1381} 1382 1383int 1384witness_list(struct proc *p) 1385{ 1386 struct mtx *m; 1387 int nheld; 1388 1389 nheld = 0; 1390 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1391 m = LIST_NEXT(m, mtx_held)) { 1392 printf("\t\"%s\" (%p) locked at %s:%d\n", 1393 m->mtx_description, m, 1394 m->mtx_witness->w_file, m->mtx_witness->w_line); 1395 nheld++; 1396 } 1397 1398 return (nheld); 1399} 1400 1401void 1402witness_save(struct mtx *m, const char **filep, int *linep) 1403{ 1404 *filep = m->mtx_witness->w_file; 1405 *linep = m->mtx_witness->w_line; 1406} 1407 1408void 1409witness_restore(struct mtx *m, const char *file, int line) 1410{ 1411 m->mtx_witness->w_file = file; 1412 m->mtx_witness->w_line = line; 1413} 1414 1415#endif /* WITNESS */ 1416