subr_witness.c revision 125348
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Implementation of the `witness' lock verifier. Originally implemented for
 * mutexes in BSD/OS. Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 */

/*
 *	Main Entry: witness
 *	Pronunciation: 'wit-n&s
 *	Function: noun
 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
 *	    testimony, witness, from 2wit
 *	Date: before 12th century
 *	1 : attestation of a fact or event : TESTIMONY
 *	2 : one that gives evidence; specifically : one who testifies in
 *	    a cause or before a judicial tribunal
 *	3 : one asked to be present at a transaction so as to be able to
 *	    testify to its having taken place
 *	4 : one who has personal knowledge of something
 *	5 a : something serving as evidence or proof : SIGN
 *	  b : public affirmation by word or example of usually
 *	      religious faith or conviction <the heroic witness to divine
 *	      life -- Pilot>
 *	6 capitalized : a member of the Jehovah's Witnesses
 */

/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes. Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 * This rule is less obvious, but is a result of Giant providing the same
 * semantics as spl(). Basically, when a thread sleeps, it must release
 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule
 * 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 * This rule is also not quite as obvious. Giant may be acquired after
 * a sleepable lock because it is a non-sleepable lock and non-sleepable
 * locks may always be acquired while holding a sleepable lock. The second
 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose
 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1
 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and
 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to
 * execute.
Thus, acquiring Giant both before and after a sleepable lock 81 * will not result in a lock order reversal. 82 */ 83 84#include <sys/cdefs.h> 85__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 125348 2004-02-02 22:15:17Z jhb $"); 86 87#include "opt_ddb.h" 88#include "opt_witness.h" 89 90#include <sys/param.h> 91#include <sys/bus.h> 92#include <sys/kernel.h> 93#include <sys/ktr.h> 94#include <sys/lock.h> 95#include <sys/malloc.h> 96#include <sys/mutex.h> 97#include <sys/proc.h> 98#include <sys/sysctl.h> 99#include <sys/systm.h> 100 101#include <ddb/ddb.h> 102 103#include <machine/stdarg.h> 104 105/* Define this to check for blessed mutexes */ 106#undef BLESSING 107 108#define WITNESS_COUNT 200 109#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) 110/* 111 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads 112 * will hold LOCK_NCHILDREN * 2 locks. We handle failure ok, and we should 113 * probably be safe for the most part, but it's still a SWAG. 114 */ 115#define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2 116 117#define WITNESS_NCHILDREN 6 118 119struct witness_child_list_entry; 120 121struct witness { 122 const char *w_name; 123 struct lock_class *w_class; 124 STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ 125 STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 126 struct witness_child_list_entry *w_children; /* Great evilness... 
*/ 127 const char *w_file; 128 int w_line; 129 u_int w_level; 130 u_int w_refcount; 131 u_char w_Giant_squawked:1; 132 u_char w_other_squawked:1; 133 u_char w_same_squawked:1; 134 u_char w_displayed:1; 135}; 136 137struct witness_child_list_entry { 138 struct witness_child_list_entry *wcl_next; 139 struct witness *wcl_children[WITNESS_NCHILDREN]; 140 u_int wcl_count; 141}; 142 143STAILQ_HEAD(witness_list, witness); 144 145#ifdef BLESSING 146struct witness_blessed { 147 const char *b_lock1; 148 const char *b_lock2; 149}; 150#endif 151 152struct witness_order_list_entry { 153 const char *w_name; 154 struct lock_class *w_class; 155}; 156 157#ifdef BLESSING 158static int blessed(struct witness *, struct witness *); 159#endif 160static int depart(struct witness *w); 161static struct witness *enroll(const char *description, 162 struct lock_class *lock_class); 163static int insertchild(struct witness *parent, struct witness *child); 164static int isitmychild(struct witness *parent, struct witness *child); 165static int isitmydescendant(struct witness *parent, struct witness *child); 166static int itismychild(struct witness *parent, struct witness *child); 167static int rebalancetree(struct witness_list *list); 168static void removechild(struct witness *parent, struct witness *child); 169static int reparentchildren(struct witness *newparent, 170 struct witness *oldparent); 171static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); 172static void witness_displaydescendants(void(*)(const char *fmt, ...), 173 struct witness *, int indent); 174static const char *fixup_filename(const char *file); 175static void witness_leveldescendents(struct witness *parent, int level); 176static void witness_levelall(void); 177static struct witness *witness_get(void); 178static void witness_free(struct witness *m); 179static struct witness_child_list_entry *witness_child_get(void); 180static void witness_child_free(struct witness_child_list_entry *wcl); 181static struct lock_list_entry 
*witness_lock_list_get(void); 182static void witness_lock_list_free(struct lock_list_entry *lle); 183static struct lock_instance *find_instance(struct lock_list_entry *lock_list, 184 struct lock_object *lock); 185static void witness_list_lock(struct lock_instance *instance); 186#ifdef DDB 187static void witness_list(struct thread *td); 188static void witness_display_list(void(*prnt)(const char *fmt, ...), 189 struct witness_list *list); 190static void witness_display(void(*)(const char *fmt, ...)); 191#endif 192 193MALLOC_DEFINE(M_WITNESS, "witness", "witness structure"); 194 195/* 196 * If set to 0, witness is disabled. If set to 1, witness performs full lock 197 * order checking for all locks. If set to 2 or higher, then witness skips 198 * the full lock order check if the lock being acquired is at a higher level 199 * (i.e. farther down in the tree) than the current lock. This last mode is 200 * somewhat experimental and not considered fully safe. At runtime, this 201 * value may be set to 0 to turn off witness. witness is not allowed be 202 * turned on once it is turned off, however. 203 */ 204static int witness_watch = 1; 205TUNABLE_INT("debug.witness_watch", &witness_watch); 206SYSCTL_PROC(_debug, OID_AUTO, witness_watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0, 207 sysctl_debug_witness_watch, "I", "witness is watching lock operations"); 208 209#ifdef DDB 210/* 211 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 212 * drop into kdebug() when: 213 * - a lock heirarchy violation occurs 214 * - locks are held when going to sleep. 
215 */ 216#ifdef WITNESS_DDB 217int witness_ddb = 1; 218#else 219int witness_ddb = 0; 220#endif 221TUNABLE_INT("debug.witness_ddb", &witness_ddb); 222SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, ""); 223 224/* 225 * When DDB is enabled and witness_trace is set to 1, it will cause the system 226 * to print a stack trace: 227 * - a lock heirarchy violation occurs 228 * - locks are held when going to sleep. 229 */ 230int witness_trace = 1; 231TUNABLE_INT("debug.witness_trace", &witness_trace); 232SYSCTL_INT(_debug, OID_AUTO, witness_trace, CTLFLAG_RW, &witness_trace, 0, ""); 233#endif /* DDB */ 234 235#ifdef WITNESS_SKIPSPIN 236int witness_skipspin = 1; 237#else 238int witness_skipspin = 0; 239#endif 240TUNABLE_INT("debug.witness_skipspin", &witness_skipspin); 241SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0, 242 ""); 243 244static struct mtx w_mtx; 245static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); 246static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); 247static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); 248static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); 249static struct witness_child_list_entry *w_child_free = NULL; 250static struct lock_list_entry *w_lock_list_free = NULL; 251 252static struct witness w_data[WITNESS_COUNT]; 253static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT]; 254static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; 255 256static struct witness_order_list_entry order_lists[] = { 257 { "proctree", &lock_class_sx }, 258 { "allproc", &lock_class_sx }, 259 { "Giant", &lock_class_mtx_sleep }, 260 { "filedesc structure", &lock_class_mtx_sleep }, 261 { "pipe mutex", &lock_class_mtx_sleep }, 262 { "sigio lock", &lock_class_mtx_sleep }, 263 { "process group", &lock_class_mtx_sleep }, 264 { "process lock", &lock_class_mtx_sleep }, 265 { "session", &lock_class_mtx_sleep }, 266 { "uidinfo 
hash", &lock_class_mtx_sleep }, 267 { "uidinfo struct", &lock_class_mtx_sleep }, 268 { "allprison", &lock_class_mtx_sleep }, 269 { NULL, NULL }, 270 /* 271 * spin locks 272 */ 273#ifdef SMP 274 { "ap boot", &lock_class_mtx_spin }, 275#endif 276 { "sio", &lock_class_mtx_spin }, 277#ifdef __i386__ 278 { "cy", &lock_class_mtx_spin }, 279#endif 280 { "uart_hwmtx", &lock_class_mtx_spin }, 281 { "sabtty", &lock_class_mtx_spin }, 282 { "zstty", &lock_class_mtx_spin }, 283 { "ng_node", &lock_class_mtx_spin }, 284 { "ng_worklist", &lock_class_mtx_spin }, 285 { "taskqueue_fast", &lock_class_mtx_spin }, 286 { "intr table", &lock_class_mtx_spin }, 287 { "ithread table lock", &lock_class_mtx_spin }, 288 { "sched lock", &lock_class_mtx_spin }, 289 { "turnstile chain", &lock_class_mtx_spin }, 290 { "td_contested", &lock_class_mtx_spin }, 291 { "callout", &lock_class_mtx_spin }, 292 { "entropy harvest", &lock_class_mtx_spin }, 293 { "entropy harvest buffers", &lock_class_mtx_spin }, 294 /* 295 * leaf locks 296 */ 297 { "allpmaps", &lock_class_mtx_spin }, 298 { "vm page queue free mutex", &lock_class_mtx_spin }, 299 { "icu", &lock_class_mtx_spin }, 300#ifdef SMP 301 { "smp rendezvous", &lock_class_mtx_spin }, 302#if defined(__i386__) || defined(__amd64__) 303 { "tlb", &lock_class_mtx_spin }, 304 { "lazypmap", &lock_class_mtx_spin }, 305#endif 306#ifdef __sparc64__ 307 { "ipi", &lock_class_mtx_spin }, 308#endif 309#endif 310 { "clk", &lock_class_mtx_spin }, 311 { "mutex profiling lock", &lock_class_mtx_spin }, 312 { "kse zombie lock", &lock_class_mtx_spin }, 313 { "ALD Queue", &lock_class_mtx_spin }, 314#ifdef __ia64__ 315 { "MCA spin lock", &lock_class_mtx_spin }, 316#endif 317#if defined(__i386__) || defined(__amd64__) 318 { "pcicfg", &lock_class_mtx_spin }, 319#endif 320 { NULL, NULL }, 321 { NULL, NULL } 322}; 323 324#ifdef BLESSING 325/* 326 * Pairs of locks which have been blessed 327 * Don't complain about order problems with blessed locks 328 */ 329static struct 
witness_blessed blessed_list[] = { 330}; 331static int blessed_count = 332 sizeof(blessed_list) / sizeof(struct witness_blessed); 333#endif 334 335/* 336 * List of all locks in the system. 337 */ 338TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks); 339 340static struct mtx all_mtx = { 341 { &lock_class_mtx_sleep, /* mtx_object.lo_class */ 342 "All locks list", /* mtx_object.lo_name */ 343 "All locks list", /* mtx_object.lo_type */ 344 LO_INITIALIZED, /* mtx_object.lo_flags */ 345 { NULL, NULL }, /* mtx_object.lo_list */ 346 NULL }, /* mtx_object.lo_witness */ 347 MTX_UNOWNED, 0 /* mtx_lock, mtx_recurse */ 348}; 349 350/* 351 * This global is set to 0 once it becomes safe to use the witness code. 352 */ 353static int witness_cold = 1; 354 355/* 356 * Global variables for book keeping. 357 */ 358static int lock_cur_cnt; 359static int lock_max_cnt; 360 361/* 362 * The WITNESS-enabled diagnostic code. 363 */ 364static void 365witness_initialize(void *dummy __unused) 366{ 367 struct lock_object *lock; 368 struct witness_order_list_entry *order; 369 struct witness *w, *w1; 370 int i; 371 372 /* 373 * We have to release Giant before initializing its witness 374 * structure so that WITNESS doesn't get confused. 375 */ 376 mtx_unlock(&Giant); 377 mtx_assert(&Giant, MA_NOTOWNED); 378 379 CTR1(KTR_WITNESS, "%s: initializing witness", __func__); 380 TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list); 381 mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | 382 MTX_NOWITNESS); 383 for (i = 0; i < WITNESS_COUNT; i++) 384 witness_free(&w_data[i]); 385 for (i = 0; i < WITNESS_CHILDCOUNT; i++) 386 witness_child_free(&w_childdata[i]); 387 for (i = 0; i < LOCK_CHILDCOUNT; i++) 388 witness_lock_list_free(&w_locklistdata[i]); 389 390 /* First add in all the specified order lists. 
*/ 391 for (order = order_lists; order->w_name != NULL; order++) { 392 w = enroll(order->w_name, order->w_class); 393 if (w == NULL) 394 continue; 395 w->w_file = "order list"; 396 for (order++; order->w_name != NULL; order++) { 397 w1 = enroll(order->w_name, order->w_class); 398 if (w1 == NULL) 399 continue; 400 w1->w_file = "order list"; 401 if (!itismychild(w, w1)) 402 panic("Not enough memory for static orders!"); 403 w = w1; 404 } 405 } 406 407 /* Iterate through all locks and add them to witness. */ 408 mtx_lock(&all_mtx); 409 TAILQ_FOREACH(lock, &all_locks, lo_list) { 410 if (lock->lo_flags & LO_WITNESS) 411 lock->lo_witness = enroll(lock->lo_type, 412 lock->lo_class); 413 else 414 lock->lo_witness = NULL; 415 } 416 mtx_unlock(&all_mtx); 417 418 /* Mark the witness code as being ready for use. */ 419 atomic_store_rel_int(&witness_cold, 0); 420 421 mtx_lock(&Giant); 422} 423SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) 424 425static int 426sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) 427{ 428 int error, value; 429 430 value = witness_watch; 431 error = sysctl_handle_int(oidp, &value, 0, req); 432 if (error != 0 || req->newptr == NULL) 433 return (error); 434 error = suser(req->td); 435 if (error != 0) 436 return (error); 437 if (value == witness_watch) 438 return (0); 439 if (value != 0) 440 return (EINVAL); 441 witness_watch = 0; 442 return (0); 443} 444 445void 446witness_init(struct lock_object *lock) 447{ 448 struct lock_class *class; 449 450 class = lock->lo_class; 451 if (lock->lo_flags & LO_INITIALIZED) 452 panic("%s: lock (%s) %s is already initialized", __func__, 453 class->lc_name, lock->lo_name); 454 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 455 (class->lc_flags & LC_RECURSABLE) == 0) 456 panic("%s: lock (%s) %s can not be recursable", __func__, 457 class->lc_name, lock->lo_name); 458 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 459 (class->lc_flags & LC_SLEEPABLE) == 0) 460 panic("%s: lock (%s) %s can not be 
sleepable", __func__, 461 class->lc_name, lock->lo_name); 462 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 463 (class->lc_flags & LC_UPGRADABLE) == 0) 464 panic("%s: lock (%s) %s can not be upgradable", __func__, 465 class->lc_name, lock->lo_name); 466 467 mtx_lock(&all_mtx); 468 TAILQ_INSERT_TAIL(&all_locks, lock, lo_list); 469 lock->lo_flags |= LO_INITIALIZED; 470 lock_cur_cnt++; 471 if (lock_cur_cnt > lock_max_cnt) 472 lock_max_cnt = lock_cur_cnt; 473 mtx_unlock(&all_mtx); 474 if (!witness_cold && witness_watch != 0 && panicstr == NULL && 475 (lock->lo_flags & LO_WITNESS) != 0) 476 lock->lo_witness = enroll(lock->lo_type, class); 477 else 478 lock->lo_witness = NULL; 479} 480 481void 482witness_destroy(struct lock_object *lock) 483{ 484 struct witness *w; 485 486 if (witness_cold) 487 panic("lock (%s) %s destroyed while witness_cold", 488 lock->lo_class->lc_name, lock->lo_name); 489 if ((lock->lo_flags & LO_INITIALIZED) == 0) 490 panic("%s: lock (%s) %s is not initialized", __func__, 491 lock->lo_class->lc_name, lock->lo_name); 492 493 /* XXX: need to verify that no one holds the lock */ 494 w = lock->lo_witness; 495 if (w != NULL) { 496 mtx_lock_spin(&w_mtx); 497 MPASS(w->w_refcount > 0); 498 w->w_refcount--; 499 500 /* 501 * Lock is already released if we have an allocation failure 502 * and depart() fails. 503 */ 504 if (w->w_refcount != 0 || depart(w)) 505 mtx_unlock_spin(&w_mtx); 506 } 507 508 mtx_lock(&all_mtx); 509 lock_cur_cnt--; 510 TAILQ_REMOVE(&all_locks, lock, lo_list); 511 lock->lo_flags &= ~LO_INITIALIZED; 512 mtx_unlock(&all_mtx); 513} 514 515#ifdef DDB 516static void 517witness_display_list(void(*prnt)(const char *fmt, ...), 518 struct witness_list *list) 519{ 520 struct witness *w; 521 522 STAILQ_FOREACH(w, list, w_typelist) { 523 if (w->w_file == NULL || w->w_level > 0) 524 continue; 525 /* 526 * This lock has no anscestors, display its descendants. 
527 */ 528 witness_displaydescendants(prnt, w, 0); 529 } 530} 531 532static void 533witness_display(void(*prnt)(const char *fmt, ...)) 534{ 535 struct witness *w; 536 537 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 538 witness_levelall(); 539 540 /* Clear all the displayed flags. */ 541 STAILQ_FOREACH(w, &w_all, w_list) { 542 w->w_displayed = 0; 543 } 544 545 /* 546 * First, handle sleep locks which have been acquired at least 547 * once. 548 */ 549 prnt("Sleep locks:\n"); 550 witness_display_list(prnt, &w_sleep); 551 552 /* 553 * Now do spin locks which have been acquired at least once. 554 */ 555 prnt("\nSpin locks:\n"); 556 witness_display_list(prnt, &w_spin); 557 558 /* 559 * Finally, any locks which have not been acquired yet. 560 */ 561 prnt("\nLocks which were never acquired:\n"); 562 STAILQ_FOREACH(w, &w_all, w_list) { 563 if (w->w_file != NULL || w->w_refcount == 0) 564 continue; 565 prnt("%s\n", w->w_name); 566 } 567} 568#endif /* DDB */ 569 570/* Trim useless garbage from filenames. */ 571static const char * 572fixup_filename(const char *file) 573{ 574 575 if (file == NULL) 576 return (NULL); 577 while (strncmp(file, "../", 3) == 0) 578 file += 3; 579 return (file); 580} 581 582int 583witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 584{ 585 586 if (witness_watch == 0 || panicstr != NULL) 587 return (0); 588 589 /* Require locks that witness knows about. */ 590 if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 591 lock2->lo_witness == NULL) 592 return (EINVAL); 593 594 MPASS(!mtx_owned(&w_mtx)); 595 mtx_lock_spin(&w_mtx); 596 597 /* 598 * If we already have either an explicit or implied lock order that 599 * is the other way around, then return an error. 600 */ 601 if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 602 mtx_unlock_spin(&w_mtx); 603 return (EDOOFUS); 604 } 605 606 /* Try to add the new order. 
*/ 607 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 608 lock2->lo_type, lock1->lo_type); 609 if (!itismychild(lock1->lo_witness, lock2->lo_witness)) 610 return (ENOMEM); 611 mtx_unlock_spin(&w_mtx); 612 return (0); 613} 614 615void 616witness_checkorder(struct lock_object *lock, int flags, const char *file, 617 int line) 618{ 619 struct lock_list_entry **lock_list, *lle; 620 struct lock_instance *lock1, *lock2; 621 struct lock_class *class; 622 struct witness *w, *w1; 623 struct thread *td; 624 int i, j; 625 626 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 627 panicstr != NULL) 628 return; 629 630 /* 631 * Try locks do not block if they fail to acquire the lock, thus 632 * there is no danger of deadlocks or of switching while holding a 633 * spin lock if we acquire a lock via a try operation. This 634 * function shouldn't even be called for try locks, so panic if 635 * that happens. 636 */ 637 if (flags & LOP_TRYLOCK) 638 panic("%s should not be called for try lock operations", 639 __func__); 640 641 w = lock->lo_witness; 642 class = lock->lo_class; 643 td = curthread; 644 file = fixup_filename(file); 645 646 if (class->lc_flags & LC_SLEEPLOCK) { 647 /* 648 * Since spin locks include a critical section, this check 649 * impliclty enforces a lock order of all sleep locks before 650 * all spin locks. 651 */ 652 if (td->td_critnest != 0) 653 panic("blockable sleep lock (%s) %s @ %s:%d", 654 class->lc_name, lock->lo_name, file, line); 655 lock_list = &td->td_sleeplocks; 656 } else 657 lock_list = PCPU_PTR(spinlocks); 658 659 /* 660 * Is this the first lock acquired? If so, then no order checking 661 * is needed. 662 */ 663 if (*lock_list == NULL) 664 return; 665 666 /* 667 * Check to see if we are recursing on a lock we already own. If 668 * so, make sure that we don't mismatch exclusive and shared lock 669 * acquires. 
670 */ 671 lock1 = find_instance(*lock_list, lock); 672 if (lock1 != NULL) { 673 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 674 (flags & LOP_EXCLUSIVE) == 0) { 675 printf("shared lock of (%s) %s @ %s:%d\n", 676 class->lc_name, lock->lo_name, file, line); 677 printf("while exclusively locked from %s:%d\n", 678 lock1->li_file, lock1->li_line); 679 panic("share->excl"); 680 } 681 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 682 (flags & LOP_EXCLUSIVE) != 0) { 683 printf("exclusive lock of (%s) %s @ %s:%d\n", 684 class->lc_name, lock->lo_name, file, line); 685 printf("while share locked from %s:%d\n", 686 lock1->li_file, lock1->li_line); 687 panic("excl->share"); 688 } 689 return; 690 } 691 692 /* 693 * Try locks do not block if they fail to acquire the lock, thus 694 * there is no danger of deadlocks or of switching while holding a 695 * spin lock if we acquire a lock via a try operation. 696 */ 697 if (flags & LOP_TRYLOCK) 698 return; 699 700 /* 701 * Check for duplicate locks of the same type. Note that we only 702 * have to check for this on the last lock we just acquired. Any 703 * other cases will be caught as lock order violations. 
704 */ 705 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 706 w1 = lock1->li_lock->lo_witness; 707 if (w1 == w) { 708 if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK)) 709 return; 710 w->w_same_squawked = 1; 711 printf("acquiring duplicate lock of same type: \"%s\"\n", 712 lock->lo_type); 713 printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, 714 lock1->li_file, lock1->li_line); 715 printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); 716#ifdef DDB 717 goto debugger; 718#else 719 return; 720#endif 721 } 722 MPASS(!mtx_owned(&w_mtx)); 723 mtx_lock_spin(&w_mtx); 724 /* 725 * If we have a known higher number just say ok 726 */ 727 if (witness_watch > 1 && w->w_level > w1->w_level) { 728 mtx_unlock_spin(&w_mtx); 729 return; 730 } 731 /* 732 * If we know that the the lock we are acquiring comes after 733 * the lock we most recently acquired in the lock order tree, 734 * then there is no need for any further checks. 735 */ 736 if (isitmydescendant(w1, w)) { 737 mtx_unlock_spin(&w_mtx); 738 return; 739 } 740 for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { 741 for (i = lle->ll_count - 1; i >= 0; i--, j++) { 742 743 MPASS(j < WITNESS_COUNT); 744 lock1 = &lle->ll_children[i]; 745 w1 = lock1->li_lock->lo_witness; 746 747 /* 748 * If this lock doesn't undergo witness checking, 749 * then skip it. 750 */ 751 if (w1 == NULL) { 752 KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, 753 ("lock missing witness structure")); 754 continue; 755 } 756 /* 757 * If we are locking Giant and this is a sleepable 758 * lock, then skip it. 759 */ 760 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 761 lock == &Giant.mtx_object) 762 continue; 763 /* 764 * If we are locking a sleepable lock and this lock 765 * is Giant, then skip it. 
766 */ 767 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 768 lock1->li_lock == &Giant.mtx_object) 769 continue; 770 /* 771 * If we are locking a sleepable lock and this lock 772 * isn't sleepable, we want to treat it as a lock 773 * order violation to enfore a general lock order of 774 * sleepable locks before non-sleepable locks. 775 */ 776 if (!((lock->lo_flags & LO_SLEEPABLE) != 0 && 777 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 778 /* 779 * Check the lock order hierarchy for a reveresal. 780 */ 781 if (!isitmydescendant(w, w1)) 782 continue; 783 /* 784 * We have a lock order violation, check to see if it 785 * is allowed or has already been yelled about. 786 */ 787 mtx_unlock_spin(&w_mtx); 788#ifdef BLESSING 789 /* 790 * If the lock order is blessed, just bail. We don't 791 * look for other lock order violations though, which 792 * may be a bug. 793 */ 794 if (blessed(w, w1)) 795 return; 796#endif 797 if (lock1->li_lock == &Giant.mtx_object) { 798 if (w1->w_Giant_squawked) 799 return; 800 else 801 w1->w_Giant_squawked = 1; 802 } else { 803 if (w1->w_other_squawked) 804 return; 805 else 806 w1->w_other_squawked = 1; 807 } 808 /* 809 * Ok, yell about it. 810 */ 811 printf("lock order reversal\n"); 812 /* 813 * Try to locate an earlier lock with 814 * witness w in our list. 
815 */ 816 do { 817 lock2 = &lle->ll_children[i]; 818 MPASS(lock2->li_lock != NULL); 819 if (lock2->li_lock->lo_witness == w) 820 break; 821 if (i == 0 && lle->ll_next != NULL) { 822 lle = lle->ll_next; 823 i = lle->ll_count - 1; 824 MPASS(i >= 0 && i < LOCK_NCHILDREN); 825 } else 826 i--; 827 } while (i >= 0); 828 if (i < 0) { 829 printf(" 1st %p %s (%s) @ %s:%d\n", 830 lock1->li_lock, lock1->li_lock->lo_name, 831 lock1->li_lock->lo_type, lock1->li_file, 832 lock1->li_line); 833 printf(" 2nd %p %s (%s) @ %s:%d\n", lock, 834 lock->lo_name, lock->lo_type, file, line); 835 } else { 836 printf(" 1st %p %s (%s) @ %s:%d\n", 837 lock2->li_lock, lock2->li_lock->lo_name, 838 lock2->li_lock->lo_type, lock2->li_file, 839 lock2->li_line); 840 printf(" 2nd %p %s (%s) @ %s:%d\n", 841 lock1->li_lock, lock1->li_lock->lo_name, 842 lock1->li_lock->lo_type, lock1->li_file, 843 lock1->li_line); 844 printf(" 3rd %p %s (%s) @ %s:%d\n", lock, 845 lock->lo_name, lock->lo_type, file, line); 846 } 847#ifdef DDB 848 goto debugger; 849#else 850 return; 851#endif 852 } 853 } 854 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 855 /* 856 * If requested, build a new lock order. However, don't build a new 857 * relationship between a sleepable lock and Giant if it is in the 858 * wrong direction. The correct lock order is that sleepable locks 859 * always come before Giant. 860 */ 861 if (flags & LOP_NEWORDER && 862 !(lock1->li_lock == &Giant.mtx_object && 863 (lock->lo_flags & LO_SLEEPABLE) != 0)) { 864 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 865 lock->lo_type, lock1->li_lock->lo_type); 866 if (!itismychild(lock1->li_lock->lo_witness, w)) 867 /* Witness is dead. 
*/ 868 return; 869 } 870 mtx_unlock_spin(&w_mtx); 871 return; 872 873#ifdef DDB 874debugger: 875 if (witness_trace) 876 backtrace(); 877 if (witness_ddb) 878 Debugger(__func__); 879#endif 880} 881 882void 883witness_lock(struct lock_object *lock, int flags, const char *file, int line) 884{ 885 struct lock_list_entry **lock_list, *lle; 886 struct lock_instance *instance; 887 struct witness *w; 888 struct thread *td; 889 890 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 891 panicstr != NULL) 892 return; 893 w = lock->lo_witness; 894 td = curthread; 895 file = fixup_filename(file); 896 897 /* Determine lock list for this lock. */ 898 if (lock->lo_class->lc_flags & LC_SLEEPLOCK) 899 lock_list = &td->td_sleeplocks; 900 else 901 lock_list = PCPU_PTR(spinlocks); 902 903 /* Check to see if we are recursing on a lock we already own. */ 904 instance = find_instance(*lock_list, lock); 905 if (instance != NULL) { 906 instance->li_flags++; 907 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 908 td->td_proc->p_pid, lock->lo_name, 909 instance->li_flags & LI_RECURSEMASK); 910 instance->li_file = file; 911 instance->li_line = line; 912 return; 913 } 914 915 /* Update per-witness last file and line acquire. */ 916 w->w_file = file; 917 w->w_line = line; 918 919 /* Find the next open lock instance in the list and fill it. 
*/ 920 lle = *lock_list; 921 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 922 lle = witness_lock_list_get(); 923 if (lle == NULL) 924 return; 925 lle->ll_next = *lock_list; 926 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, 927 td->td_proc->p_pid, lle); 928 *lock_list = lle; 929 } 930 instance = &lle->ll_children[lle->ll_count++]; 931 instance->li_lock = lock; 932 instance->li_line = line; 933 instance->li_file = file; 934 if ((flags & LOP_EXCLUSIVE) != 0) 935 instance->li_flags = LI_EXCLUSIVE; 936 else 937 instance->li_flags = 0; 938 CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, 939 td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); 940} 941 942void 943witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) 944{ 945 struct lock_instance *instance; 946 struct lock_class *class; 947 948 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 949 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 950 return; 951 class = lock->lo_class; 952 file = fixup_filename(file); 953 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 954 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", 955 class->lc_name, lock->lo_name, file, line); 956 if ((flags & LOP_TRYLOCK) == 0) 957 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, 958 lock->lo_name, file, line); 959 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 960 panic("upgrade of non-sleep lock (%s) %s @ %s:%d", 961 class->lc_name, lock->lo_name, file, line); 962 instance = find_instance(curthread->td_sleeplocks, lock); 963 if (instance == NULL) 964 panic("upgrade of unlocked lock (%s) %s @ %s:%d", 965 class->lc_name, lock->lo_name, file, line); 966 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 967 panic("upgrade of exclusive lock (%s) %s @ %s:%d", 968 class->lc_name, lock->lo_name, file, line); 969 if ((instance->li_flags & LI_RECURSEMASK) != 0) 970 panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", 971 class->lc_name, 
lock->lo_name, 972 instance->li_flags & LI_RECURSEMASK, file, line); 973 instance->li_flags |= LI_EXCLUSIVE; 974} 975 976void 977witness_downgrade(struct lock_object *lock, int flags, const char *file, 978 int line) 979{ 980 struct lock_instance *instance; 981 struct lock_class *class; 982 983 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 984 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 985 return; 986 class = lock->lo_class; 987 file = fixup_filename(file); 988 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 989 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", 990 class->lc_name, lock->lo_name, file, line); 991 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 992 panic("downgrade of non-sleep lock (%s) %s @ %s:%d", 993 class->lc_name, lock->lo_name, file, line); 994 instance = find_instance(curthread->td_sleeplocks, lock); 995 if (instance == NULL) 996 panic("downgrade of unlocked lock (%s) %s @ %s:%d", 997 class->lc_name, lock->lo_name, file, line); 998 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 999 panic("downgrade of shared lock (%s) %s @ %s:%d", 1000 class->lc_name, lock->lo_name, file, line); 1001 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1002 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", 1003 class->lc_name, lock->lo_name, 1004 instance->li_flags & LI_RECURSEMASK, file, line); 1005 instance->li_flags &= ~LI_EXCLUSIVE; 1006} 1007 1008void 1009witness_unlock(struct lock_object *lock, int flags, const char *file, int line) 1010{ 1011 struct lock_list_entry **lock_list, *lle; 1012 struct lock_instance *instance; 1013 struct lock_class *class; 1014 struct thread *td; 1015 register_t s; 1016 int i, j; 1017 1018 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1019 panicstr != NULL) 1020 return; 1021 td = curthread; 1022 class = lock->lo_class; 1023 file = fixup_filename(file); 1024 1025 /* Find lock instance associated with this lock. 
*/ 1026 if (class->lc_flags & LC_SLEEPLOCK) 1027 lock_list = &td->td_sleeplocks; 1028 else 1029 lock_list = PCPU_PTR(spinlocks); 1030 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1031 for (i = 0; i < (*lock_list)->ll_count; i++) { 1032 instance = &(*lock_list)->ll_children[i]; 1033 if (instance->li_lock == lock) 1034 goto found; 1035 } 1036 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1037 file, line); 1038found: 1039 1040 /* First, check for shared/exclusive mismatches. */ 1041 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1042 (flags & LOP_EXCLUSIVE) == 0) { 1043 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1044 lock->lo_name, file, line); 1045 printf("while exclusively locked from %s:%d\n", 1046 instance->li_file, instance->li_line); 1047 panic("excl->ushare"); 1048 } 1049 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1050 (flags & LOP_EXCLUSIVE) != 0) { 1051 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1052 lock->lo_name, file, line); 1053 printf("while share locked from %s:%d\n", instance->li_file, 1054 instance->li_line); 1055 panic("share->uexcl"); 1056 } 1057 1058 /* If we are recursed, unrecurse. */ 1059 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1060 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1061 td->td_proc->p_pid, instance->li_lock->lo_name, 1062 instance->li_flags); 1063 instance->li_flags--; 1064 return; 1065 } 1066 1067 /* Otherwise, remove this item from the list. */ 1068 s = intr_disable(); 1069 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1070 td->td_proc->p_pid, instance->li_lock->lo_name, 1071 (*lock_list)->ll_count - 1); 1072 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1073 (*lock_list)->ll_children[j] = 1074 (*lock_list)->ll_children[j + 1]; 1075 (*lock_list)->ll_count--; 1076 intr_restore(s); 1077 1078 /* If this lock list entry is now empty, free it. 
*/ 1079 if ((*lock_list)->ll_count == 0) { 1080 lle = *lock_list; 1081 *lock_list = lle->ll_next; 1082 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1083 td->td_proc->p_pid, lle); 1084 witness_lock_list_free(lle); 1085 } 1086} 1087 1088/* 1089 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1090 * exempt Giant and sleepable locks from the checks as well. If any 1091 * non-exempt locks are held, then a supplied message is printed to the 1092 * console along with a list of the offending locks. If indicated in the 1093 * flags then a failure results in a panic as well. 1094 */ 1095int 1096witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1097{ 1098 struct lock_list_entry *lle; 1099 struct lock_instance *lock1; 1100 struct thread *td; 1101 va_list ap; 1102 int i, n; 1103 1104 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1105 return (0); 1106 n = 0; 1107 td = curthread; 1108 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1109 for (i = lle->ll_count - 1; i >= 0; i--) { 1110 lock1 = &lle->ll_children[i]; 1111 if (lock1->li_lock == lock) 1112 continue; 1113 if (flags & WARN_GIANTOK && 1114 lock1->li_lock == &Giant.mtx_object) 1115 continue; 1116 if (flags & WARN_SLEEPOK && 1117 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1118 continue; 1119 if (n == 0) { 1120 va_start(ap, fmt); 1121 vprintf(fmt, ap); 1122 va_end(ap); 1123 printf(" with the following"); 1124 if (flags & WARN_SLEEPOK) 1125 printf(" non-sleepable"); 1126 printf(" locks held:\n"); 1127 } 1128 n++; 1129 witness_list_lock(lock1); 1130 } 1131 if (PCPU_GET(spinlocks) != NULL) { 1132 /* 1133 * Since we already hold a spinlock preemption is 1134 * already blocked. 
1135 */ 1136 if (n == 0) { 1137 va_start(ap, fmt); 1138 vprintf(fmt, ap); 1139 va_end(ap); 1140 printf(" with the following"); 1141 if (flags & WARN_SLEEPOK) 1142 printf(" non-sleepable"); 1143 printf(" locks held:\n"); 1144 } 1145 n += witness_list_locks(PCPU_PTR(spinlocks)); 1146 } 1147 if (flags & WARN_PANIC && n) 1148 panic("witness_warn"); 1149#ifdef DDB 1150 else if (witness_ddb && n) 1151 Debugger(__func__); 1152#endif 1153 return (n); 1154} 1155 1156const char * 1157witness_file(struct lock_object *lock) 1158{ 1159 struct witness *w; 1160 1161 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1162 return ("?"); 1163 w = lock->lo_witness; 1164 return (w->w_file); 1165} 1166 1167int 1168witness_line(struct lock_object *lock) 1169{ 1170 struct witness *w; 1171 1172 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1173 return (0); 1174 w = lock->lo_witness; 1175 return (w->w_line); 1176} 1177 1178static struct witness * 1179enroll(const char *description, struct lock_class *lock_class) 1180{ 1181 struct witness *w; 1182 1183 KASSERT(!witness_cold, ("enroll called too early")); 1184 if (witness_watch == 0 || panicstr != NULL) 1185 return (NULL); 1186 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1187 return (NULL); 1188 mtx_lock_spin(&w_mtx); 1189 STAILQ_FOREACH(w, &w_all, w_list) { 1190 if (w->w_name == description || (w->w_refcount > 0 && 1191 strcmp(description, w->w_name) == 0)) { 1192 w->w_refcount++; 1193 mtx_unlock_spin(&w_mtx); 1194 if (lock_class != w->w_class) 1195 panic( 1196 "lock (%s) %s does not match earlier (%s) lock", 1197 description, lock_class->lc_name, 1198 w->w_class->lc_name); 1199 return (w); 1200 } 1201 } 1202 /* 1203 * This isn't quite right, as witness_cold is still 0 while we 1204 * enroll all the locks initialized before witness_initialize(). 
1205 */ 1206 if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) { 1207 mtx_unlock_spin(&w_mtx); 1208 panic("spin lock %s not in order list", description); 1209 } 1210 if ((w = witness_get()) == NULL) 1211 return (NULL); 1212 w->w_name = description; 1213 w->w_class = lock_class; 1214 w->w_refcount = 1; 1215 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1216 if (lock_class->lc_flags & LC_SPINLOCK) 1217 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1218 else if (lock_class->lc_flags & LC_SLEEPLOCK) 1219 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1220 else { 1221 mtx_unlock_spin(&w_mtx); 1222 panic("lock class %s is not sleep or spin", 1223 lock_class->lc_name); 1224 } 1225 mtx_unlock_spin(&w_mtx); 1226 return (w); 1227} 1228 1229/* Don't let the door bang you on the way out... */ 1230static int 1231depart(struct witness *w) 1232{ 1233 struct witness_child_list_entry *wcl, *nwcl; 1234 struct witness_list *list; 1235 struct witness *parent; 1236 1237 MPASS(w->w_refcount == 0); 1238 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1239 list = &w_sleep; 1240 else 1241 list = &w_spin; 1242 /* 1243 * First, we run through the entire tree looking for any 1244 * witnesses that the outgoing witness is a child of. For 1245 * each parent that we find, we reparent all the direct 1246 * children of the outgoing witness to its parent. 1247 */ 1248 STAILQ_FOREACH(parent, list, w_typelist) { 1249 if (!isitmychild(parent, w)) 1250 continue; 1251 removechild(parent, w); 1252 if (!reparentchildren(parent, w)) 1253 return (0); 1254 } 1255 1256 /* 1257 * Now we go through and free up the child list of the 1258 * outgoing witness. 1259 */ 1260 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1261 nwcl = wcl->wcl_next; 1262 witness_child_free(wcl); 1263 } 1264 1265 /* 1266 * Detach from various lists and free. 1267 */ 1268 STAILQ_REMOVE(list, w, witness, w_typelist); 1269 STAILQ_REMOVE(&w_all, w, witness, w_list); 1270 witness_free(w); 1271 1272 /* Finally, fixup the tree. 
*/ 1273 return (rebalancetree(list)); 1274} 1275 1276/* 1277 * Prune an entire lock order tree. We look for cases where a lock 1278 * is now both a descendant and a direct child of a given lock. In 1279 * that case, we want to remove the direct child link from the tree. 1280 * 1281 * Returns false if insertchild() fails. 1282 */ 1283static int 1284rebalancetree(struct witness_list *list) 1285{ 1286 struct witness *child, *parent; 1287 1288 STAILQ_FOREACH(child, list, w_typelist) { 1289 STAILQ_FOREACH(parent, list, w_typelist) { 1290 if (!isitmychild(parent, child)) 1291 continue; 1292 removechild(parent, child); 1293 if (isitmydescendant(parent, child)) 1294 continue; 1295 if (!insertchild(parent, child)) 1296 return (0); 1297 } 1298 } 1299 witness_levelall(); 1300 return (1); 1301} 1302 1303/* 1304 * Add "child" as a direct child of "parent". Returns false if 1305 * we fail due to out of memory. 1306 */ 1307static int 1308insertchild(struct witness *parent, struct witness *child) 1309{ 1310 struct witness_child_list_entry **wcl; 1311 1312 MPASS(child != NULL && parent != NULL); 1313 1314 /* 1315 * Insert "child" after "parent" 1316 */ 1317 wcl = &parent->w_children; 1318 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1319 wcl = &(*wcl)->wcl_next; 1320 if (*wcl == NULL) { 1321 *wcl = witness_child_get(); 1322 if (*wcl == NULL) 1323 return (0); 1324 } 1325 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1326 1327 return (1); 1328} 1329 1330/* 1331 * Make all the direct descendants of oldparent be direct descendants 1332 * of newparent. 1333 */ 1334static int 1335reparentchildren(struct witness *newparent, struct witness *oldparent) 1336{ 1337 struct witness_child_list_entry *wcl; 1338 int i; 1339 1340 /* Avoid making a witness a child of itself. 
*/ 1341 MPASS(!isitmychild(oldparent, newparent)); 1342 1343 for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1344 for (i = 0; i < wcl->wcl_count; i++) 1345 if (!insertchild(newparent, wcl->wcl_children[i])) 1346 return (0); 1347 return (1); 1348} 1349 1350static int 1351itismychild(struct witness *parent, struct witness *child) 1352{ 1353 struct witness_list *list; 1354 1355 MPASS(child != NULL && parent != NULL); 1356 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1357 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1358 panic( 1359 "%s: parent (%s) and child (%s) are not the same lock type", 1360 __func__, parent->w_class->lc_name, 1361 child->w_class->lc_name); 1362 1363 if (!insertchild(parent, child)) 1364 return (0); 1365 1366 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1367 list = &w_sleep; 1368 else 1369 list = &w_spin; 1370 return (rebalancetree(list)); 1371} 1372 1373static void 1374removechild(struct witness *parent, struct witness *child) 1375{ 1376 struct witness_child_list_entry **wcl, *wcl1; 1377 int i; 1378 1379 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1380 for (i = 0; i < (*wcl)->wcl_count; i++) 1381 if ((*wcl)->wcl_children[i] == child) 1382 goto found; 1383 return; 1384found: 1385 (*wcl)->wcl_count--; 1386 if ((*wcl)->wcl_count > i) 1387 (*wcl)->wcl_children[i] = 1388 (*wcl)->wcl_children[(*wcl)->wcl_count]; 1389 MPASS((*wcl)->wcl_children[i] != NULL); 1390 if ((*wcl)->wcl_count != 0) 1391 return; 1392 wcl1 = *wcl; 1393 *wcl = wcl1->wcl_next; 1394 witness_child_free(wcl1); 1395} 1396 1397static int 1398isitmychild(struct witness *parent, struct witness *child) 1399{ 1400 struct witness_child_list_entry *wcl; 1401 int i; 1402 1403 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1404 for (i = 0; i < wcl->wcl_count; i++) { 1405 if (wcl->wcl_children[i] == child) 1406 return (1); 1407 } 1408 } 1409 return (0); 1410} 1411 1412static int 
1413isitmydescendant(struct witness *parent, struct witness *child) 1414{ 1415 struct witness_child_list_entry *wcl; 1416 int i, j; 1417 1418 if (isitmychild(parent, child)) 1419 return (1); 1420 j = 0; 1421 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1422 MPASS(j < 1000); 1423 for (i = 0; i < wcl->wcl_count; i++) { 1424 if (isitmydescendant(wcl->wcl_children[i], child)) 1425 return (1); 1426 } 1427 j++; 1428 } 1429 return (0); 1430} 1431 1432static void 1433witness_levelall (void) 1434{ 1435 struct witness_list *list; 1436 struct witness *w, *w1; 1437 1438 /* 1439 * First clear all levels. 1440 */ 1441 STAILQ_FOREACH(w, &w_all, w_list) { 1442 w->w_level = 0; 1443 } 1444 1445 /* 1446 * Look for locks with no parent and level all their descendants. 1447 */ 1448 STAILQ_FOREACH(w, &w_all, w_list) { 1449 /* 1450 * This is just an optimization, technically we could get 1451 * away just walking the all list each time. 1452 */ 1453 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1454 list = &w_sleep; 1455 else 1456 list = &w_spin; 1457 STAILQ_FOREACH(w1, list, w_typelist) { 1458 if (isitmychild(w1, w)) 1459 goto skip; 1460 } 1461 witness_leveldescendents(w, 0); 1462 skip: 1463 ; /* silence GCC 3.x */ 1464 } 1465} 1466 1467static void 1468witness_leveldescendents(struct witness *parent, int level) 1469{ 1470 struct witness_child_list_entry *wcl; 1471 int i; 1472 1473 if (parent->w_level < level) 1474 parent->w_level = level; 1475 level++; 1476 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1477 for (i = 0; i < wcl->wcl_count; i++) 1478 witness_leveldescendents(wcl->wcl_children[i], level); 1479} 1480 1481static void 1482witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1483 struct witness *parent, int indent) 1484{ 1485 struct witness_child_list_entry *wcl; 1486 int i, level; 1487 1488 level = parent->w_level; 1489 prnt("%-2d", level); 1490 for (i = 0; i < indent; i++) 1491 prnt(" "); 1492 if (parent->w_refcount > 0) 1493 
prnt("%s", parent->w_name); 1494 else 1495 prnt("(dead)"); 1496 if (parent->w_displayed) { 1497 prnt(" -- (already displayed)\n"); 1498 return; 1499 } 1500 parent->w_displayed = 1; 1501 if (parent->w_refcount > 0) { 1502 if (parent->w_file != NULL) 1503 prnt(" -- last acquired @ %s:%d", parent->w_file, 1504 parent->w_line); 1505 } 1506 prnt("\n"); 1507 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1508 for (i = 0; i < wcl->wcl_count; i++) 1509 witness_displaydescendants(prnt, 1510 wcl->wcl_children[i], indent + 1); 1511} 1512 1513#ifdef BLESSING 1514static int 1515blessed(struct witness *w1, struct witness *w2) 1516{ 1517 int i; 1518 struct witness_blessed *b; 1519 1520 for (i = 0; i < blessed_count; i++) { 1521 b = &blessed_list[i]; 1522 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1523 if (strcmp(w2->w_name, b->b_lock2) == 0) 1524 return (1); 1525 continue; 1526 } 1527 if (strcmp(w1->w_name, b->b_lock2) == 0) 1528 if (strcmp(w2->w_name, b->b_lock1) == 0) 1529 return (1); 1530 } 1531 return (0); 1532} 1533#endif 1534 1535static struct witness * 1536witness_get(void) 1537{ 1538 struct witness *w; 1539 1540 if (witness_watch == 0) { 1541 mtx_unlock_spin(&w_mtx); 1542 return (NULL); 1543 } 1544 if (STAILQ_EMPTY(&w_free)) { 1545 witness_watch = 0; 1546 mtx_unlock_spin(&w_mtx); 1547 printf("%s: witness exhausted\n", __func__); 1548 return (NULL); 1549 } 1550 w = STAILQ_FIRST(&w_free); 1551 STAILQ_REMOVE_HEAD(&w_free, w_list); 1552 bzero(w, sizeof(*w)); 1553 return (w); 1554} 1555 1556static void 1557witness_free(struct witness *w) 1558{ 1559 1560 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1561} 1562 1563static struct witness_child_list_entry * 1564witness_child_get(void) 1565{ 1566 struct witness_child_list_entry *wcl; 1567 1568 if (witness_watch == 0) { 1569 mtx_unlock_spin(&w_mtx); 1570 return (NULL); 1571 } 1572 wcl = w_child_free; 1573 if (wcl == NULL) { 1574 witness_watch = 0; 1575 mtx_unlock_spin(&w_mtx); 1576 printf("%s: witness exhausted\n", 
__func__); 1577 return (NULL); 1578 } 1579 w_child_free = wcl->wcl_next; 1580 bzero(wcl, sizeof(*wcl)); 1581 return (wcl); 1582} 1583 1584static void 1585witness_child_free(struct witness_child_list_entry *wcl) 1586{ 1587 1588 wcl->wcl_next = w_child_free; 1589 w_child_free = wcl; 1590} 1591 1592static struct lock_list_entry * 1593witness_lock_list_get(void) 1594{ 1595 struct lock_list_entry *lle; 1596 1597 if (witness_watch == 0) 1598 return (NULL); 1599 mtx_lock_spin(&w_mtx); 1600 lle = w_lock_list_free; 1601 if (lle == NULL) { 1602 witness_watch = 0; 1603 mtx_unlock_spin(&w_mtx); 1604 printf("%s: witness exhausted\n", __func__); 1605 return (NULL); 1606 } 1607 w_lock_list_free = lle->ll_next; 1608 mtx_unlock_spin(&w_mtx); 1609 bzero(lle, sizeof(*lle)); 1610 return (lle); 1611} 1612 1613static void 1614witness_lock_list_free(struct lock_list_entry *lle) 1615{ 1616 1617 mtx_lock_spin(&w_mtx); 1618 lle->ll_next = w_lock_list_free; 1619 w_lock_list_free = lle; 1620 mtx_unlock_spin(&w_mtx); 1621} 1622 1623static struct lock_instance * 1624find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1625{ 1626 struct lock_list_entry *lle; 1627 struct lock_instance *instance; 1628 int i; 1629 1630 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1631 for (i = lle->ll_count - 1; i >= 0; i--) { 1632 instance = &lle->ll_children[i]; 1633 if (instance->li_lock == lock) 1634 return (instance); 1635 } 1636 return (NULL); 1637} 1638 1639static void 1640witness_list_lock(struct lock_instance *instance) 1641{ 1642 struct lock_object *lock; 1643 1644 lock = instance->li_lock; 1645 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 
1646 "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name); 1647 if (lock->lo_type != lock->lo_name) 1648 printf(" (%s)", lock->lo_type); 1649 printf(" r = %d (%p) locked @ %s:%d\n", 1650 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1651 instance->li_line); 1652} 1653 1654int 1655witness_list_locks(struct lock_list_entry **lock_list) 1656{ 1657 struct lock_list_entry *lle; 1658 int i, nheld; 1659 1660 nheld = 0; 1661 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1662 for (i = lle->ll_count - 1; i >= 0; i--) { 1663 witness_list_lock(&lle->ll_children[i]); 1664 nheld++; 1665 } 1666 return (nheld); 1667} 1668 1669/* 1670 * This is a bit risky at best. We call this function when we have timed 1671 * out acquiring a spin lock, and we assume that the other CPU is stuck 1672 * with this lock held. So, we go groveling around in the other CPU's 1673 * per-cpu data to try to find the lock instance for this spin lock to 1674 * see when it was last acquired. 1675 */ 1676void 1677witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1678{ 1679 struct lock_instance *instance; 1680 struct pcpu *pc; 1681 1682 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1683 return; 1684 pc = pcpu_find(owner->td_oncpu); 1685 instance = find_instance(pc->pc_spinlocks, lock); 1686 if (instance != NULL) 1687 witness_list_lock(instance); 1688} 1689 1690void 1691witness_save(struct lock_object *lock, const char **filep, int *linep) 1692{ 1693 struct lock_instance *instance; 1694 1695 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1696 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1697 return; 1698 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1699 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1700 lock->lo_class->lc_name, lock->lo_name); 1701 instance = find_instance(curthread->td_sleeplocks, lock); 1702 if (instance == NULL) 1703 panic("%s: lock (%s) %s not locked", __func__, 1704 
lock->lo_class->lc_name, lock->lo_name); 1705 *filep = instance->li_file; 1706 *linep = instance->li_line; 1707} 1708 1709void 1710witness_restore(struct lock_object *lock, const char *file, int line) 1711{ 1712 struct lock_instance *instance; 1713 1714 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1715 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1716 return; 1717 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1718 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1719 lock->lo_class->lc_name, lock->lo_name); 1720 instance = find_instance(curthread->td_sleeplocks, lock); 1721 if (instance == NULL) 1722 panic("%s: lock (%s) %s not locked", __func__, 1723 lock->lo_class->lc_name, lock->lo_name); 1724 lock->lo_witness->w_file = file; 1725 lock->lo_witness->w_line = line; 1726 instance->li_file = file; 1727 instance->li_line = line; 1728} 1729 1730void 1731witness_assert(struct lock_object *lock, int flags, const char *file, int line) 1732{ 1733#ifdef INVARIANT_SUPPORT 1734 struct lock_instance *instance; 1735 1736 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1737 return; 1738 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0) 1739 instance = find_instance(curthread->td_sleeplocks, lock); 1740 else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0) 1741 instance = find_instance(PCPU_GET(spinlocks), lock); 1742 else { 1743 panic("Lock (%s) %s is not sleep or spin!", 1744 lock->lo_class->lc_name, lock->lo_name); 1745 } 1746 file = fixup_filename(file); 1747 switch (flags) { 1748 case LA_UNLOCKED: 1749 if (instance != NULL) 1750 panic("Lock (%s) %s locked @ %s:%d.", 1751 lock->lo_class->lc_name, lock->lo_name, file, line); 1752 break; 1753 case LA_LOCKED: 1754 case LA_LOCKED | LA_RECURSED: 1755 case LA_LOCKED | LA_NOTRECURSED: 1756 case LA_SLOCKED: 1757 case LA_SLOCKED | LA_RECURSED: 1758 case LA_SLOCKED | LA_NOTRECURSED: 1759 case LA_XLOCKED: 1760 case LA_XLOCKED | LA_RECURSED: 1761 case 
LA_XLOCKED | LA_NOTRECURSED: 1762 if (instance == NULL) { 1763 panic("Lock (%s) %s not locked @ %s:%d.", 1764 lock->lo_class->lc_name, lock->lo_name, file, line); 1765 break; 1766 } 1767 if ((flags & LA_XLOCKED) != 0 && 1768 (instance->li_flags & LI_EXCLUSIVE) == 0) 1769 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1770 lock->lo_class->lc_name, lock->lo_name, file, line); 1771 if ((flags & LA_SLOCKED) != 0 && 1772 (instance->li_flags & LI_EXCLUSIVE) != 0) 1773 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1774 lock->lo_class->lc_name, lock->lo_name, file, line); 1775 if ((flags & LA_RECURSED) != 0 && 1776 (instance->li_flags & LI_RECURSEMASK) == 0) 1777 panic("Lock (%s) %s not recursed @ %s:%d.", 1778 lock->lo_class->lc_name, lock->lo_name, file, line); 1779 if ((flags & LA_NOTRECURSED) != 0 && 1780 (instance->li_flags & LI_RECURSEMASK) != 0) 1781 panic("Lock (%s) %s recursed @ %s:%d.", 1782 lock->lo_class->lc_name, lock->lo_name, file, line); 1783 break; 1784 default: 1785 panic("Invalid lock assertion at %s:%d.", file, line); 1786 1787 } 1788#endif /* INVARIANT_SUPPORT */ 1789} 1790 1791#ifdef DDB 1792static void 1793witness_list(struct thread *td) 1794{ 1795 1796 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1797 KASSERT(db_active, ("%s: not in the debugger", __func__)); 1798 1799 if (witness_watch == 0) 1800 return; 1801 1802 witness_list_locks(&td->td_sleeplocks); 1803 1804 /* 1805 * We only handle spinlocks if td == curthread. This is somewhat broken 1806 * if td is currently executing on some other CPU and holds spin locks 1807 * as we won't display those locks. If we had a MI way of getting 1808 * the per-cpu data for a given cpu then we could use 1809 * td->td_oncpu to get the list of spinlocks for this thread 1810 * and "fix" this. 1811 * 1812 * That still wouldn't really fix this unless we locked sched_lock 1813 * or stopped the other CPU to make sure it wasn't changing the list 1814 * out from under us. 
It is probably best to just not try to handle 1815 * threads on other CPU's for now. 1816 */ 1817 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1818 witness_list_locks(PCPU_PTR(spinlocks)); 1819} 1820 1821DB_SHOW_COMMAND(locks, db_witness_list) 1822{ 1823 struct thread *td; 1824 pid_t pid; 1825 struct proc *p; 1826 1827 if (have_addr) { 1828 pid = (addr % 16) + ((addr >> 4) % 16) * 10 + 1829 ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + 1830 ((addr >> 16) % 16) * 10000; 1831 /* sx_slock(&allproc_lock); */ 1832 FOREACH_PROC_IN_SYSTEM(p) { 1833 if (p->p_pid == pid) 1834 break; 1835 } 1836 /* sx_sunlock(&allproc_lock); */ 1837 if (p == NULL) { 1838 db_printf("pid %d not found\n", pid); 1839 return; 1840 } 1841 FOREACH_THREAD_IN_PROC(p, td) { 1842 witness_list(td); 1843 } 1844 } else { 1845 td = curthread; 1846 witness_list(td); 1847 } 1848} 1849 1850DB_SHOW_COMMAND(witness, db_witness_display) 1851{ 1852 1853 witness_display(db_printf); 1854} 1855#endif 1856