subr_witness.c revision 136374
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Implementation of the `witness' lock verifier. Originally implemented for
 * mutexes in BSD/OS. Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 */

/*
 * Main Entry: witness
 * Pronunciation: 'wit-n&s
 * Function: noun
 * Etymology: Middle English witnesse, from Old English witnes knowledge,
 *    testimony, witness, from 2wit
 * Date: before 12th century
 * 1 : attestation of a fact or event : TESTIMONY
 * 2 : one that gives evidence; specifically : one who testifies in
 *    a cause or before a judicial tribunal
 * 3 : one asked to be present at a transaction so as to be able to
 *    testify to its having taken place
 * 4 : one who has personal knowledge of something
 * 5 a : something serving as evidence or proof : SIGN
 *   b : public affirmation by word or example of usually
 *    religious faith or conviction <the heroic witness to divine
 *    life -- Pilot>
 * 6 capitalized : a member of the Jehovah's Witnesses
 */

/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes. Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 * This rule is less obvious, but is a result of Giant providing the same
 * semantics as spl(). Basically, when a thread sleeps, it must release
 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule
 * 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 * This rule is also not quite as obvious. Giant may be acquired after
 * a sleepable lock because it is a non-sleepable lock and non-sleepable
 * locks may always be acquired while holding a sleepable lock. The second
 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose
 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1
 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and
 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to
 * execute. Thus, acquiring Giant both before and after a sleepable lock
 * will not result in a lock order reversal.
 */
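
/*
 * [Editorial sketch, not part of the original revision: a minimal
 * illustration of rules 1)-3) above. "foo_sx" (a sleepable sx lock) and
 * "foo_mtx" (an ordinary sleep mutex) are hypothetical names used only
 * for this example.]
 *
 *	sx_xlock(&foo_sx);	// sleepable lock held ...
 *	mtx_lock(&Giant);	// ... acquiring Giant after it is fine (rule 3)
 *
 *	mtx_lock(&foo_mtx);
 *	mtx_lock(&Giant);	// reversal: rule 1 says no other mutex may
 *				// be held when Giant is acquired
 *
 * If a thread holding Giant then blocks on foo_sx, it releases Giant
 * before sleeping (rule 2), which is why acquiring Giant before a
 * sleepable lock cannot deadlock against the "sleepable lock before
 * Giant" order.
 */
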
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 136374 2004-10-11 08:26:18Z rwatson $");

#include "opt_ddb.h"
#include "opt_witness.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <ddb/ddb.h>

#include <machine/stdarg.h>

/* Define this to check for blessed mutexes */
#undef BLESSING

#define WITNESS_COUNT 200
#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4)
/*
 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads
 * will hold LOCK_NCHILDREN * 2 locks. We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 */
#define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2

#define WITNESS_NCHILDREN 6

struct witness_child_list_entry;

struct witness {
	const char *w_name;
	struct lock_class *w_class;
	STAILQ_ENTRY(witness) w_list;		/* List of all witnesses. */
	STAILQ_ENTRY(witness) w_typelist;	/* Witnesses of a type. */
	struct witness_child_list_entry *w_children;	/* Great evilness... */
	const char *w_file;
	int w_line;
	u_int w_level;
	u_int w_refcount;
	u_char w_Giant_squawked:1;
	u_char w_other_squawked:1;
	u_char w_same_squawked:1;
	u_char w_displayed:1;
};

struct witness_child_list_entry {
	struct witness_child_list_entry *wcl_next;
	struct witness *wcl_children[WITNESS_NCHILDREN];
	u_int wcl_count;
};

STAILQ_HEAD(witness_list, witness);

#ifdef BLESSING
struct witness_blessed {
	const char *b_lock1;
	const char *b_lock2;
};
#endif

struct witness_order_list_entry {
	const char *w_name;
	struct lock_class *w_class;
};

#ifdef BLESSING
static int	blessed(struct witness *, struct witness *);
#endif
static int	depart(struct witness *w);
static struct witness *enroll(const char *description,
		    struct lock_class *lock_class);
static int	insertchild(struct witness *parent, struct witness *child);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent, struct witness *child);
static int	itismychild(struct witness *parent, struct witness *child);
static int	rebalancetree(struct witness_list *list);
static void	removechild(struct witness *parent, struct witness *child);
static int	reparentchildren(struct witness *newparent,
		    struct witness *oldparent);
static int	sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS);
static void	witness_displaydescendants(void(*)(const char *fmt, ...),
		    struct witness *, int indent);
static const char *fixup_filename(const char *file);
static void	witness_leveldescendents(struct witness *parent, int level);
static void	witness_levelall(void);
static struct witness *witness_get(void);
static void	witness_free(struct witness *m);
static struct witness_child_list_entry *witness_child_get(void);
static void	witness_child_free(struct witness_child_list_entry *wcl);
static struct lock_list_entry *witness_lock_list_get(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct lock_instance *find_instance(struct lock_list_entry *lock_list,
		    struct lock_object *lock);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef DDB
static void	witness_list(struct thread *td);
static void	witness_display_list(void(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_display(void(*)(const char *fmt, ...));
#endif

MALLOC_DEFINE(M_WITNESS, "witness", "witness structure");
SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking");

/*
 * If set to 0, witness is disabled. If set to 1, witness performs full lock
 * order checking for all locks. If set to 2 or higher, then witness skips
 * the full lock order check if the lock being acquired is at a higher level
 * (i.e. farther down in the tree) than the current lock. This last mode is
 * somewhat experimental and not considered fully safe. At runtime, this
 * value may be set to 0 to turn off witness. witness is not allowed to be
 * turned on again once it is turned off, however.
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness_watch", &witness_watch);
TUNABLE_INT("debug.witness.watch", &witness_watch);
SYSCTL_PROC(_debug_witness, OID_AUTO, watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0,
    sysctl_debug_witness_watch, "I", "witness is watching lock operations");

#ifdef KDB
/*
 * When KDB is enabled and witness_kdb is set to 1, it will cause the system
 * to drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_KDB
int	witness_kdb = 1;
#else
int	witness_kdb = 0;
#endif
TUNABLE_INT("debug.witness_kdb", &witness_kdb);
TUNABLE_INT("debug.witness.kdb", &witness_kdb);
SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RW, &witness_kdb, 0, "");

/*
 * When KDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
233 */ 234int witness_trace = 1; 235TUNABLE_INT("debug.witness_trace", &witness_trace); 236TUNABLE_INT("debug.witness.trace", &witness_trace); 237SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RW, &witness_trace, 0, ""); 238#endif /* KDB */ 239 240#ifdef WITNESS_SKIPSPIN 241int witness_skipspin = 1; 242#else 243int witness_skipspin = 0; 244#endif 245TUNABLE_INT("debug.witness_skipspin", &witness_skipspin); 246TUNABLE_INT("debug.witness.skipspin", &witness_skipspin); 247SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, 248 &witness_skipspin, 0, ""); 249 250static struct mtx w_mtx; 251static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); 252static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); 253static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); 254static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); 255static struct witness_child_list_entry *w_child_free = NULL; 256static struct lock_list_entry *w_lock_list_free = NULL; 257 258static struct witness w_data[WITNESS_COUNT]; 259static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT]; 260static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; 261 262static struct witness_order_list_entry order_lists[] = { 263 { "proctree", &lock_class_sx }, 264 { "allproc", &lock_class_sx }, 265 { "Giant", &lock_class_mtx_sleep }, 266 { "filedesc structure", &lock_class_mtx_sleep }, 267 { "pipe mutex", &lock_class_mtx_sleep }, 268 { "sigio lock", &lock_class_mtx_sleep }, 269 { "process group", &lock_class_mtx_sleep }, 270 { "process lock", &lock_class_mtx_sleep }, 271 { "session", &lock_class_mtx_sleep }, 272 { "uidinfo hash", &lock_class_mtx_sleep }, 273 { "uidinfo struct", &lock_class_mtx_sleep }, 274 { "allprison", &lock_class_mtx_sleep }, 275 { NULL, NULL }, 276 /* 277 * Sockets 278 */ 279 { "filedesc structure", &lock_class_mtx_sleep }, 280 { "accept", &lock_class_mtx_sleep }, 281 { "so_snd", &lock_class_mtx_sleep }, 282 { "so_rcv", &lock_class_mtx_sleep }, 283 { "sellck", &lock_class_mtx_sleep }, 284 { NULL, NULL }, 285 /* 286 * Routing 287 */ 288 { "so_rcv", &lock_class_mtx_sleep }, 289 { "radix node head", &lock_class_mtx_sleep }, 290 { "rtentry", &lock_class_mtx_sleep }, 291 { "ifaddr", &lock_class_mtx_sleep }, 292 { NULL, NULL }, 293 /* 294 * UNIX Domain Sockets 295 */ 296 { "unp", &lock_class_mtx_sleep }, 297 { "so_snd", &lock_class_mtx_sleep }, 298 { NULL, NULL }, 299 /* 300 * UDP/IP 301 */ 302 { "udp", &lock_class_mtx_sleep }, 303 { "udpinp", &lock_class_mtx_sleep }, 304 { "so_snd", &lock_class_mtx_sleep }, 305 { NULL, NULL }, 306 /* 307 * TCP/IP 308 */ 309 { "tcp", &lock_class_mtx_sleep }, 310 { "tcpinp", &lock_class_mtx_sleep }, 311 { "so_snd", &lock_class_mtx_sleep }, 312 { NULL, NULL }, 313 /* 314 * SLIP 315 */ 316 { "slip_mtx", &lock_class_mtx_sleep }, 317 { "slip sc_mtx", &lock_class_mtx_sleep }, 318 { NULL, NULL }, 319 /* 320 * netatalk 321 */ 322 { "ddp_list_mtx", &lock_class_mtx_sleep }, 323 { "ddp_mtx", &lock_class_mtx_sleep }, 324 { NULL, NULL }, 325 /* 326 * BPF 327 */ 328 { "bpf global lock", &lock_class_mtx_sleep }, 329 { "bpf interface lock", &lock_class_mtx_sleep }, 330 { "bpf cdev lock", &lock_class_mtx_sleep }, 331 { NULL, NULL }, 332 /* 333 * spin locks 334 */ 335#ifdef SMP 336 { "ap boot", &lock_class_mtx_spin }, 337#endif 338 { "sio", &lock_class_mtx_spin }, 339#ifdef __i386__ 340 { "cy", &lock_class_mtx_spin }, 341#endif 342 { "uart_hwmtx", &lock_class_mtx_spin }, 343 { "sabtty", &lock_class_mtx_spin }, 344 { "zstty", 
&lock_class_mtx_spin }, 345 { "ng_node", &lock_class_mtx_spin }, 346 { "ng_worklist", &lock_class_mtx_spin }, 347 { "taskqueue_fast", &lock_class_mtx_spin }, 348 { "intr table", &lock_class_mtx_spin }, 349 { "ithread table lock", &lock_class_mtx_spin }, 350 { "sleepq chain", &lock_class_mtx_spin }, 351 { "sched lock", &lock_class_mtx_spin }, 352 { "turnstile chain", &lock_class_mtx_spin }, 353 { "td_contested", &lock_class_mtx_spin }, 354 { "callout", &lock_class_mtx_spin }, 355 { "entropy harvest mutex", &lock_class_mtx_spin }, 356 /* 357 * leaf locks 358 */ 359 { "allpmaps", &lock_class_mtx_spin }, 360 { "vm page queue free mutex", &lock_class_mtx_spin }, 361 { "icu", &lock_class_mtx_spin }, 362#ifdef SMP 363 { "smp rendezvous", &lock_class_mtx_spin }, 364#if defined(__i386__) || defined(__amd64__) 365 { "tlb", &lock_class_mtx_spin }, 366#endif 367#ifdef __sparc64__ 368 { "ipi", &lock_class_mtx_spin }, 369#endif 370#endif 371 { "clk", &lock_class_mtx_spin }, 372 { "mutex profiling lock", &lock_class_mtx_spin }, 373 { "kse zombie lock", &lock_class_mtx_spin }, 374 { "ALD Queue", &lock_class_mtx_spin }, 375#ifdef __ia64__ 376 { "MCA spin lock", &lock_class_mtx_spin }, 377#endif 378#if defined(__i386__) || defined(__amd64__) 379 { "pcicfg", &lock_class_mtx_spin }, 380#endif 381 { NULL, NULL }, 382 { NULL, NULL } 383}; 384 385#ifdef BLESSING 386/* 387 * Pairs of locks which have been blessed 388 * Don't complain about order problems with blessed locks 389 */ 390static struct witness_blessed blessed_list[] = { 391}; 392static int blessed_count = 393 sizeof(blessed_list) / sizeof(struct witness_blessed); 394#endif 395 396/* 397 * List of all locks in the system. 398 */ 399TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks); 400 401static struct mtx all_mtx = { 402 { &lock_class_mtx_sleep, /* mtx_object.lo_class */ 403 "All locks list", /* mtx_object.lo_name */ 404 "All locks list", /* mtx_object.lo_type */ 405 LO_INITIALIZED, /* mtx_object.lo_flags */ 406 { NULL, NULL }, /* mtx_object.lo_list */ 407 NULL }, /* mtx_object.lo_witness */ 408 MTX_UNOWNED, 0 /* mtx_lock, mtx_recurse */ 409}; 410 411/* 412 * This global is set to 0 once it becomes safe to use the witness code. 413 */ 414static int witness_cold = 1; 415 416/* 417 * Global variables for book keeping. 418 */ 419static int lock_cur_cnt; 420static int lock_max_cnt; 421 422/* 423 * The WITNESS-enabled diagnostic code. 424 */ 425static void 426witness_initialize(void *dummy __unused) 427{ 428 struct lock_object *lock; 429 struct witness_order_list_entry *order; 430 struct witness *w, *w1; 431 int i; 432 433 /* 434 * We have to release Giant before initializing its witness 435 * structure so that WITNESS doesn't get confused. 436 */ 437 mtx_unlock(&Giant); 438 mtx_assert(&Giant, MA_NOTOWNED); 439 440 CTR1(KTR_WITNESS, "%s: initializing witness", __func__); 441 TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list); 442 mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | 443 MTX_NOWITNESS); 444 for (i = 0; i < WITNESS_COUNT; i++) 445 witness_free(&w_data[i]); 446 for (i = 0; i < WITNESS_CHILDCOUNT; i++) 447 witness_child_free(&w_childdata[i]); 448 for (i = 0; i < LOCK_CHILDCOUNT; i++) 449 witness_lock_list_free(&w_locklistdata[i]); 450 451 /* First add in all the specified order lists. 
*/ 452 for (order = order_lists; order->w_name != NULL; order++) { 453 w = enroll(order->w_name, order->w_class); 454 if (w == NULL) 455 continue; 456 w->w_file = "order list"; 457 for (order++; order->w_name != NULL; order++) { 458 w1 = enroll(order->w_name, order->w_class); 459 if (w1 == NULL) 460 continue; 461 w1->w_file = "order list"; 462 if (!itismychild(w, w1)) 463 panic("Not enough memory for static orders!"); 464 w = w1; 465 } 466 } 467 468 /* Iterate through all locks and add them to witness. */ 469 mtx_lock(&all_mtx); 470 TAILQ_FOREACH(lock, &all_locks, lo_list) { 471 if (lock->lo_flags & LO_WITNESS) 472 lock->lo_witness = enroll(lock->lo_type, 473 lock->lo_class); 474 else 475 lock->lo_witness = NULL; 476 } 477 mtx_unlock(&all_mtx); 478 479 /* Mark the witness code as being ready for use. */ 480 atomic_store_rel_int(&witness_cold, 0); 481 482 mtx_lock(&Giant); 483} 484SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) 485 486static int 487sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) 488{ 489 int error, value; 490 491 value = witness_watch; 492 error = sysctl_handle_int(oidp, &value, 0, req); 493 if (error != 0 || req->newptr == NULL) 494 return (error); 495 error = suser(req->td); 496 if (error != 0) 497 return (error); 498 if (value == witness_watch) 499 return (0); 500 if (value != 0) 501 return (EINVAL); 502 witness_watch = 0; 503 return (0); 504} 505 506void 507witness_init(struct lock_object *lock) 508{ 509 struct lock_class *class; 510 511 class = lock->lo_class; 512 if (lock->lo_flags & LO_INITIALIZED) 513 panic("%s: lock (%s) %s is already initialized", __func__, 514 class->lc_name, lock->lo_name); 515 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 516 (class->lc_flags & LC_RECURSABLE) == 0) 517 panic("%s: lock (%s) %s can not be recursable", __func__, 518 class->lc_name, lock->lo_name); 519 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 520 (class->lc_flags & LC_SLEEPABLE) == 0) 521 panic("%s: lock (%s) %s can not be sleepable", __func__, 522 class->lc_name, lock->lo_name); 523 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 524 (class->lc_flags & LC_UPGRADABLE) == 0) 525 panic("%s: lock (%s) %s can not be upgradable", __func__, 526 class->lc_name, lock->lo_name); 527 528 mtx_lock(&all_mtx); 529 TAILQ_INSERT_TAIL(&all_locks, lock, lo_list); 530 lock->lo_flags |= LO_INITIALIZED; 531 lock_cur_cnt++; 532 if (lock_cur_cnt > lock_max_cnt) 533 lock_max_cnt = lock_cur_cnt; 534 mtx_unlock(&all_mtx); 535 if (!witness_cold && witness_watch != 0 && panicstr == NULL && 536 (lock->lo_flags & LO_WITNESS) != 0) 537 lock->lo_witness = enroll(lock->lo_type, class); 538 else 539 lock->lo_witness = NULL; 540} 541 542void 543witness_destroy(struct lock_object *lock) 544{ 545 struct witness *w; 546 547 if (witness_cold) 548 panic("lock (%s) %s destroyed while witness_cold", 549 lock->lo_class->lc_name, lock->lo_name); 550 if ((lock->lo_flags & LO_INITIALIZED) == 0) 551 panic("%s: lock (%s) %s is not initialized", __func__, 552 lock->lo_class->lc_name, lock->lo_name); 553 554 /* XXX: need to verify that no one holds the lock */ 555 w = lock->lo_witness; 556 if (w != NULL) { 557 mtx_lock_spin(&w_mtx); 558 MPASS(w->w_refcount > 0); 559 w->w_refcount--; 560 561 /* 562 * Lock is already released if we have an allocation failure 563 * and depart() fails. 
564 */ 565 if (w->w_refcount != 0 || depart(w)) 566 mtx_unlock_spin(&w_mtx); 567 } 568 569 mtx_lock(&all_mtx); 570 lock_cur_cnt--; 571 TAILQ_REMOVE(&all_locks, lock, lo_list); 572 lock->lo_flags &= ~LO_INITIALIZED; 573 mtx_unlock(&all_mtx); 574} 575 576#ifdef DDB 577static void 578witness_display_list(void(*prnt)(const char *fmt, ...), 579 struct witness_list *list) 580{ 581 struct witness *w; 582 583 STAILQ_FOREACH(w, list, w_typelist) { 584 if (w->w_file == NULL || w->w_level > 0) 585 continue; 586 /* 587 * This lock has no anscestors, display its descendants. 588 */ 589 witness_displaydescendants(prnt, w, 0); 590 } 591} 592 593static void 594witness_display(void(*prnt)(const char *fmt, ...)) 595{ 596 struct witness *w; 597 598 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 599 witness_levelall(); 600 601 /* Clear all the displayed flags. */ 602 STAILQ_FOREACH(w, &w_all, w_list) { 603 w->w_displayed = 0; 604 } 605 606 /* 607 * First, handle sleep locks which have been acquired at least 608 * once. 609 */ 610 prnt("Sleep locks:\n"); 611 witness_display_list(prnt, &w_sleep); 612 613 /* 614 * Now do spin locks which have been acquired at least once. 615 */ 616 prnt("\nSpin locks:\n"); 617 witness_display_list(prnt, &w_spin); 618 619 /* 620 * Finally, any locks which have not been acquired yet. 621 */ 622 prnt("\nLocks which were never acquired:\n"); 623 STAILQ_FOREACH(w, &w_all, w_list) { 624 if (w->w_file != NULL || w->w_refcount == 0) 625 continue; 626 prnt("%s\n", w->w_name); 627 } 628} 629#endif /* DDB */ 630 631/* Trim useless garbage from filenames. */ 632static const char * 633fixup_filename(const char *file) 634{ 635 636 if (file == NULL) 637 return (NULL); 638 while (strncmp(file, "../", 3) == 0) 639 file += 3; 640 return (file); 641} 642 643int 644witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 645{ 646 647 if (witness_watch == 0 || panicstr != NULL) 648 return (0); 649 650 /* Require locks that witness knows about. */ 651 if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 652 lock2->lo_witness == NULL) 653 return (EINVAL); 654 655 MPASS(!mtx_owned(&w_mtx)); 656 mtx_lock_spin(&w_mtx); 657 658 /* 659 * If we already have either an explicit or implied lock order that 660 * is the other way around, then return an error. 661 */ 662 if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 663 mtx_unlock_spin(&w_mtx); 664 return (EDOOFUS); 665 } 666 667 /* Try to add the new order. */ 668 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 669 lock2->lo_type, lock1->lo_type); 670 if (!itismychild(lock1->lo_witness, lock2->lo_witness)) 671 return (ENOMEM); 672 mtx_unlock_spin(&w_mtx); 673 return (0); 674} 675 676void 677witness_checkorder(struct lock_object *lock, int flags, const char *file, 678 int line) 679{ 680 struct lock_list_entry **lock_list, *lle; 681 struct lock_instance *lock1, *lock2; 682 struct lock_class *class; 683 struct witness *w, *w1; 684 struct thread *td; 685 int i, j; 686 687 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 688 panicstr != NULL) 689 return; 690 691 /* 692 * Try locks do not block if they fail to acquire the lock, thus 693 * there is no danger of deadlocks or of switching while holding a 694 * spin lock if we acquire a lock via a try operation. This 695 * function shouldn't even be called for try locks, so panic if 696 * that happens. 
697 */ 698 if (flags & LOP_TRYLOCK) 699 panic("%s should not be called for try lock operations", 700 __func__); 701 702 w = lock->lo_witness; 703 class = lock->lo_class; 704 td = curthread; 705 file = fixup_filename(file); 706 707 if (class->lc_flags & LC_SLEEPLOCK) { 708 /* 709 * Since spin locks include a critical section, this check 710 * implicitly enforces a lock order of all sleep locks before 711 * all spin locks. 712 */ 713 if (td->td_critnest != 0 && !kdb_active) 714 panic("blockable sleep lock (%s) %s @ %s:%d", 715 class->lc_name, lock->lo_name, file, line); 716 717 /* 718 * If this is the first lock acquired then just return as 719 * no order checking is needed. 720 */ 721 if (td->td_sleeplocks == NULL) 722 return; 723 lock_list = &td->td_sleeplocks; 724 } else { 725 /* 726 * If this is the first lock, just return as no order 727 * checking is needed. We check this in both if clauses 728 * here as unifying the check would require us to use a 729 * critical section to ensure we don't migrate while doing 730 * the check. Note that if this is not the first lock, we 731 * are already in a critical section and are safe for the 732 * rest of the check. 733 */ 734 if (PCPU_GET(spinlocks) == NULL) 735 return; 736 lock_list = PCPU_PTR(spinlocks); 737 } 738 739 /* 740 * Check to see if we are recursing on a lock we already own. If 741 * so, make sure that we don't mismatch exclusive and shared lock 742 * acquires. 743 */ 744 lock1 = find_instance(*lock_list, lock); 745 if (lock1 != NULL) { 746 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 747 (flags & LOP_EXCLUSIVE) == 0) { 748 printf("shared lock of (%s) %s @ %s:%d\n", 749 class->lc_name, lock->lo_name, file, line); 750 printf("while exclusively locked from %s:%d\n", 751 lock1->li_file, lock1->li_line); 752 panic("share->excl"); 753 } 754 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 755 (flags & LOP_EXCLUSIVE) != 0) { 756 printf("exclusive lock of (%s) %s @ %s:%d\n", 757 class->lc_name, lock->lo_name, file, line); 758 printf("while share locked from %s:%d\n", 759 lock1->li_file, lock1->li_line); 760 panic("excl->share"); 761 } 762 return; 763 } 764 765 /* 766 * Try locks do not block if they fail to acquire the lock, thus 767 * there is no danger of deadlocks or of switching while holding a 768 * spin lock if we acquire a lock via a try operation. 769 */ 770 if (flags & LOP_TRYLOCK) 771 return; 772 773 /* 774 * Check for duplicate locks of the same type. Note that we only 775 * have to check for this on the last lock we just acquired. Any 776 * other cases will be caught as lock order violations. 777 */ 778 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 779 w1 = lock1->li_lock->lo_witness; 780 if (w1 == w) { 781 if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK)) 782 return; 783 w->w_same_squawked = 1; 784 printf("acquiring duplicate lock of same type: \"%s\"\n", 785 lock->lo_type); 786 printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, 787 lock1->li_file, lock1->li_line); 788 printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); 789#ifdef KDB 790 goto debugger; 791#else 792 return; 793#endif 794 } 795 MPASS(!mtx_owned(&w_mtx)); 796 mtx_lock_spin(&w_mtx); 797 /* 798 * If we have a known higher number just say ok 799 */ 800 if (witness_watch > 1 && w->w_level > w1->w_level) { 801 mtx_unlock_spin(&w_mtx); 802 return; 803 } 804 /* 805 * If we know that the the lock we are acquiring comes after 806 * the lock we most recently acquired in the lock order tree, 807 * then there is no need for any further checks. 
808 */ 809 if (isitmydescendant(w1, w)) { 810 mtx_unlock_spin(&w_mtx); 811 return; 812 } 813 for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { 814 for (i = lle->ll_count - 1; i >= 0; i--, j++) { 815 816 MPASS(j < WITNESS_COUNT); 817 lock1 = &lle->ll_children[i]; 818 w1 = lock1->li_lock->lo_witness; 819 820 /* 821 * If this lock doesn't undergo witness checking, 822 * then skip it. 823 */ 824 if (w1 == NULL) { 825 KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, 826 ("lock missing witness structure")); 827 continue; 828 } 829 /* 830 * If we are locking Giant and this is a sleepable 831 * lock, then skip it. 832 */ 833 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 834 lock == &Giant.mtx_object) 835 continue; 836 /* 837 * If we are locking a sleepable lock and this lock 838 * is Giant, then skip it. 839 */ 840 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 841 lock1->li_lock == &Giant.mtx_object) 842 continue; 843 /* 844 * If we are locking a sleepable lock and this lock 845 * isn't sleepable, we want to treat it as a lock 846 * order violation to enfore a general lock order of 847 * sleepable locks before non-sleepable locks. 848 */ 849 if (!((lock->lo_flags & LO_SLEEPABLE) != 0 && 850 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 851 /* 852 * Check the lock order hierarchy for a reveresal. 853 */ 854 if (!isitmydescendant(w, w1)) 855 continue; 856 /* 857 * We have a lock order violation, check to see if it 858 * is allowed or has already been yelled about. 859 */ 860 mtx_unlock_spin(&w_mtx); 861#ifdef BLESSING 862 /* 863 * If the lock order is blessed, just bail. We don't 864 * look for other lock order violations though, which 865 * may be a bug. 866 */ 867 if (blessed(w, w1)) 868 return; 869#endif 870 if (lock1->li_lock == &Giant.mtx_object) { 871 if (w1->w_Giant_squawked) 872 return; 873 else 874 w1->w_Giant_squawked = 1; 875 } else { 876 if (w1->w_other_squawked) 877 return; 878 else 879 w1->w_other_squawked = 1; 880 } 881 /* 882 * Ok, yell about it. 883 */ 884 printf("lock order reversal\n"); 885 /* 886 * Try to locate an earlier lock with 887 * witness w in our list. 888 */ 889 do { 890 lock2 = &lle->ll_children[i]; 891 MPASS(lock2->li_lock != NULL); 892 if (lock2->li_lock->lo_witness == w) 893 break; 894 if (i == 0 && lle->ll_next != NULL) { 895 lle = lle->ll_next; 896 i = lle->ll_count - 1; 897 MPASS(i >= 0 && i < LOCK_NCHILDREN); 898 } else 899 i--; 900 } while (i >= 0); 901 if (i < 0) { 902 printf(" 1st %p %s (%s) @ %s:%d\n", 903 lock1->li_lock, lock1->li_lock->lo_name, 904 lock1->li_lock->lo_type, lock1->li_file, 905 lock1->li_line); 906 printf(" 2nd %p %s (%s) @ %s:%d\n", lock, 907 lock->lo_name, lock->lo_type, file, line); 908 } else { 909 printf(" 1st %p %s (%s) @ %s:%d\n", 910 lock2->li_lock, lock2->li_lock->lo_name, 911 lock2->li_lock->lo_type, lock2->li_file, 912 lock2->li_line); 913 printf(" 2nd %p %s (%s) @ %s:%d\n", 914 lock1->li_lock, lock1->li_lock->lo_name, 915 lock1->li_lock->lo_type, lock1->li_file, 916 lock1->li_line); 917 printf(" 3rd %p %s (%s) @ %s:%d\n", lock, 918 lock->lo_name, lock->lo_type, file, line); 919 } 920#ifdef KDB 921 goto debugger; 922#else 923 return; 924#endif 925 } 926 } 927 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 928 /* 929 * If requested, build a new lock order. However, don't build a new 930 * relationship between a sleepable lock and Giant if it is in the 931 * wrong direction. The correct lock order is that sleepable locks 932 * always come before Giant. 
933 */ 934 if (flags & LOP_NEWORDER && 935 !(lock1->li_lock == &Giant.mtx_object && 936 (lock->lo_flags & LO_SLEEPABLE) != 0)) { 937 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 938 lock->lo_type, lock1->li_lock->lo_type); 939 if (!itismychild(lock1->li_lock->lo_witness, w)) 940 /* Witness is dead. */ 941 return; 942 } 943 mtx_unlock_spin(&w_mtx); 944 return; 945 946#ifdef KDB 947debugger: 948 if (witness_trace) 949 kdb_backtrace(); 950 if (witness_kdb) 951 kdb_enter(__func__); 952#endif 953} 954 955void 956witness_lock(struct lock_object *lock, int flags, const char *file, int line) 957{ 958 struct lock_list_entry **lock_list, *lle; 959 struct lock_instance *instance; 960 struct witness *w; 961 struct thread *td; 962 963 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 964 panicstr != NULL) 965 return; 966 w = lock->lo_witness; 967 td = curthread; 968 file = fixup_filename(file); 969 970 /* Determine lock list for this lock. */ 971 if (lock->lo_class->lc_flags & LC_SLEEPLOCK) 972 lock_list = &td->td_sleeplocks; 973 else 974 lock_list = PCPU_PTR(spinlocks); 975 976 /* Check to see if we are recursing on a lock we already own. */ 977 instance = find_instance(*lock_list, lock); 978 if (instance != NULL) { 979 instance->li_flags++; 980 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 981 td->td_proc->p_pid, lock->lo_name, 982 instance->li_flags & LI_RECURSEMASK); 983 instance->li_file = file; 984 instance->li_line = line; 985 return; 986 } 987 988 /* Update per-witness last file and line acquire. */ 989 w->w_file = file; 990 w->w_line = line; 991 992 /* Find the next open lock instance in the list and fill it. */ 993 lle = *lock_list; 994 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 995 lle = witness_lock_list_get(); 996 if (lle == NULL) 997 return; 998 lle->ll_next = *lock_list; 999 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, 1000 td->td_proc->p_pid, lle); 1001 *lock_list = lle; 1002 } 1003 instance = &lle->ll_children[lle->ll_count++]; 1004 instance->li_lock = lock; 1005 instance->li_line = line; 1006 instance->li_file = file; 1007 if ((flags & LOP_EXCLUSIVE) != 0) 1008 instance->li_flags = LI_EXCLUSIVE; 1009 else 1010 instance->li_flags = 0; 1011 CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, 1012 td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); 1013} 1014 1015void 1016witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) 1017{ 1018 struct lock_instance *instance; 1019 struct lock_class *class; 1020 1021 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1022 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1023 return; 1024 class = lock->lo_class; 1025 file = fixup_filename(file); 1026 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1027 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", 1028 class->lc_name, lock->lo_name, file, line); 1029 if ((flags & LOP_TRYLOCK) == 0) 1030 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, 1031 lock->lo_name, file, line); 1032 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1033 panic("upgrade of non-sleep lock (%s) %s @ %s:%d", 1034 class->lc_name, lock->lo_name, file, line); 1035 instance = find_instance(curthread->td_sleeplocks, lock); 1036 if (instance == NULL) 1037 panic("upgrade of unlocked lock (%s) %s @ %s:%d", 1038 class->lc_name, lock->lo_name, file, line); 1039 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1040 panic("upgrade of exclusive lock (%s) %s @ %s:%d", 1041 
class->lc_name, lock->lo_name, file, line); 1042 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1043 panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", 1044 class->lc_name, lock->lo_name, 1045 instance->li_flags & LI_RECURSEMASK, file, line); 1046 instance->li_flags |= LI_EXCLUSIVE; 1047} 1048 1049void 1050witness_downgrade(struct lock_object *lock, int flags, const char *file, 1051 int line) 1052{ 1053 struct lock_instance *instance; 1054 struct lock_class *class; 1055 1056 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1057 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1058 return; 1059 class = lock->lo_class; 1060 file = fixup_filename(file); 1061 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1062 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", 1063 class->lc_name, lock->lo_name, file, line); 1064 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1065 panic("downgrade of non-sleep lock (%s) %s @ %s:%d", 1066 class->lc_name, lock->lo_name, file, line); 1067 instance = find_instance(curthread->td_sleeplocks, lock); 1068 if (instance == NULL) 1069 panic("downgrade of unlocked lock (%s) %s @ %s:%d", 1070 class->lc_name, lock->lo_name, file, line); 1071 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 1072 panic("downgrade of shared lock (%s) %s @ %s:%d", 1073 class->lc_name, lock->lo_name, file, line); 1074 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1075 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", 1076 class->lc_name, lock->lo_name, 1077 instance->li_flags & LI_RECURSEMASK, file, line); 1078 instance->li_flags &= ~LI_EXCLUSIVE; 1079} 1080 1081void 1082witness_unlock(struct lock_object *lock, int flags, const char *file, int line) 1083{ 1084 struct lock_list_entry **lock_list, *lle; 1085 struct lock_instance *instance; 1086 struct lock_class *class; 1087 struct thread *td; 1088 register_t s; 1089 int i, j; 1090 1091 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1092 panicstr != NULL) 1093 return; 1094 td = curthread; 1095 class = lock->lo_class; 1096 file = fixup_filename(file); 1097 1098 /* Find lock instance associated with this lock. */ 1099 if (class->lc_flags & LC_SLEEPLOCK) 1100 lock_list = &td->td_sleeplocks; 1101 else 1102 lock_list = PCPU_PTR(spinlocks); 1103 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1104 for (i = 0; i < (*lock_list)->ll_count; i++) { 1105 instance = &(*lock_list)->ll_children[i]; 1106 if (instance->li_lock == lock) 1107 goto found; 1108 } 1109 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1110 file, line); 1111found: 1112 1113 /* First, check for shared/exclusive mismatches. */ 1114 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1115 (flags & LOP_EXCLUSIVE) == 0) { 1116 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1117 lock->lo_name, file, line); 1118 printf("while exclusively locked from %s:%d\n", 1119 instance->li_file, instance->li_line); 1120 panic("excl->ushare"); 1121 } 1122 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1123 (flags & LOP_EXCLUSIVE) != 0) { 1124 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1125 lock->lo_name, file, line); 1126 printf("while share locked from %s:%d\n", instance->li_file, 1127 instance->li_line); 1128 panic("share->uexcl"); 1129 } 1130 1131 /* If we are recursed, unrecurse. 
*/ 1132 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1133 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1134 td->td_proc->p_pid, instance->li_lock->lo_name, 1135 instance->li_flags); 1136 instance->li_flags--; 1137 return; 1138 } 1139 1140 /* Otherwise, remove this item from the list. */ 1141 s = intr_disable(); 1142 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1143 td->td_proc->p_pid, instance->li_lock->lo_name, 1144 (*lock_list)->ll_count - 1); 1145 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1146 (*lock_list)->ll_children[j] = 1147 (*lock_list)->ll_children[j + 1]; 1148 (*lock_list)->ll_count--; 1149 intr_restore(s); 1150 1151 /* If this lock list entry is now empty, free it. */ 1152 if ((*lock_list)->ll_count == 0) { 1153 lle = *lock_list; 1154 *lock_list = lle->ll_next; 1155 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1156 td->td_proc->p_pid, lle); 1157 witness_lock_list_free(lle); 1158 } 1159} 1160 1161/* 1162 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1163 * exempt Giant and sleepable locks from the checks as well. If any 1164 * non-exempt locks are held, then a supplied message is printed to the 1165 * console along with a list of the offending locks. If indicated in the 1166 * flags then a failure results in a panic as well. 1167 */ 1168int 1169witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1170{ 1171 struct lock_list_entry *lle; 1172 struct lock_instance *lock1; 1173 struct thread *td; 1174 va_list ap; 1175 int i, n; 1176 1177 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1178 return (0); 1179 n = 0; 1180 td = curthread; 1181 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1182 for (i = lle->ll_count - 1; i >= 0; i--) { 1183 lock1 = &lle->ll_children[i]; 1184 if (lock1->li_lock == lock) 1185 continue; 1186 if (flags & WARN_GIANTOK && 1187 lock1->li_lock == &Giant.mtx_object) 1188 continue; 1189 if (flags & WARN_SLEEPOK && 1190 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1191 continue; 1192 if (n == 0) { 1193 va_start(ap, fmt); 1194 vprintf(fmt, ap); 1195 va_end(ap); 1196 printf(" with the following"); 1197 if (flags & WARN_SLEEPOK) 1198 printf(" non-sleepable"); 1199 printf(" locks held:\n"); 1200 } 1201 n++; 1202 witness_list_lock(lock1); 1203 } 1204 if (PCPU_GET(spinlocks) != NULL) { 1205 /* 1206 * Since we already hold a spinlock preemption is 1207 * already blocked. 
1208 */ 1209 if (n == 0) { 1210 va_start(ap, fmt); 1211 vprintf(fmt, ap); 1212 va_end(ap); 1213 printf(" with the following"); 1214 if (flags & WARN_SLEEPOK) 1215 printf(" non-sleepable"); 1216 printf(" locks held:\n"); 1217 } 1218 n += witness_list_locks(PCPU_PTR(spinlocks)); 1219 } 1220 if (flags & WARN_PANIC && n) 1221 panic("witness_warn"); 1222#ifdef KDB 1223 else if (witness_kdb && n) 1224 kdb_enter(__func__); 1225 else if (witness_trace && n) 1226 kdb_backtrace(); 1227#endif 1228 return (n); 1229} 1230 1231const char * 1232witness_file(struct lock_object *lock) 1233{ 1234 struct witness *w; 1235 1236 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1237 return ("?"); 1238 w = lock->lo_witness; 1239 return (w->w_file); 1240} 1241 1242int 1243witness_line(struct lock_object *lock) 1244{ 1245 struct witness *w; 1246 1247 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1248 return (0); 1249 w = lock->lo_witness; 1250 return (w->w_line); 1251} 1252 1253static struct witness * 1254enroll(const char *description, struct lock_class *lock_class) 1255{ 1256 struct witness *w; 1257 1258 if (witness_watch == 0 || panicstr != NULL) 1259 return (NULL); 1260 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1261 return (NULL); 1262 mtx_lock_spin(&w_mtx); 1263 STAILQ_FOREACH(w, &w_all, w_list) { 1264 if (w->w_name == description || (w->w_refcount > 0 && 1265 strcmp(description, w->w_name) == 0)) { 1266 w->w_refcount++; 1267 mtx_unlock_spin(&w_mtx); 1268 if (lock_class != w->w_class) 1269 panic( 1270 "lock (%s) %s does not match earlier (%s) lock", 1271 description, lock_class->lc_name, 1272 w->w_class->lc_name); 1273 return (w); 1274 } 1275 } 1276 /* 1277 * This isn't quite right, as witness_cold is still 0 while we 1278 * enroll all the locks initialized before witness_initialize(). 1279 */ 1280 if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) { 1281 mtx_unlock_spin(&w_mtx); 1282 panic("spin lock %s not in order list", description); 1283 } 1284 if ((w = witness_get()) == NULL) 1285 return (NULL); 1286 w->w_name = description; 1287 w->w_class = lock_class; 1288 w->w_refcount = 1; 1289 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1290 if (lock_class->lc_flags & LC_SPINLOCK) 1291 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1292 else if (lock_class->lc_flags & LC_SLEEPLOCK) 1293 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1294 else { 1295 mtx_unlock_spin(&w_mtx); 1296 panic("lock class %s is not sleep or spin", 1297 lock_class->lc_name); 1298 } 1299 mtx_unlock_spin(&w_mtx); 1300 return (w); 1301} 1302 1303/* Don't let the door bang you on the way out... */ 1304static int 1305depart(struct witness *w) 1306{ 1307 struct witness_child_list_entry *wcl, *nwcl; 1308 struct witness_list *list; 1309 struct witness *parent; 1310 1311 MPASS(w->w_refcount == 0); 1312 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1313 list = &w_sleep; 1314 else 1315 list = &w_spin; 1316 /* 1317 * First, we run through the entire tree looking for any 1318 * witnesses that the outgoing witness is a child of. For 1319 * each parent that we find, we reparent all the direct 1320 * children of the outgoing witness to its parent. 1321 */ 1322 STAILQ_FOREACH(parent, list, w_typelist) { 1323 if (!isitmychild(parent, w)) 1324 continue; 1325 removechild(parent, w); 1326 if (!reparentchildren(parent, w)) 1327 return (0); 1328 } 1329 1330 /* 1331 * Now we go through and free up the child list of the 1332 * outgoing witness. 
1333 */ 1334 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1335 nwcl = wcl->wcl_next; 1336 witness_child_free(wcl); 1337 } 1338 1339 /* 1340 * Detach from various lists and free. 1341 */ 1342 STAILQ_REMOVE(list, w, witness, w_typelist); 1343 STAILQ_REMOVE(&w_all, w, witness, w_list); 1344 witness_free(w); 1345 1346 /* Finally, fixup the tree. */ 1347 return (rebalancetree(list)); 1348} 1349 1350/* 1351 * Prune an entire lock order tree. We look for cases where a lock 1352 * is now both a descendant and a direct child of a given lock. In 1353 * that case, we want to remove the direct child link from the tree. 1354 * 1355 * Returns false if insertchild() fails. 1356 */ 1357static int 1358rebalancetree(struct witness_list *list) 1359{ 1360 struct witness *child, *parent; 1361 1362 STAILQ_FOREACH(child, list, w_typelist) { 1363 STAILQ_FOREACH(parent, list, w_typelist) { 1364 if (!isitmychild(parent, child)) 1365 continue; 1366 removechild(parent, child); 1367 if (isitmydescendant(parent, child)) 1368 continue; 1369 if (!insertchild(parent, child)) 1370 return (0); 1371 } 1372 } 1373 witness_levelall(); 1374 return (1); 1375} 1376 1377/* 1378 * Add "child" as a direct child of "parent". Returns false if 1379 * we fail due to out of memory. 1380 */ 1381static int 1382insertchild(struct witness *parent, struct witness *child) 1383{ 1384 struct witness_child_list_entry **wcl; 1385 1386 MPASS(child != NULL && parent != NULL); 1387 1388 /* 1389 * Insert "child" after "parent" 1390 */ 1391 wcl = &parent->w_children; 1392 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1393 wcl = &(*wcl)->wcl_next; 1394 if (*wcl == NULL) { 1395 *wcl = witness_child_get(); 1396 if (*wcl == NULL) 1397 return (0); 1398 } 1399 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1400 1401 return (1); 1402} 1403 1404/* 1405 * Make all the direct descendants of oldparent be direct descendants 1406 * of newparent. 1407 */ 1408static int 1409reparentchildren(struct witness *newparent, struct witness *oldparent) 1410{ 1411 struct witness_child_list_entry *wcl; 1412 int i; 1413 1414 /* Avoid making a witness a child of itself. 
*/ 1415 MPASS(!isitmychild(oldparent, newparent)); 1416 1417 for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1418 for (i = 0; i < wcl->wcl_count; i++) 1419 if (!insertchild(newparent, wcl->wcl_children[i])) 1420 return (0); 1421 return (1); 1422} 1423 1424static int 1425itismychild(struct witness *parent, struct witness *child) 1426{ 1427 struct witness_list *list; 1428 1429 MPASS(child != NULL && parent != NULL); 1430 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1431 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1432 panic( 1433 "%s: parent (%s) and child (%s) are not the same lock type", 1434 __func__, parent->w_class->lc_name, 1435 child->w_class->lc_name); 1436 1437 if (!insertchild(parent, child)) 1438 return (0); 1439 1440 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1441 list = &w_sleep; 1442 else 1443 list = &w_spin; 1444 return (rebalancetree(list)); 1445} 1446 1447static void 1448removechild(struct witness *parent, struct witness *child) 1449{ 1450 struct witness_child_list_entry **wcl, *wcl1; 1451 int i; 1452 1453 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1454 for (i = 0; i < (*wcl)->wcl_count; i++) 1455 if ((*wcl)->wcl_children[i] == child) 1456 goto found; 1457 return; 1458found: 1459 (*wcl)->wcl_count--; 1460 if ((*wcl)->wcl_count > i) 1461 (*wcl)->wcl_children[i] = 1462 (*wcl)->wcl_children[(*wcl)->wcl_count]; 1463 MPASS((*wcl)->wcl_children[i] != NULL); 1464 if ((*wcl)->wcl_count != 0) 1465 return; 1466 wcl1 = *wcl; 1467 *wcl = wcl1->wcl_next; 1468 witness_child_free(wcl1); 1469} 1470 1471static int 1472isitmychild(struct witness *parent, struct witness *child) 1473{ 1474 struct witness_child_list_entry *wcl; 1475 int i; 1476 1477 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1478 for (i = 0; i < wcl->wcl_count; i++) { 1479 if (wcl->wcl_children[i] == child) 1480 return (1); 1481 } 1482 } 1483 return (0); 1484} 1485 1486static int 1487isitmydescendant(struct witness *parent, struct witness *child) 1488{ 1489 struct witness_child_list_entry *wcl; 1490 int i, j; 1491 1492 if (isitmychild(parent, child)) 1493 return (1); 1494 j = 0; 1495 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1496 MPASS(j < 1000); 1497 for (i = 0; i < wcl->wcl_count; i++) { 1498 if (isitmydescendant(wcl->wcl_children[i], child)) 1499 return (1); 1500 } 1501 j++; 1502 } 1503 return (0); 1504} 1505 1506static void 1507witness_levelall (void) 1508{ 1509 struct witness_list *list; 1510 struct witness *w, *w1; 1511 1512 /* 1513 * First clear all levels. 1514 */ 1515 STAILQ_FOREACH(w, &w_all, w_list) { 1516 w->w_level = 0; 1517 } 1518 1519 /* 1520 * Look for locks with no parent and level all their descendants. 1521 */ 1522 STAILQ_FOREACH(w, &w_all, w_list) { 1523 /* 1524 * This is just an optimization, technically we could get 1525 * away just walking the all list each time. 
1526 */ 1527 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1528 list = &w_sleep; 1529 else 1530 list = &w_spin; 1531 STAILQ_FOREACH(w1, list, w_typelist) { 1532 if (isitmychild(w1, w)) 1533 goto skip; 1534 } 1535 witness_leveldescendents(w, 0); 1536 skip: 1537 ; /* silence GCC 3.x */ 1538 } 1539} 1540 1541static void 1542witness_leveldescendents(struct witness *parent, int level) 1543{ 1544 struct witness_child_list_entry *wcl; 1545 int i; 1546 1547 if (parent->w_level < level) 1548 parent->w_level = level; 1549 level++; 1550 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1551 for (i = 0; i < wcl->wcl_count; i++) 1552 witness_leveldescendents(wcl->wcl_children[i], level); 1553} 1554 1555static void 1556witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1557 struct witness *parent, int indent) 1558{ 1559 struct witness_child_list_entry *wcl; 1560 int i, level; 1561 1562 level = parent->w_level; 1563 prnt("%-2d", level); 1564 for (i = 0; i < indent; i++) 1565 prnt(" "); 1566 if (parent->w_refcount > 0) 1567 prnt("%s", parent->w_name); 1568 else 1569 prnt("(dead)"); 1570 if (parent->w_displayed) { 1571 prnt(" -- (already displayed)\n"); 1572 return; 1573 } 1574 parent->w_displayed = 1; 1575 if (parent->w_refcount > 0) { 1576 if (parent->w_file != NULL) 1577 prnt(" -- last acquired @ %s:%d", parent->w_file, 1578 parent->w_line); 1579 } 1580 prnt("\n"); 1581 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1582 for (i = 0; i < wcl->wcl_count; i++) 1583 witness_displaydescendants(prnt, 1584 wcl->wcl_children[i], indent + 1); 1585} 1586 1587#ifdef BLESSING 1588static int 1589blessed(struct witness *w1, struct witness *w2) 1590{ 1591 int i; 1592 struct witness_blessed *b; 1593 1594 for (i = 0; i < blessed_count; i++) { 1595 b = &blessed_list[i]; 1596 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1597 if (strcmp(w2->w_name, b->b_lock2) == 0) 1598 return (1); 1599 continue; 1600 } 1601 if (strcmp(w1->w_name, b->b_lock2) == 0) 1602 if (strcmp(w2->w_name, b->b_lock1) == 0) 1603 return (1); 1604 } 1605 return (0); 1606} 1607#endif 1608 1609static struct witness * 1610witness_get(void) 1611{ 1612 struct witness *w; 1613 1614 if (witness_watch == 0) { 1615 mtx_unlock_spin(&w_mtx); 1616 return (NULL); 1617 } 1618 if (STAILQ_EMPTY(&w_free)) { 1619 witness_watch = 0; 1620 mtx_unlock_spin(&w_mtx); 1621 printf("%s: witness exhausted\n", __func__); 1622 return (NULL); 1623 } 1624 w = STAILQ_FIRST(&w_free); 1625 STAILQ_REMOVE_HEAD(&w_free, w_list); 1626 bzero(w, sizeof(*w)); 1627 return (w); 1628} 1629 1630static void 1631witness_free(struct witness *w) 1632{ 1633 1634 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1635} 1636 1637static struct witness_child_list_entry * 1638witness_child_get(void) 1639{ 1640 struct witness_child_list_entry *wcl; 1641 1642 if (witness_watch == 0) { 1643 mtx_unlock_spin(&w_mtx); 1644 return (NULL); 1645 } 1646 wcl = w_child_free; 1647 if (wcl == NULL) { 1648 witness_watch = 0; 1649 mtx_unlock_spin(&w_mtx); 1650 printf("%s: witness exhausted\n", __func__); 1651 return (NULL); 1652 } 1653 w_child_free = wcl->wcl_next; 1654 bzero(wcl, sizeof(*wcl)); 1655 return (wcl); 1656} 1657 1658static void 1659witness_child_free(struct witness_child_list_entry *wcl) 1660{ 1661 1662 wcl->wcl_next = w_child_free; 1663 w_child_free = wcl; 1664} 1665 1666static struct lock_list_entry * 1667witness_lock_list_get(void) 1668{ 1669 struct lock_list_entry *lle; 1670 1671 if (witness_watch == 0) 1672 return (NULL); 1673 mtx_lock_spin(&w_mtx); 1674 lle = w_lock_list_free; 
1675 if (lle == NULL) { 1676 witness_watch = 0; 1677 mtx_unlock_spin(&w_mtx); 1678 printf("%s: witness exhausted\n", __func__); 1679 return (NULL); 1680 } 1681 w_lock_list_free = lle->ll_next; 1682 mtx_unlock_spin(&w_mtx); 1683 bzero(lle, sizeof(*lle)); 1684 return (lle); 1685} 1686 1687static void 1688witness_lock_list_free(struct lock_list_entry *lle) 1689{ 1690 1691 mtx_lock_spin(&w_mtx); 1692 lle->ll_next = w_lock_list_free; 1693 w_lock_list_free = lle; 1694 mtx_unlock_spin(&w_mtx); 1695} 1696 1697static struct lock_instance * 1698find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1699{ 1700 struct lock_list_entry *lle; 1701 struct lock_instance *instance; 1702 int i; 1703 1704 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1705 for (i = lle->ll_count - 1; i >= 0; i--) { 1706 instance = &lle->ll_children[i]; 1707 if (instance->li_lock == lock) 1708 return (instance); 1709 } 1710 return (NULL); 1711} 1712 1713static void 1714witness_list_lock(struct lock_instance *instance) 1715{ 1716 struct lock_object *lock; 1717 1718 lock = instance->li_lock; 1719 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 1720 "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name); 1721 if (lock->lo_type != lock->lo_name) 1722 printf(" (%s)", lock->lo_type); 1723 printf(" r = %d (%p) locked @ %s:%d\n", 1724 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1725 instance->li_line); 1726} 1727 1728int 1729witness_list_locks(struct lock_list_entry **lock_list) 1730{ 1731 struct lock_list_entry *lle; 1732 int i, nheld; 1733 1734 nheld = 0; 1735 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1736 for (i = lle->ll_count - 1; i >= 0; i--) { 1737 witness_list_lock(&lle->ll_children[i]); 1738 nheld++; 1739 } 1740 return (nheld); 1741} 1742 1743/* 1744 * This is a bit risky at best. We call this function when we have timed 1745 * out acquiring a spin lock, and we assume that the other CPU is stuck 1746 * with this lock held. So, we go groveling around in the other CPU's 1747 * per-cpu data to try to find the lock instance for this spin lock to 1748 * see when it was last acquired. 
1749 */ 1750void 1751witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1752{ 1753 struct lock_instance *instance; 1754 struct pcpu *pc; 1755 1756 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1757 return; 1758 pc = pcpu_find(owner->td_oncpu); 1759 instance = find_instance(pc->pc_spinlocks, lock); 1760 if (instance != NULL) 1761 witness_list_lock(instance); 1762} 1763 1764void 1765witness_save(struct lock_object *lock, const char **filep, int *linep) 1766{ 1767 struct lock_instance *instance; 1768 1769 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1770 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1771 return; 1772 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1773 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1774 lock->lo_class->lc_name, lock->lo_name); 1775 instance = find_instance(curthread->td_sleeplocks, lock); 1776 if (instance == NULL) 1777 panic("%s: lock (%s) %s not locked", __func__, 1778 lock->lo_class->lc_name, lock->lo_name); 1779 *filep = instance->li_file; 1780 *linep = instance->li_line; 1781} 1782 1783void 1784witness_restore(struct lock_object *lock, const char *file, int line) 1785{ 1786 struct lock_instance *instance; 1787 1788 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1789 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1790 return; 1791 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1792 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1793 lock->lo_class->lc_name, lock->lo_name); 1794 instance = find_instance(curthread->td_sleeplocks, lock); 1795 if (instance == NULL) 1796 panic("%s: lock (%s) %s not locked", __func__, 1797 lock->lo_class->lc_name, lock->lo_name); 1798 lock->lo_witness->w_file = file; 1799 lock->lo_witness->w_line = line; 1800 instance->li_file = file; 1801 instance->li_line = line; 1802} 1803 1804void 1805witness_assert(struct lock_object *lock, int flags, const char *file, int line) 1806{ 1807#ifdef INVARIANT_SUPPORT 1808 struct lock_instance *instance; 1809 1810 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1811 return; 1812 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0) 1813 instance = find_instance(curthread->td_sleeplocks, lock); 1814 else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0) 1815 instance = find_instance(PCPU_GET(spinlocks), lock); 1816 else { 1817 panic("Lock (%s) %s is not sleep or spin!", 1818 lock->lo_class->lc_name, lock->lo_name); 1819 } 1820 file = fixup_filename(file); 1821 switch (flags) { 1822 case LA_UNLOCKED: 1823 if (instance != NULL) 1824 panic("Lock (%s) %s locked @ %s:%d.", 1825 lock->lo_class->lc_name, lock->lo_name, file, line); 1826 break; 1827 case LA_LOCKED: 1828 case LA_LOCKED | LA_RECURSED: 1829 case LA_LOCKED | LA_NOTRECURSED: 1830 case LA_SLOCKED: 1831 case LA_SLOCKED | LA_RECURSED: 1832 case LA_SLOCKED | LA_NOTRECURSED: 1833 case LA_XLOCKED: 1834 case LA_XLOCKED | LA_RECURSED: 1835 case LA_XLOCKED | LA_NOTRECURSED: 1836 if (instance == NULL) { 1837 panic("Lock (%s) %s not locked @ %s:%d.", 1838 lock->lo_class->lc_name, lock->lo_name, file, line); 1839 break; 1840 } 1841 if ((flags & LA_XLOCKED) != 0 && 1842 (instance->li_flags & LI_EXCLUSIVE) == 0) 1843 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1844 lock->lo_class->lc_name, lock->lo_name, file, line); 1845 if ((flags & LA_SLOCKED) != 0 && 1846 (instance->li_flags & LI_EXCLUSIVE) != 0) 1847 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1848 lock->lo_class->lc_name, 
lock->lo_name, file, line); 1849 if ((flags & LA_RECURSED) != 0 && 1850 (instance->li_flags & LI_RECURSEMASK) == 0) 1851 panic("Lock (%s) %s not recursed @ %s:%d.", 1852 lock->lo_class->lc_name, lock->lo_name, file, line); 1853 if ((flags & LA_NOTRECURSED) != 0 && 1854 (instance->li_flags & LI_RECURSEMASK) != 0) 1855 panic("Lock (%s) %s recursed @ %s:%d.", 1856 lock->lo_class->lc_name, lock->lo_name, file, line); 1857 break; 1858 default: 1859 panic("Invalid lock assertion at %s:%d.", file, line); 1860 1861 } 1862#endif /* INVARIANT_SUPPORT */ 1863} 1864 1865#ifdef DDB 1866static void 1867witness_list(struct thread *td) 1868{ 1869 1870 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1871 KASSERT(kdb_active, ("%s: not in the debugger", __func__)); 1872 1873 if (witness_watch == 0) 1874 return; 1875 1876 witness_list_locks(&td->td_sleeplocks); 1877 1878 /* 1879 * We only handle spinlocks if td == curthread. This is somewhat broken 1880 * if td is currently executing on some other CPU and holds spin locks 1881 * as we won't display those locks. If we had a MI way of getting 1882 * the per-cpu data for a given cpu then we could use 1883 * td->td_oncpu to get the list of spinlocks for this thread 1884 * and "fix" this. 1885 * 1886 * That still wouldn't really fix this unless we locked sched_lock 1887 * or stopped the other CPU to make sure it wasn't changing the list 1888 * out from under us. It is probably best to just not try to handle 1889 * threads on other CPU's for now. 1890 */ 1891 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1892 witness_list_locks(PCPU_PTR(spinlocks)); 1893} 1894 1895DB_SHOW_COMMAND(locks, db_witness_list) 1896{ 1897 struct thread *td; 1898 pid_t pid; 1899 struct proc *p; 1900 1901 if (have_addr) { 1902 pid = (addr % 16) + ((addr >> 4) % 16) * 10 + 1903 ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + 1904 ((addr >> 16) % 16) * 10000; 1905 /* sx_slock(&allproc_lock); */ 1906 FOREACH_PROC_IN_SYSTEM(p) { 1907 if (p->p_pid == pid) 1908 break; 1909 } 1910 /* sx_sunlock(&allproc_lock); */ 1911 if (p == NULL) { 1912 db_printf("pid %d not found\n", pid); 1913 return; 1914 } 1915 FOREACH_THREAD_IN_PROC(p, td) { 1916 witness_list(td); 1917 } 1918 } else { 1919 td = curthread; 1920 witness_list(td); 1921 } 1922} 1923 1924DB_SHOW_COMMAND(witness, db_witness_display) 1925{ 1926 1927 witness_display(db_printf); 1928} 1929#endif 1930
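
/*
 * [Editorial note, not part of the original revision: with DDB compiled in,
 * the two DB_SHOW_COMMAND handlers above are reached from the debugger
 * prompt as "show locks" (locks held by the current thread, or by the
 * threads of a pid given as a decimal argument) and "show witness"
 * (the full lock order tree), respectively.]
 */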