subr_witness.c revision 134971
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 */ 31 32/* 33 * Implementation of the `witness' lock verifier. Originally implemented for 34 * mutexes in BSD/OS. Extended to handle generic lock objects and lock 35 * classes in FreeBSD. 36 */ 37 38/* 39 * Main Entry: witness 40 * Pronunciation: 'wit-n&s 41 * Function: noun 42 * Etymology: Middle English witnesse, from Old English witnes knowledge, 43 * testimony, witness, from 2wit 44 * Date: before 12th century 45 * 1 : attestation of a fact or event : TESTIMONY 46 * 2 : one that gives evidence; specifically : one who testifies in 47 * a cause or before a judicial tribunal 48 * 3 : one asked to be present at a transaction so as to be able to 49 * testify to its having taken place 50 * 4 : one who has personal knowledge of something 51 * 5 a : something serving as evidence or proof : SIGN 52 * b : public affirmation by word or example of usually 53 * religious faith or conviction <the heroic witness to divine 54 * life -- Pilot> 55 * 6 capitalized : a member of the Jehovah's Witnesses 56 */ 57 58/* 59 * Special rules concerning Giant and lock orders: 60 * 61 * 1) Giant must be acquired before any other mutexes. Stated another way, 62 * no other mutex may be held when Giant is acquired. 63 * 64 * 2) Giant must be released when blocking on a sleepable lock. 65 * 66 * This rule is less obvious, but is a result of Giant providing the same 67 * semantics as spl(). Basically, when a thread sleeps, it must release 68 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule 69 * 2). 70 * 71 * 3) Giant may be acquired before or after sleepable locks. 72 * 73 * This rule is also not quite as obvious. Giant may be acquired after 74 * a sleepable lock because it is a non-sleepable lock and non-sleepable 75 * locks may always be acquired while holding a sleepable lock. The second 76 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose 77 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 78 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and 79 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to 80 * execute. Thus, acquiring Giant both before and after a sleepable lock 81 * will not result in a lock order reversal. 82 */ 83 84#include <sys/cdefs.h> 85__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 134971 2004-09-09 05:01:37Z rwatson $"); 86 87#include "opt_ddb.h" 88#include "opt_witness.h" 89 90#include <sys/param.h> 91#include <sys/bus.h> 92#include <sys/kdb.h> 93#include <sys/kernel.h> 94#include <sys/ktr.h> 95#include <sys/lock.h> 96#include <sys/malloc.h> 97#include <sys/mutex.h> 98#include <sys/proc.h> 99#include <sys/sysctl.h> 100#include <sys/systm.h> 101 102#include <ddb/ddb.h> 103 104#include <machine/stdarg.h> 105 106/* Define this to check for blessed mutexes */ 107#undef BLESSING 108 109#define WITNESS_COUNT 200 110#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) 111/* 112 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads 113 * will hold LOCK_NCHILDREN * 2 locks. We handle failure ok, and we should 114 * probably be safe for the most part, but it's still a SWAG. 115 */ 116#define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2 117 118#define WITNESS_NCHILDREN 6 119 120struct witness_child_list_entry; 121 122struct witness { 123 const char *w_name; 124 struct lock_class *w_class; 125 STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ 126 STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 127 struct witness_child_list_entry *w_children; /* Great evilness... */ 128 const char *w_file; 129 int w_line; 130 u_int w_level; 131 u_int w_refcount; 132 u_char w_Giant_squawked:1; 133 u_char w_other_squawked:1; 134 u_char w_same_squawked:1; 135 u_char w_displayed:1; 136}; 137 138struct witness_child_list_entry { 139 struct witness_child_list_entry *wcl_next; 140 struct witness *wcl_children[WITNESS_NCHILDREN]; 141 u_int wcl_count; 142}; 143 144STAILQ_HEAD(witness_list, witness); 145 146#ifdef BLESSING 147struct witness_blessed { 148 const char *b_lock1; 149 const char *b_lock2; 150}; 151#endif 152 153struct witness_order_list_entry { 154 const char *w_name; 155 struct lock_class *w_class; 156}; 157 158#ifdef BLESSING 159static int blessed(struct witness *, struct witness *); 160#endif 161static int depart(struct witness *w); 162static struct witness *enroll(const char *description, 163 struct lock_class *lock_class); 164static int insertchild(struct witness *parent, struct witness *child); 165static int isitmychild(struct witness *parent, struct witness *child); 166static int isitmydescendant(struct witness *parent, struct witness *child); 167static int itismychild(struct witness *parent, struct witness *child); 168static int rebalancetree(struct witness_list *list); 169static void removechild(struct witness *parent, struct witness *child); 170static int reparentchildren(struct witness *newparent, 171 struct witness *oldparent); 172static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); 173static void witness_displaydescendants(void(*)(const char *fmt, ...), 174 struct witness *, int indent); 175static const char *fixup_filename(const char *file); 176static void witness_leveldescendents(struct witness *parent, int level); 177static void witness_levelall(void); 178static struct witness *witness_get(void); 179static void witness_free(struct witness *m); 180static struct witness_child_list_entry *witness_child_get(void); 181static void witness_child_free(struct witness_child_list_entry *wcl); 182static struct lock_list_entry *witness_lock_list_get(void); 183static void witness_lock_list_free(struct lock_list_entry *lle); 184static struct lock_instance *find_instance(struct lock_list_entry *lock_list, 185 struct lock_object *lock); 186static void witness_list_lock(struct lock_instance *instance); 187#ifdef DDB 188static void witness_list(struct thread *td); 189static void witness_display_list(void(*prnt)(const char *fmt, ...), 190 struct witness_list *list); 191static void witness_display(void(*)(const char *fmt, ...)); 192#endif 193 194MALLOC_DEFINE(M_WITNESS, "witness", "witness structure"); 195SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking"); 196 197/* 198 * If set to 0, witness is disabled. If set to 1, witness performs full lock 199 * order checking for all locks. If set to 2 or higher, then witness skips 200 * the full lock order check if the lock being acquired is at a higher level 201 * (i.e. farther down in the tree) than the current lock. This last mode is 202 * somewhat experimental and not considered fully safe. At runtime, this 203 * value may be set to 0 to turn off witness. witness is not allowed be 204 * turned on once it is turned off, however. 205 */ 206static int witness_watch = 1; 207TUNABLE_INT("debug.witness_watch", &witness_watch); 208TUNABLE_INT("debug.witness.watch", &witness_watch); 209SYSCTL_PROC(_debug_witness, OID_AUTO, watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0, 210 sysctl_debug_witness_watch, "I", "witness is watching lock operations"); 211 212#ifdef KDB 213/* 214 * When KDB is enabled and witness_kdb is set to 1, it will cause the system 215 * to drop into kdebug() when: 216 * - a lock heirarchy violation occurs 217 * - locks are held when going to sleep. 218 */ 219#ifdef WITNESS_KDB 220int witness_kdb = 1; 221#else 222int witness_kdb = 0; 223#endif 224TUNABLE_INT("debug.witness_kdb", &witness_kdb); 225TUNABLE_INT("debug.witness.kdb", &witness_kdb); 226SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RW, &witness_kdb, 0, ""); 227 228/* 229 * When KDB is enabled and witness_trace is set to 1, it will cause the system 230 * to print a stack trace: 231 * - a lock heirarchy violation occurs 232 * - locks are held when going to sleep. 233 */ 234int witness_trace = 1; 235TUNABLE_INT("debug.witness_trace", &witness_trace); 236TUNABLE_INT("debug.witness.trace", &witness_trace); 237SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RW, &witness_trace, 0, ""); 238#endif /* KDB */ 239 240#ifdef WITNESS_SKIPSPIN 241int witness_skipspin = 1; 242#else 243int witness_skipspin = 0; 244#endif 245TUNABLE_INT("debug.witness_skipspin", &witness_skipspin); 246TUNABLE_INT("debug.witness.skipspin", &witness_skipspin); 247SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, 248 &witness_skipspin, 0, ""); 249 250static struct mtx w_mtx; 251static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); 252static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); 253static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); 254static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); 255static struct witness_child_list_entry *w_child_free = NULL; 256static struct lock_list_entry *w_lock_list_free = NULL; 257 258static struct witness w_data[WITNESS_COUNT]; 259static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT]; 260static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; 261 262static struct witness_order_list_entry order_lists[] = { 263 { "proctree", &lock_class_sx }, 264 { "allproc", &lock_class_sx }, 265 { "Giant", &lock_class_mtx_sleep }, 266 { "filedesc structure", &lock_class_mtx_sleep }, 267 { "pipe mutex", &lock_class_mtx_sleep }, 268 { "sigio lock", &lock_class_mtx_sleep }, 269 { "process group", &lock_class_mtx_sleep }, 270 { "process lock", &lock_class_mtx_sleep }, 271 { "session", &lock_class_mtx_sleep }, 272 { "uidinfo hash", &lock_class_mtx_sleep }, 273 { "uidinfo struct", &lock_class_mtx_sleep }, 274 { "allprison", &lock_class_mtx_sleep }, 275 { NULL, NULL }, 276 /* 277 * Sockets 278 */ 279 { "filedesc structure", &lock_class_mtx_sleep }, 280 { "accept", &lock_class_mtx_sleep }, 281 { "so_snd", &lock_class_mtx_sleep }, 282 { "so_rcv", &lock_class_mtx_sleep }, 283 { "sellck", &lock_class_mtx_sleep }, 284 { NULL, NULL }, 285 /* 286 * Routing 287 */ 288 { "so_rcv", &lock_class_mtx_sleep }, 289 { "radix node head", &lock_class_mtx_sleep }, 290 { "rtentry", &lock_class_mtx_sleep }, 291 { "ifaddr", &lock_class_mtx_sleep }, 292 { NULL, NULL }, 293 /* 294 * UNIX Domain Sockets 295 */ 296 { "unp", &lock_class_mtx_sleep }, 297 { "so_snd", &lock_class_mtx_sleep }, 298 { NULL, NULL }, 299 /* 300 * UDP/IP 301 */ 302 { "udp", &lock_class_mtx_sleep }, 303 { "udpinp", &lock_class_mtx_sleep }, 304 { "so_snd", &lock_class_mtx_sleep }, 305 { NULL, NULL }, 306 /* 307 * TCP/IP 308 */ 309 { "tcp", &lock_class_mtx_sleep }, 310 { "tcpinp", &lock_class_mtx_sleep }, 311 { "so_snd", &lock_class_mtx_sleep }, 312 { NULL, NULL }, 313 /* 314 * SLIP 315 */ 316 { "slip_mtx", &lock_class_mtx_sleep }, 317 { "slip sc_mtx", &lock_class_mtx_sleep }, 318 { NULL, NULL }, 319 /* 320 * netatalk 321 */ 322 { "ddp_list_mtx", &lock_class_mtx_sleep }, 323 { "ddp_mtx", &lock_class_mtx_sleep }, 324 { NULL, NULL }, 325 /* 326 * BPF 327 */ 328 { "bpf global lock", &lock_class_mtx_sleep }, 329 { "bpf interface lock", &lock_class_mtx_sleep }, 330 { "bpf cdev lock", &lock_class_mtx_sleep }, 331 { NULL, NULL }, 332 /* 333 * spin locks 334 */ 335#ifdef SMP 336 { "ap boot", &lock_class_mtx_spin }, 337#endif 338 { "sio", &lock_class_mtx_spin }, 339#ifdef __i386__ 340 { "cy", &lock_class_mtx_spin }, 341#endif 342 { "uart_hwmtx", &lock_class_mtx_spin }, 343 { "sabtty", &lock_class_mtx_spin }, 344 { "zstty", &lock_class_mtx_spin }, 345 { "ng_node", &lock_class_mtx_spin }, 346 { "ng_worklist", &lock_class_mtx_spin }, 347 { "taskqueue_fast", &lock_class_mtx_spin }, 348 { "intr table", &lock_class_mtx_spin }, 349 { "ithread table lock", &lock_class_mtx_spin }, 350 { "sleepq chain", &lock_class_mtx_spin }, 351 { "sched lock", &lock_class_mtx_spin }, 352 { "turnstile chain", &lock_class_mtx_spin }, 353 { "td_contested", &lock_class_mtx_spin }, 354 { "callout", &lock_class_mtx_spin }, 355 { "entropy harvest", &lock_class_mtx_spin }, 356 { "entropy harvest buffers", &lock_class_mtx_spin }, 357 /* 358 * leaf locks 359 */ 360 { "allpmaps", &lock_class_mtx_spin }, 361 { "vm page queue free mutex", &lock_class_mtx_spin }, 362 { "icu", &lock_class_mtx_spin }, 363#ifdef SMP 364 { "smp rendezvous", &lock_class_mtx_spin }, 365#if defined(__i386__) || defined(__amd64__) 366 { "tlb", &lock_class_mtx_spin }, 367#endif 368#ifdef __sparc64__ 369 { "ipi", &lock_class_mtx_spin }, 370#endif 371#endif 372 { "clk", &lock_class_mtx_spin }, 373 { "mutex profiling lock", &lock_class_mtx_spin }, 374 { "kse zombie lock", &lock_class_mtx_spin }, 375 { "ALD Queue", &lock_class_mtx_spin }, 376#ifdef __ia64__ 377 { "MCA spin lock", &lock_class_mtx_spin }, 378#endif 379#if defined(__i386__) || defined(__amd64__) 380 { "pcicfg", &lock_class_mtx_spin }, 381#endif 382 { NULL, NULL }, 383 { NULL, NULL } 384}; 385 386#ifdef BLESSING 387/* 388 * Pairs of locks which have been blessed 389 * Don't complain about order problems with blessed locks 390 */ 391static struct witness_blessed blessed_list[] = { 392}; 393static int blessed_count = 394 sizeof(blessed_list) / sizeof(struct witness_blessed); 395#endif 396 397/* 398 * List of all locks in the system. 399 */ 400TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks); 401 402static struct mtx all_mtx = { 403 { &lock_class_mtx_sleep, /* mtx_object.lo_class */ 404 "All locks list", /* mtx_object.lo_name */ 405 "All locks list", /* mtx_object.lo_type */ 406 LO_INITIALIZED, /* mtx_object.lo_flags */ 407 { NULL, NULL }, /* mtx_object.lo_list */ 408 NULL }, /* mtx_object.lo_witness */ 409 MTX_UNOWNED, 0 /* mtx_lock, mtx_recurse */ 410}; 411 412/* 413 * This global is set to 0 once it becomes safe to use the witness code. 414 */ 415static int witness_cold = 1; 416 417/* 418 * Global variables for book keeping. 419 */ 420static int lock_cur_cnt; 421static int lock_max_cnt; 422 423/* 424 * The WITNESS-enabled diagnostic code. 425 */ 426static void 427witness_initialize(void *dummy __unused) 428{ 429 struct lock_object *lock; 430 struct witness_order_list_entry *order; 431 struct witness *w, *w1; 432 int i; 433 434 /* 435 * We have to release Giant before initializing its witness 436 * structure so that WITNESS doesn't get confused. 437 */ 438 mtx_unlock(&Giant); 439 mtx_assert(&Giant, MA_NOTOWNED); 440 441 CTR1(KTR_WITNESS, "%s: initializing witness", __func__); 442 TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list); 443 mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | 444 MTX_NOWITNESS); 445 for (i = 0; i < WITNESS_COUNT; i++) 446 witness_free(&w_data[i]); 447 for (i = 0; i < WITNESS_CHILDCOUNT; i++) 448 witness_child_free(&w_childdata[i]); 449 for (i = 0; i < LOCK_CHILDCOUNT; i++) 450 witness_lock_list_free(&w_locklistdata[i]); 451 452 /* First add in all the specified order lists. */ 453 for (order = order_lists; order->w_name != NULL; order++) { 454 w = enroll(order->w_name, order->w_class); 455 if (w == NULL) 456 continue; 457 w->w_file = "order list"; 458 for (order++; order->w_name != NULL; order++) { 459 w1 = enroll(order->w_name, order->w_class); 460 if (w1 == NULL) 461 continue; 462 w1->w_file = "order list"; 463 if (!itismychild(w, w1)) 464 panic("Not enough memory for static orders!"); 465 w = w1; 466 } 467 } 468 469 /* Iterate through all locks and add them to witness. */ 470 mtx_lock(&all_mtx); 471 TAILQ_FOREACH(lock, &all_locks, lo_list) { 472 if (lock->lo_flags & LO_WITNESS) 473 lock->lo_witness = enroll(lock->lo_type, 474 lock->lo_class); 475 else 476 lock->lo_witness = NULL; 477 } 478 mtx_unlock(&all_mtx); 479 480 /* Mark the witness code as being ready for use. */ 481 atomic_store_rel_int(&witness_cold, 0); 482 483 mtx_lock(&Giant); 484} 485SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) 486 487static int 488sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) 489{ 490 int error, value; 491 492 value = witness_watch; 493 error = sysctl_handle_int(oidp, &value, 0, req); 494 if (error != 0 || req->newptr == NULL) 495 return (error); 496 error = suser(req->td); 497 if (error != 0) 498 return (error); 499 if (value == witness_watch) 500 return (0); 501 if (value != 0) 502 return (EINVAL); 503 witness_watch = 0; 504 return (0); 505} 506 507void 508witness_init(struct lock_object *lock) 509{ 510 struct lock_class *class; 511 512 class = lock->lo_class; 513 if (lock->lo_flags & LO_INITIALIZED) 514 panic("%s: lock (%s) %s is already initialized", __func__, 515 class->lc_name, lock->lo_name); 516 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 517 (class->lc_flags & LC_RECURSABLE) == 0) 518 panic("%s: lock (%s) %s can not be recursable", __func__, 519 class->lc_name, lock->lo_name); 520 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 521 (class->lc_flags & LC_SLEEPABLE) == 0) 522 panic("%s: lock (%s) %s can not be sleepable", __func__, 523 class->lc_name, lock->lo_name); 524 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 525 (class->lc_flags & LC_UPGRADABLE) == 0) 526 panic("%s: lock (%s) %s can not be upgradable", __func__, 527 class->lc_name, lock->lo_name); 528 529 mtx_lock(&all_mtx); 530 TAILQ_INSERT_TAIL(&all_locks, lock, lo_list); 531 lock->lo_flags |= LO_INITIALIZED; 532 lock_cur_cnt++; 533 if (lock_cur_cnt > lock_max_cnt) 534 lock_max_cnt = lock_cur_cnt; 535 mtx_unlock(&all_mtx); 536 if (!witness_cold && witness_watch != 0 && panicstr == NULL && 537 (lock->lo_flags & LO_WITNESS) != 0) 538 lock->lo_witness = enroll(lock->lo_type, class); 539 else 540 lock->lo_witness = NULL; 541} 542 543void 544witness_destroy(struct lock_object *lock) 545{ 546 struct witness *w; 547 548 if (witness_cold) 549 panic("lock (%s) %s destroyed while witness_cold", 550 lock->lo_class->lc_name, lock->lo_name); 551 if ((lock->lo_flags & LO_INITIALIZED) == 0) 552 panic("%s: lock (%s) %s is not initialized", __func__, 553 lock->lo_class->lc_name, lock->lo_name); 554 555 /* XXX: need to verify that no one holds the lock */ 556 w = lock->lo_witness; 557 if (w != NULL) { 558 mtx_lock_spin(&w_mtx); 559 MPASS(w->w_refcount > 0); 560 w->w_refcount--; 561 562 /* 563 * Lock is already released if we have an allocation failure 564 * and depart() fails. 565 */ 566 if (w->w_refcount != 0 || depart(w)) 567 mtx_unlock_spin(&w_mtx); 568 } 569 570 mtx_lock(&all_mtx); 571 lock_cur_cnt--; 572 TAILQ_REMOVE(&all_locks, lock, lo_list); 573 lock->lo_flags &= ~LO_INITIALIZED; 574 mtx_unlock(&all_mtx); 575} 576 577#ifdef DDB 578static void 579witness_display_list(void(*prnt)(const char *fmt, ...), 580 struct witness_list *list) 581{ 582 struct witness *w; 583 584 STAILQ_FOREACH(w, list, w_typelist) { 585 if (w->w_file == NULL || w->w_level > 0) 586 continue; 587 /* 588 * This lock has no anscestors, display its descendants. 589 */ 590 witness_displaydescendants(prnt, w, 0); 591 } 592} 593 594static void 595witness_display(void(*prnt)(const char *fmt, ...)) 596{ 597 struct witness *w; 598 599 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 600 witness_levelall(); 601 602 /* Clear all the displayed flags. */ 603 STAILQ_FOREACH(w, &w_all, w_list) { 604 w->w_displayed = 0; 605 } 606 607 /* 608 * First, handle sleep locks which have been acquired at least 609 * once. 610 */ 611 prnt("Sleep locks:\n"); 612 witness_display_list(prnt, &w_sleep); 613 614 /* 615 * Now do spin locks which have been acquired at least once. 616 */ 617 prnt("\nSpin locks:\n"); 618 witness_display_list(prnt, &w_spin); 619 620 /* 621 * Finally, any locks which have not been acquired yet. 622 */ 623 prnt("\nLocks which were never acquired:\n"); 624 STAILQ_FOREACH(w, &w_all, w_list) { 625 if (w->w_file != NULL || w->w_refcount == 0) 626 continue; 627 prnt("%s\n", w->w_name); 628 } 629} 630#endif /* DDB */ 631 632/* Trim useless garbage from filenames. */ 633static const char * 634fixup_filename(const char *file) 635{ 636 637 if (file == NULL) 638 return (NULL); 639 while (strncmp(file, "../", 3) == 0) 640 file += 3; 641 return (file); 642} 643 644int 645witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 646{ 647 648 if (witness_watch == 0 || panicstr != NULL) 649 return (0); 650 651 /* Require locks that witness knows about. */ 652 if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 653 lock2->lo_witness == NULL) 654 return (EINVAL); 655 656 MPASS(!mtx_owned(&w_mtx)); 657 mtx_lock_spin(&w_mtx); 658 659 /* 660 * If we already have either an explicit or implied lock order that 661 * is the other way around, then return an error. 662 */ 663 if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 664 mtx_unlock_spin(&w_mtx); 665 return (EDOOFUS); 666 } 667 668 /* Try to add the new order. */ 669 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 670 lock2->lo_type, lock1->lo_type); 671 if (!itismychild(lock1->lo_witness, lock2->lo_witness)) 672 return (ENOMEM); 673 mtx_unlock_spin(&w_mtx); 674 return (0); 675} 676 677void 678witness_checkorder(struct lock_object *lock, int flags, const char *file, 679 int line) 680{ 681 struct lock_list_entry **lock_list, *lle; 682 struct lock_instance *lock1, *lock2; 683 struct lock_class *class; 684 struct witness *w, *w1; 685 struct thread *td; 686 int i, j; 687 688 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 689 panicstr != NULL) 690 return; 691 692 /* 693 * Try locks do not block if they fail to acquire the lock, thus 694 * there is no danger of deadlocks or of switching while holding a 695 * spin lock if we acquire a lock via a try operation. This 696 * function shouldn't even be called for try locks, so panic if 697 * that happens. 698 */ 699 if (flags & LOP_TRYLOCK) 700 panic("%s should not be called for try lock operations", 701 __func__); 702 703 w = lock->lo_witness; 704 class = lock->lo_class; 705 td = curthread; 706 file = fixup_filename(file); 707 708 if (class->lc_flags & LC_SLEEPLOCK) { 709 /* 710 * Since spin locks include a critical section, this check 711 * implicitly enforces a lock order of all sleep locks before 712 * all spin locks. 713 */ 714 if (td->td_critnest != 0) 715 panic("blockable sleep lock (%s) %s @ %s:%d", 716 class->lc_name, lock->lo_name, file, line); 717 718 /* 719 * If this is the first lock acquired then just return as 720 * no order checking is needed. 721 */ 722 if (td->td_sleeplocks == NULL) 723 return; 724 lock_list = &td->td_sleeplocks; 725 } else { 726 /* 727 * If this is the first lock, just return as no order 728 * checking is needed. We check this in both if clauses 729 * here as unifying the check would require us to use a 730 * critical section to ensure we don't migrate while doing 731 * the check. Note that if this is not the first lock, we 732 * are already in a critical section and are safe for the 733 * rest of the check. 734 */ 735 if (PCPU_GET(spinlocks) == NULL) 736 return; 737 lock_list = PCPU_PTR(spinlocks); 738 } 739 740 /* 741 * Check to see if we are recursing on a lock we already own. If 742 * so, make sure that we don't mismatch exclusive and shared lock 743 * acquires. 744 */ 745 lock1 = find_instance(*lock_list, lock); 746 if (lock1 != NULL) { 747 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 748 (flags & LOP_EXCLUSIVE) == 0) { 749 printf("shared lock of (%s) %s @ %s:%d\n", 750 class->lc_name, lock->lo_name, file, line); 751 printf("while exclusively locked from %s:%d\n", 752 lock1->li_file, lock1->li_line); 753 panic("share->excl"); 754 } 755 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 756 (flags & LOP_EXCLUSIVE) != 0) { 757 printf("exclusive lock of (%s) %s @ %s:%d\n", 758 class->lc_name, lock->lo_name, file, line); 759 printf("while share locked from %s:%d\n", 760 lock1->li_file, lock1->li_line); 761 panic("excl->share"); 762 } 763 return; 764 } 765 766 /* 767 * Try locks do not block if they fail to acquire the lock, thus 768 * there is no danger of deadlocks or of switching while holding a 769 * spin lock if we acquire a lock via a try operation. 770 */ 771 if (flags & LOP_TRYLOCK) 772 return; 773 774 /* 775 * Check for duplicate locks of the same type. Note that we only 776 * have to check for this on the last lock we just acquired. Any 777 * other cases will be caught as lock order violations. 778 */ 779 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 780 w1 = lock1->li_lock->lo_witness; 781 if (w1 == w) { 782 if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK)) 783 return; 784 w->w_same_squawked = 1; 785 printf("acquiring duplicate lock of same type: \"%s\"\n", 786 lock->lo_type); 787 printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, 788 lock1->li_file, lock1->li_line); 789 printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); 790#ifdef KDB 791 goto debugger; 792#else 793 return; 794#endif 795 } 796 MPASS(!mtx_owned(&w_mtx)); 797 mtx_lock_spin(&w_mtx); 798 /* 799 * If we have a known higher number just say ok 800 */ 801 if (witness_watch > 1 && w->w_level > w1->w_level) { 802 mtx_unlock_spin(&w_mtx); 803 return; 804 } 805 /* 806 * If we know that the the lock we are acquiring comes after 807 * the lock we most recently acquired in the lock order tree, 808 * then there is no need for any further checks. 809 */ 810 if (isitmydescendant(w1, w)) { 811 mtx_unlock_spin(&w_mtx); 812 return; 813 } 814 for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { 815 for (i = lle->ll_count - 1; i >= 0; i--, j++) { 816 817 MPASS(j < WITNESS_COUNT); 818 lock1 = &lle->ll_children[i]; 819 w1 = lock1->li_lock->lo_witness; 820 821 /* 822 * If this lock doesn't undergo witness checking, 823 * then skip it. 824 */ 825 if (w1 == NULL) { 826 KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, 827 ("lock missing witness structure")); 828 continue; 829 } 830 /* 831 * If we are locking Giant and this is a sleepable 832 * lock, then skip it. 833 */ 834 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 835 lock == &Giant.mtx_object) 836 continue; 837 /* 838 * If we are locking a sleepable lock and this lock 839 * is Giant, then skip it. 840 */ 841 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 842 lock1->li_lock == &Giant.mtx_object) 843 continue; 844 /* 845 * If we are locking a sleepable lock and this lock 846 * isn't sleepable, we want to treat it as a lock 847 * order violation to enfore a general lock order of 848 * sleepable locks before non-sleepable locks. 849 */ 850 if (!((lock->lo_flags & LO_SLEEPABLE) != 0 && 851 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 852 /* 853 * Check the lock order hierarchy for a reveresal. 854 */ 855 if (!isitmydescendant(w, w1)) 856 continue; 857 /* 858 * We have a lock order violation, check to see if it 859 * is allowed or has already been yelled about. 860 */ 861 mtx_unlock_spin(&w_mtx); 862#ifdef BLESSING 863 /* 864 * If the lock order is blessed, just bail. We don't 865 * look for other lock order violations though, which 866 * may be a bug. 867 */ 868 if (blessed(w, w1)) 869 return; 870#endif 871 if (lock1->li_lock == &Giant.mtx_object) { 872 if (w1->w_Giant_squawked) 873 return; 874 else 875 w1->w_Giant_squawked = 1; 876 } else { 877 if (w1->w_other_squawked) 878 return; 879 else 880 w1->w_other_squawked = 1; 881 } 882 /* 883 * Ok, yell about it. 884 */ 885 printf("lock order reversal\n"); 886 /* 887 * Try to locate an earlier lock with 888 * witness w in our list. 889 */ 890 do { 891 lock2 = &lle->ll_children[i]; 892 MPASS(lock2->li_lock != NULL); 893 if (lock2->li_lock->lo_witness == w) 894 break; 895 if (i == 0 && lle->ll_next != NULL) { 896 lle = lle->ll_next; 897 i = lle->ll_count - 1; 898 MPASS(i >= 0 && i < LOCK_NCHILDREN); 899 } else 900 i--; 901 } while (i >= 0); 902 if (i < 0) { 903 printf(" 1st %p %s (%s) @ %s:%d\n", 904 lock1->li_lock, lock1->li_lock->lo_name, 905 lock1->li_lock->lo_type, lock1->li_file, 906 lock1->li_line); 907 printf(" 2nd %p %s (%s) @ %s:%d\n", lock, 908 lock->lo_name, lock->lo_type, file, line); 909 } else { 910 printf(" 1st %p %s (%s) @ %s:%d\n", 911 lock2->li_lock, lock2->li_lock->lo_name, 912 lock2->li_lock->lo_type, lock2->li_file, 913 lock2->li_line); 914 printf(" 2nd %p %s (%s) @ %s:%d\n", 915 lock1->li_lock, lock1->li_lock->lo_name, 916 lock1->li_lock->lo_type, lock1->li_file, 917 lock1->li_line); 918 printf(" 3rd %p %s (%s) @ %s:%d\n", lock, 919 lock->lo_name, lock->lo_type, file, line); 920 } 921#ifdef KDB 922 goto debugger; 923#else 924 return; 925#endif 926 } 927 } 928 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 929 /* 930 * If requested, build a new lock order. However, don't build a new 931 * relationship between a sleepable lock and Giant if it is in the 932 * wrong direction. The correct lock order is that sleepable locks 933 * always come before Giant. 934 */ 935 if (flags & LOP_NEWORDER && 936 !(lock1->li_lock == &Giant.mtx_object && 937 (lock->lo_flags & LO_SLEEPABLE) != 0)) { 938 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 939 lock->lo_type, lock1->li_lock->lo_type); 940 if (!itismychild(lock1->li_lock->lo_witness, w)) 941 /* Witness is dead. */ 942 return; 943 } 944 mtx_unlock_spin(&w_mtx); 945 return; 946 947#ifdef KDB 948debugger: 949 if (witness_trace) 950 kdb_backtrace(); 951 if (witness_kdb) 952 kdb_enter(__func__); 953#endif 954} 955 956void 957witness_lock(struct lock_object *lock, int flags, const char *file, int line) 958{ 959 struct lock_list_entry **lock_list, *lle; 960 struct lock_instance *instance; 961 struct witness *w; 962 struct thread *td; 963 964 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 965 panicstr != NULL) 966 return; 967 w = lock->lo_witness; 968 td = curthread; 969 file = fixup_filename(file); 970 971 /* Determine lock list for this lock. */ 972 if (lock->lo_class->lc_flags & LC_SLEEPLOCK) 973 lock_list = &td->td_sleeplocks; 974 else 975 lock_list = PCPU_PTR(spinlocks); 976 977 /* Check to see if we are recursing on a lock we already own. */ 978 instance = find_instance(*lock_list, lock); 979 if (instance != NULL) { 980 instance->li_flags++; 981 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 982 td->td_proc->p_pid, lock->lo_name, 983 instance->li_flags & LI_RECURSEMASK); 984 instance->li_file = file; 985 instance->li_line = line; 986 return; 987 } 988 989 /* Update per-witness last file and line acquire. */ 990 w->w_file = file; 991 w->w_line = line; 992 993 /* Find the next open lock instance in the list and fill it. */ 994 lle = *lock_list; 995 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 996 lle = witness_lock_list_get(); 997 if (lle == NULL) 998 return; 999 lle->ll_next = *lock_list; 1000 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, 1001 td->td_proc->p_pid, lle); 1002 *lock_list = lle; 1003 } 1004 instance = &lle->ll_children[lle->ll_count++]; 1005 instance->li_lock = lock; 1006 instance->li_line = line; 1007 instance->li_file = file; 1008 if ((flags & LOP_EXCLUSIVE) != 0) 1009 instance->li_flags = LI_EXCLUSIVE; 1010 else 1011 instance->li_flags = 0; 1012 CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, 1013 td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); 1014} 1015 1016void 1017witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) 1018{ 1019 struct lock_instance *instance; 1020 struct lock_class *class; 1021 1022 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1023 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1024 return; 1025 class = lock->lo_class; 1026 file = fixup_filename(file); 1027 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1028 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", 1029 class->lc_name, lock->lo_name, file, line); 1030 if ((flags & LOP_TRYLOCK) == 0) 1031 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, 1032 lock->lo_name, file, line); 1033 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1034 panic("upgrade of non-sleep lock (%s) %s @ %s:%d", 1035 class->lc_name, lock->lo_name, file, line); 1036 instance = find_instance(curthread->td_sleeplocks, lock); 1037 if (instance == NULL) 1038 panic("upgrade of unlocked lock (%s) %s @ %s:%d", 1039 class->lc_name, lock->lo_name, file, line); 1040 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1041 panic("upgrade of exclusive lock (%s) %s @ %s:%d", 1042 class->lc_name, lock->lo_name, file, line); 1043 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1044 panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", 1045 class->lc_name, lock->lo_name, 1046 instance->li_flags & LI_RECURSEMASK, file, line); 1047 instance->li_flags |= LI_EXCLUSIVE; 1048} 1049 1050void 1051witness_downgrade(struct lock_object *lock, int flags, const char *file, 1052 int line) 1053{ 1054 struct lock_instance *instance; 1055 struct lock_class *class; 1056 1057 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1058 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1059 return; 1060 class = lock->lo_class; 1061 file = fixup_filename(file); 1062 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1063 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", 1064 class->lc_name, lock->lo_name, file, line); 1065 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1066 panic("downgrade of non-sleep lock (%s) %s @ %s:%d", 1067 class->lc_name, lock->lo_name, file, line); 1068 instance = find_instance(curthread->td_sleeplocks, lock); 1069 if (instance == NULL) 1070 panic("downgrade of unlocked lock (%s) %s @ %s:%d", 1071 class->lc_name, lock->lo_name, file, line); 1072 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 1073 panic("downgrade of shared lock (%s) %s @ %s:%d", 1074 class->lc_name, lock->lo_name, file, line); 1075 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1076 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", 1077 class->lc_name, lock->lo_name, 1078 instance->li_flags & LI_RECURSEMASK, file, line); 1079 instance->li_flags &= ~LI_EXCLUSIVE; 1080} 1081 1082void 1083witness_unlock(struct lock_object *lock, int flags, const char *file, int line) 1084{ 1085 struct lock_list_entry **lock_list, *lle; 1086 struct lock_instance *instance; 1087 struct lock_class *class; 1088 struct thread *td; 1089 register_t s; 1090 int i, j; 1091 1092 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1093 panicstr != NULL) 1094 return; 1095 td = curthread; 1096 class = lock->lo_class; 1097 file = fixup_filename(file); 1098 1099 /* Find lock instance associated with this lock. */ 1100 if (class->lc_flags & LC_SLEEPLOCK) 1101 lock_list = &td->td_sleeplocks; 1102 else 1103 lock_list = PCPU_PTR(spinlocks); 1104 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1105 for (i = 0; i < (*lock_list)->ll_count; i++) { 1106 instance = &(*lock_list)->ll_children[i]; 1107 if (instance->li_lock == lock) 1108 goto found; 1109 } 1110 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1111 file, line); 1112found: 1113 1114 /* First, check for shared/exclusive mismatches. */ 1115 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1116 (flags & LOP_EXCLUSIVE) == 0) { 1117 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1118 lock->lo_name, file, line); 1119 printf("while exclusively locked from %s:%d\n", 1120 instance->li_file, instance->li_line); 1121 panic("excl->ushare"); 1122 } 1123 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1124 (flags & LOP_EXCLUSIVE) != 0) { 1125 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1126 lock->lo_name, file, line); 1127 printf("while share locked from %s:%d\n", instance->li_file, 1128 instance->li_line); 1129 panic("share->uexcl"); 1130 } 1131 1132 /* If we are recursed, unrecurse. */ 1133 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1134 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1135 td->td_proc->p_pid, instance->li_lock->lo_name, 1136 instance->li_flags); 1137 instance->li_flags--; 1138 return; 1139 } 1140 1141 /* Otherwise, remove this item from the list. */ 1142 s = intr_disable(); 1143 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1144 td->td_proc->p_pid, instance->li_lock->lo_name, 1145 (*lock_list)->ll_count - 1); 1146 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1147 (*lock_list)->ll_children[j] = 1148 (*lock_list)->ll_children[j + 1]; 1149 (*lock_list)->ll_count--; 1150 intr_restore(s); 1151 1152 /* If this lock list entry is now empty, free it. */ 1153 if ((*lock_list)->ll_count == 0) { 1154 lle = *lock_list; 1155 *lock_list = lle->ll_next; 1156 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1157 td->td_proc->p_pid, lle); 1158 witness_lock_list_free(lle); 1159 } 1160} 1161 1162/* 1163 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1164 * exempt Giant and sleepable locks from the checks as well. If any 1165 * non-exempt locks are held, then a supplied message is printed to the 1166 * console along with a list of the offending locks. If indicated in the 1167 * flags then a failure results in a panic as well. 1168 */ 1169int 1170witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1171{ 1172 struct lock_list_entry *lle; 1173 struct lock_instance *lock1; 1174 struct thread *td; 1175 va_list ap; 1176 int i, n; 1177 1178 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1179 return (0); 1180 n = 0; 1181 td = curthread; 1182 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1183 for (i = lle->ll_count - 1; i >= 0; i--) { 1184 lock1 = &lle->ll_children[i]; 1185 if (lock1->li_lock == lock) 1186 continue; 1187 if (flags & WARN_GIANTOK && 1188 lock1->li_lock == &Giant.mtx_object) 1189 continue; 1190 if (flags & WARN_SLEEPOK && 1191 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1192 continue; 1193 if (n == 0) { 1194 va_start(ap, fmt); 1195 vprintf(fmt, ap); 1196 va_end(ap); 1197 printf(" with the following"); 1198 if (flags & WARN_SLEEPOK) 1199 printf(" non-sleepable"); 1200 printf(" locks held:\n"); 1201 } 1202 n++; 1203 witness_list_lock(lock1); 1204 } 1205 if (PCPU_GET(spinlocks) != NULL) { 1206 /* 1207 * Since we already hold a spinlock preemption is 1208 * already blocked. 1209 */ 1210 if (n == 0) { 1211 va_start(ap, fmt); 1212 vprintf(fmt, ap); 1213 va_end(ap); 1214 printf(" with the following"); 1215 if (flags & WARN_SLEEPOK) 1216 printf(" non-sleepable"); 1217 printf(" locks held:\n"); 1218 } 1219 n += witness_list_locks(PCPU_PTR(spinlocks)); 1220 } 1221 if (flags & WARN_PANIC && n) 1222 panic("witness_warn"); 1223#ifdef KDB 1224 else if (witness_kdb && n) 1225 kdb_enter(__func__); 1226 else if (witness_trace && n) 1227 kdb_backtrace(); 1228#endif 1229 return (n); 1230} 1231 1232const char * 1233witness_file(struct lock_object *lock) 1234{ 1235 struct witness *w; 1236 1237 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1238 return ("?"); 1239 w = lock->lo_witness; 1240 return (w->w_file); 1241} 1242 1243int 1244witness_line(struct lock_object *lock) 1245{ 1246 struct witness *w; 1247 1248 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1249 return (0); 1250 w = lock->lo_witness; 1251 return (w->w_line); 1252} 1253 1254static struct witness * 1255enroll(const char *description, struct lock_class *lock_class) 1256{ 1257 struct witness *w; 1258 1259 if (witness_watch == 0 || panicstr != NULL) 1260 return (NULL); 1261 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1262 return (NULL); 1263 mtx_lock_spin(&w_mtx); 1264 STAILQ_FOREACH(w, &w_all, w_list) { 1265 if (w->w_name == description || (w->w_refcount > 0 && 1266 strcmp(description, w->w_name) == 0)) { 1267 w->w_refcount++; 1268 mtx_unlock_spin(&w_mtx); 1269 if (lock_class != w->w_class) 1270 panic( 1271 "lock (%s) %s does not match earlier (%s) lock", 1272 description, lock_class->lc_name, 1273 w->w_class->lc_name); 1274 return (w); 1275 } 1276 } 1277 /* 1278 * This isn't quite right, as witness_cold is still 0 while we 1279 * enroll all the locks initialized before witness_initialize(). 1280 */ 1281 if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) { 1282 mtx_unlock_spin(&w_mtx); 1283 panic("spin lock %s not in order list", description); 1284 } 1285 if ((w = witness_get()) == NULL) 1286 return (NULL); 1287 w->w_name = description; 1288 w->w_class = lock_class; 1289 w->w_refcount = 1; 1290 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1291 if (lock_class->lc_flags & LC_SPINLOCK) 1292 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1293 else if (lock_class->lc_flags & LC_SLEEPLOCK) 1294 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1295 else { 1296 mtx_unlock_spin(&w_mtx); 1297 panic("lock class %s is not sleep or spin", 1298 lock_class->lc_name); 1299 } 1300 mtx_unlock_spin(&w_mtx); 1301 return (w); 1302} 1303 1304/* Don't let the door bang you on the way out... */ 1305static int 1306depart(struct witness *w) 1307{ 1308 struct witness_child_list_entry *wcl, *nwcl; 1309 struct witness_list *list; 1310 struct witness *parent; 1311 1312 MPASS(w->w_refcount == 0); 1313 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1314 list = &w_sleep; 1315 else 1316 list = &w_spin; 1317 /* 1318 * First, we run through the entire tree looking for any 1319 * witnesses that the outgoing witness is a child of. For 1320 * each parent that we find, we reparent all the direct 1321 * children of the outgoing witness to its parent. 1322 */ 1323 STAILQ_FOREACH(parent, list, w_typelist) { 1324 if (!isitmychild(parent, w)) 1325 continue; 1326 removechild(parent, w); 1327 if (!reparentchildren(parent, w)) 1328 return (0); 1329 } 1330 1331 /* 1332 * Now we go through and free up the child list of the 1333 * outgoing witness. 1334 */ 1335 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1336 nwcl = wcl->wcl_next; 1337 witness_child_free(wcl); 1338 } 1339 1340 /* 1341 * Detach from various lists and free. 1342 */ 1343 STAILQ_REMOVE(list, w, witness, w_typelist); 1344 STAILQ_REMOVE(&w_all, w, witness, w_list); 1345 witness_free(w); 1346 1347 /* Finally, fixup the tree. */ 1348 return (rebalancetree(list)); 1349} 1350 1351/* 1352 * Prune an entire lock order tree. We look for cases where a lock 1353 * is now both a descendant and a direct child of a given lock. In 1354 * that case, we want to remove the direct child link from the tree. 1355 * 1356 * Returns false if insertchild() fails. 1357 */ 1358static int 1359rebalancetree(struct witness_list *list) 1360{ 1361 struct witness *child, *parent; 1362 1363 STAILQ_FOREACH(child, list, w_typelist) { 1364 STAILQ_FOREACH(parent, list, w_typelist) { 1365 if (!isitmychild(parent, child)) 1366 continue; 1367 removechild(parent, child); 1368 if (isitmydescendant(parent, child)) 1369 continue; 1370 if (!insertchild(parent, child)) 1371 return (0); 1372 } 1373 } 1374 witness_levelall(); 1375 return (1); 1376} 1377 1378/* 1379 * Add "child" as a direct child of "parent". Returns false if 1380 * we fail due to out of memory. 1381 */ 1382static int 1383insertchild(struct witness *parent, struct witness *child) 1384{ 1385 struct witness_child_list_entry **wcl; 1386 1387 MPASS(child != NULL && parent != NULL); 1388 1389 /* 1390 * Insert "child" after "parent" 1391 */ 1392 wcl = &parent->w_children; 1393 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1394 wcl = &(*wcl)->wcl_next; 1395 if (*wcl == NULL) { 1396 *wcl = witness_child_get(); 1397 if (*wcl == NULL) 1398 return (0); 1399 } 1400 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1401 1402 return (1); 1403} 1404 1405/* 1406 * Make all the direct descendants of oldparent be direct descendants 1407 * of newparent. 1408 */ 1409static int 1410reparentchildren(struct witness *newparent, struct witness *oldparent) 1411{ 1412 struct witness_child_list_entry *wcl; 1413 int i; 1414 1415 /* Avoid making a witness a child of itself. */ 1416 MPASS(!isitmychild(oldparent, newparent)); 1417 1418 for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1419 for (i = 0; i < wcl->wcl_count; i++) 1420 if (!insertchild(newparent, wcl->wcl_children[i])) 1421 return (0); 1422 return (1); 1423} 1424 1425static int 1426itismychild(struct witness *parent, struct witness *child) 1427{ 1428 struct witness_list *list; 1429 1430 MPASS(child != NULL && parent != NULL); 1431 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1432 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1433 panic( 1434 "%s: parent (%s) and child (%s) are not the same lock type", 1435 __func__, parent->w_class->lc_name, 1436 child->w_class->lc_name); 1437 1438 if (!insertchild(parent, child)) 1439 return (0); 1440 1441 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1442 list = &w_sleep; 1443 else 1444 list = &w_spin; 1445 return (rebalancetree(list)); 1446} 1447 1448static void 1449removechild(struct witness *parent, struct witness *child) 1450{ 1451 struct witness_child_list_entry **wcl, *wcl1; 1452 int i; 1453 1454 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1455 for (i = 0; i < (*wcl)->wcl_count; i++) 1456 if ((*wcl)->wcl_children[i] == child) 1457 goto found; 1458 return; 1459found: 1460 (*wcl)->wcl_count--; 1461 if ((*wcl)->wcl_count > i) 1462 (*wcl)->wcl_children[i] = 1463 (*wcl)->wcl_children[(*wcl)->wcl_count]; 1464 MPASS((*wcl)->wcl_children[i] != NULL); 1465 if ((*wcl)->wcl_count != 0) 1466 return; 1467 wcl1 = *wcl; 1468 *wcl = wcl1->wcl_next; 1469 witness_child_free(wcl1); 1470} 1471 1472static int 1473isitmychild(struct witness *parent, struct witness *child) 1474{ 1475 struct witness_child_list_entry *wcl; 1476 int i; 1477 1478 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1479 for (i = 0; i < wcl->wcl_count; i++) { 1480 if (wcl->wcl_children[i] == child) 1481 return (1); 1482 } 1483 } 1484 return (0); 1485} 1486 1487static int 1488isitmydescendant(struct witness *parent, struct witness *child) 1489{ 1490 struct witness_child_list_entry *wcl; 1491 int i, j; 1492 1493 if (isitmychild(parent, child)) 1494 return (1); 1495 j = 0; 1496 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1497 MPASS(j < 1000); 1498 for (i = 0; i < wcl->wcl_count; i++) { 1499 if (isitmydescendant(wcl->wcl_children[i], child)) 1500 return (1); 1501 } 1502 j++; 1503 } 1504 return (0); 1505} 1506 1507static void 1508witness_levelall (void) 1509{ 1510 struct witness_list *list; 1511 struct witness *w, *w1; 1512 1513 /* 1514 * First clear all levels. 1515 */ 1516 STAILQ_FOREACH(w, &w_all, w_list) { 1517 w->w_level = 0; 1518 } 1519 1520 /* 1521 * Look for locks with no parent and level all their descendants. 1522 */ 1523 STAILQ_FOREACH(w, &w_all, w_list) { 1524 /* 1525 * This is just an optimization, technically we could get 1526 * away just walking the all list each time. 1527 */ 1528 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1529 list = &w_sleep; 1530 else 1531 list = &w_spin; 1532 STAILQ_FOREACH(w1, list, w_typelist) { 1533 if (isitmychild(w1, w)) 1534 goto skip; 1535 } 1536 witness_leveldescendents(w, 0); 1537 skip: 1538 ; /* silence GCC 3.x */ 1539 } 1540} 1541 1542static void 1543witness_leveldescendents(struct witness *parent, int level) 1544{ 1545 struct witness_child_list_entry *wcl; 1546 int i; 1547 1548 if (parent->w_level < level) 1549 parent->w_level = level; 1550 level++; 1551 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1552 for (i = 0; i < wcl->wcl_count; i++) 1553 witness_leveldescendents(wcl->wcl_children[i], level); 1554} 1555 1556static void 1557witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1558 struct witness *parent, int indent) 1559{ 1560 struct witness_child_list_entry *wcl; 1561 int i, level; 1562 1563 level = parent->w_level; 1564 prnt("%-2d", level); 1565 for (i = 0; i < indent; i++) 1566 prnt(" "); 1567 if (parent->w_refcount > 0) 1568 prnt("%s", parent->w_name); 1569 else 1570 prnt("(dead)"); 1571 if (parent->w_displayed) { 1572 prnt(" -- (already displayed)\n"); 1573 return; 1574 } 1575 parent->w_displayed = 1; 1576 if (parent->w_refcount > 0) { 1577 if (parent->w_file != NULL) 1578 prnt(" -- last acquired @ %s:%d", parent->w_file, 1579 parent->w_line); 1580 } 1581 prnt("\n"); 1582 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1583 for (i = 0; i < wcl->wcl_count; i++) 1584 witness_displaydescendants(prnt, 1585 wcl->wcl_children[i], indent + 1); 1586} 1587 1588#ifdef BLESSING 1589static int 1590blessed(struct witness *w1, struct witness *w2) 1591{ 1592 int i; 1593 struct witness_blessed *b; 1594 1595 for (i = 0; i < blessed_count; i++) { 1596 b = &blessed_list[i]; 1597 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1598 if (strcmp(w2->w_name, b->b_lock2) == 0) 1599 return (1); 1600 continue; 1601 } 1602 if (strcmp(w1->w_name, b->b_lock2) == 0) 1603 if (strcmp(w2->w_name, b->b_lock1) == 0) 1604 return (1); 1605 } 1606 return (0); 1607} 1608#endif 1609 1610static struct witness * 1611witness_get(void) 1612{ 1613 struct witness *w; 1614 1615 if (witness_watch == 0) { 1616 mtx_unlock_spin(&w_mtx); 1617 return (NULL); 1618 } 1619 if (STAILQ_EMPTY(&w_free)) { 1620 witness_watch = 0; 1621 mtx_unlock_spin(&w_mtx); 1622 printf("%s: witness exhausted\n", __func__); 1623 return (NULL); 1624 } 1625 w = STAILQ_FIRST(&w_free); 1626 STAILQ_REMOVE_HEAD(&w_free, w_list); 1627 bzero(w, sizeof(*w)); 1628 return (w); 1629} 1630 1631static void 1632witness_free(struct witness *w) 1633{ 1634 1635 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1636} 1637 1638static struct witness_child_list_entry * 1639witness_child_get(void) 1640{ 1641 struct witness_child_list_entry *wcl; 1642 1643 if (witness_watch == 0) { 1644 mtx_unlock_spin(&w_mtx); 1645 return (NULL); 1646 } 1647 wcl = w_child_free; 1648 if (wcl == NULL) { 1649 witness_watch = 0; 1650 mtx_unlock_spin(&w_mtx); 1651 printf("%s: witness exhausted\n", __func__); 1652 return (NULL); 1653 } 1654 w_child_free = wcl->wcl_next; 1655 bzero(wcl, sizeof(*wcl)); 1656 return (wcl); 1657} 1658 1659static void 1660witness_child_free(struct witness_child_list_entry *wcl) 1661{ 1662 1663 wcl->wcl_next = w_child_free; 1664 w_child_free = wcl; 1665} 1666 1667static struct lock_list_entry * 1668witness_lock_list_get(void) 1669{ 1670 struct lock_list_entry *lle; 1671 1672 if (witness_watch == 0) 1673 return (NULL); 1674 mtx_lock_spin(&w_mtx); 1675 lle = w_lock_list_free; 1676 if (lle == NULL) { 1677 witness_watch = 0; 1678 mtx_unlock_spin(&w_mtx); 1679 printf("%s: witness exhausted\n", __func__); 1680 return (NULL); 1681 } 1682 w_lock_list_free = lle->ll_next; 1683 mtx_unlock_spin(&w_mtx); 1684 bzero(lle, sizeof(*lle)); 1685 return (lle); 1686} 1687 1688static void 1689witness_lock_list_free(struct lock_list_entry *lle) 1690{ 1691 1692 mtx_lock_spin(&w_mtx); 1693 lle->ll_next = w_lock_list_free; 1694 w_lock_list_free = lle; 1695 mtx_unlock_spin(&w_mtx); 1696} 1697 1698static struct lock_instance * 1699find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1700{ 1701 struct lock_list_entry *lle; 1702 struct lock_instance *instance; 1703 int i; 1704 1705 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1706 for (i = lle->ll_count - 1; i >= 0; i--) { 1707 instance = &lle->ll_children[i]; 1708 if (instance->li_lock == lock) 1709 return (instance); 1710 } 1711 return (NULL); 1712} 1713 1714static void 1715witness_list_lock(struct lock_instance *instance) 1716{ 1717 struct lock_object *lock; 1718 1719 lock = instance->li_lock; 1720 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 1721 "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name); 1722 if (lock->lo_type != lock->lo_name) 1723 printf(" (%s)", lock->lo_type); 1724 printf(" r = %d (%p) locked @ %s:%d\n", 1725 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1726 instance->li_line); 1727} 1728 1729int 1730witness_list_locks(struct lock_list_entry **lock_list) 1731{ 1732 struct lock_list_entry *lle; 1733 int i, nheld; 1734 1735 nheld = 0; 1736 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1737 for (i = lle->ll_count - 1; i >= 0; i--) { 1738 witness_list_lock(&lle->ll_children[i]); 1739 nheld++; 1740 } 1741 return (nheld); 1742} 1743 1744/* 1745 * This is a bit risky at best. We call this function when we have timed 1746 * out acquiring a spin lock, and we assume that the other CPU is stuck 1747 * with this lock held. So, we go groveling around in the other CPU's 1748 * per-cpu data to try to find the lock instance for this spin lock to 1749 * see when it was last acquired. 1750 */ 1751void 1752witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1753{ 1754 struct lock_instance *instance; 1755 struct pcpu *pc; 1756 1757 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1758 return; 1759 pc = pcpu_find(owner->td_oncpu); 1760 instance = find_instance(pc->pc_spinlocks, lock); 1761 if (instance != NULL) 1762 witness_list_lock(instance); 1763} 1764 1765void 1766witness_save(struct lock_object *lock, const char **filep, int *linep) 1767{ 1768 struct lock_instance *instance; 1769 1770 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1771 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1772 return; 1773 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1774 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1775 lock->lo_class->lc_name, lock->lo_name); 1776 instance = find_instance(curthread->td_sleeplocks, lock); 1777 if (instance == NULL) 1778 panic("%s: lock (%s) %s not locked", __func__, 1779 lock->lo_class->lc_name, lock->lo_name); 1780 *filep = instance->li_file; 1781 *linep = instance->li_line; 1782} 1783 1784void 1785witness_restore(struct lock_object *lock, const char *file, int line) 1786{ 1787 struct lock_instance *instance; 1788 1789 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1790 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1791 return; 1792 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1793 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1794 lock->lo_class->lc_name, lock->lo_name); 1795 instance = find_instance(curthread->td_sleeplocks, lock); 1796 if (instance == NULL) 1797 panic("%s: lock (%s) %s not locked", __func__, 1798 lock->lo_class->lc_name, lock->lo_name); 1799 lock->lo_witness->w_file = file; 1800 lock->lo_witness->w_line = line; 1801 instance->li_file = file; 1802 instance->li_line = line; 1803} 1804 1805void 1806witness_assert(struct lock_object *lock, int flags, const char *file, int line) 1807{ 1808#ifdef INVARIANT_SUPPORT 1809 struct lock_instance *instance; 1810 1811 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1812 return; 1813 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0) 1814 instance = find_instance(curthread->td_sleeplocks, lock); 1815 else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0) 1816 instance = find_instance(PCPU_GET(spinlocks), lock); 1817 else { 1818 panic("Lock (%s) %s is not sleep or spin!", 1819 lock->lo_class->lc_name, lock->lo_name); 1820 } 1821 file = fixup_filename(file); 1822 switch (flags) { 1823 case LA_UNLOCKED: 1824 if (instance != NULL) 1825 panic("Lock (%s) %s locked @ %s:%d.", 1826 lock->lo_class->lc_name, lock->lo_name, file, line); 1827 break; 1828 case LA_LOCKED: 1829 case LA_LOCKED | LA_RECURSED: 1830 case LA_LOCKED | LA_NOTRECURSED: 1831 case LA_SLOCKED: 1832 case LA_SLOCKED | LA_RECURSED: 1833 case LA_SLOCKED | LA_NOTRECURSED: 1834 case LA_XLOCKED: 1835 case LA_XLOCKED | LA_RECURSED: 1836 case LA_XLOCKED | LA_NOTRECURSED: 1837 if (instance == NULL) { 1838 panic("Lock (%s) %s not locked @ %s:%d.", 1839 lock->lo_class->lc_name, lock->lo_name, file, line); 1840 break; 1841 } 1842 if ((flags & LA_XLOCKED) != 0 && 1843 (instance->li_flags & LI_EXCLUSIVE) == 0) 1844 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1845 lock->lo_class->lc_name, lock->lo_name, file, line); 1846 if ((flags & LA_SLOCKED) != 0 && 1847 (instance->li_flags & LI_EXCLUSIVE) != 0) 1848 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1849 lock->lo_class->lc_name, lock->lo_name, file, line); 1850 if ((flags & LA_RECURSED) != 0 && 1851 (instance->li_flags & LI_RECURSEMASK) == 0) 1852 panic("Lock (%s) %s not recursed @ %s:%d.", 1853 lock->lo_class->lc_name, lock->lo_name, file, line); 1854 if ((flags & LA_NOTRECURSED) != 0 && 1855 (instance->li_flags & LI_RECURSEMASK) != 0) 1856 panic("Lock (%s) %s recursed @ %s:%d.", 1857 lock->lo_class->lc_name, lock->lo_name, file, line); 1858 break; 1859 default: 1860 panic("Invalid lock assertion at %s:%d.", file, line); 1861 1862 } 1863#endif /* INVARIANT_SUPPORT */ 1864} 1865 1866#ifdef DDB 1867static void 1868witness_list(struct thread *td) 1869{ 1870 1871 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1872 KASSERT(kdb_active, ("%s: not in the debugger", __func__)); 1873 1874 if (witness_watch == 0) 1875 return; 1876 1877 witness_list_locks(&td->td_sleeplocks); 1878 1879 /* 1880 * We only handle spinlocks if td == curthread. This is somewhat broken 1881 * if td is currently executing on some other CPU and holds spin locks 1882 * as we won't display those locks. If we had a MI way of getting 1883 * the per-cpu data for a given cpu then we could use 1884 * td->td_oncpu to get the list of spinlocks for this thread 1885 * and "fix" this. 1886 * 1887 * That still wouldn't really fix this unless we locked sched_lock 1888 * or stopped the other CPU to make sure it wasn't changing the list 1889 * out from under us. It is probably best to just not try to handle 1890 * threads on other CPU's for now. 1891 */ 1892 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1893 witness_list_locks(PCPU_PTR(spinlocks)); 1894} 1895 1896DB_SHOW_COMMAND(locks, db_witness_list) 1897{ 1898 struct thread *td; 1899 pid_t pid; 1900 struct proc *p; 1901 1902 if (have_addr) { 1903 pid = (addr % 16) + ((addr >> 4) % 16) * 10 + 1904 ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + 1905 ((addr >> 16) % 16) * 10000; 1906 /* sx_slock(&allproc_lock); */ 1907 FOREACH_PROC_IN_SYSTEM(p) { 1908 if (p->p_pid == pid) 1909 break; 1910 } 1911 /* sx_sunlock(&allproc_lock); */ 1912 if (p == NULL) { 1913 db_printf("pid %d not found\n", pid); 1914 return; 1915 } 1916 FOREACH_THREAD_IN_PROC(p, td) { 1917 witness_list(td); 1918 } 1919 } else { 1920 td = curthread; 1921 witness_list(td); 1922 } 1923} 1924 1925DB_SHOW_COMMAND(witness, db_witness_display) 1926{ 1927 1928 witness_display(db_printf); 1929} 1930#endif 1931