subr_witness.c revision 131884
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 */ 31 32/* 33 * Implementation of the `witness' lock verifier. Originally implemented for 34 * mutexes in BSD/OS. Extended to handle generic lock objects and lock 35 * classes in FreeBSD. 
36 */ 37 38/* 39 * Main Entry: witness 40 * Pronunciation: 'wit-n&s 41 * Function: noun 42 * Etymology: Middle English witnesse, from Old English witnes knowledge, 43 * testimony, witness, from 2wit 44 * Date: before 12th century 45 * 1 : attestation of a fact or event : TESTIMONY 46 * 2 : one that gives evidence; specifically : one who testifies in 47 * a cause or before a judicial tribunal 48 * 3 : one asked to be present at a transaction so as to be able to 49 * testify to its having taken place 50 * 4 : one who has personal knowledge of something 51 * 5 a : something serving as evidence or proof : SIGN 52 * b : public affirmation by word or example of usually 53 * religious faith or conviction <the heroic witness to divine 54 * life -- Pilot> 55 * 6 capitalized : a member of the Jehovah's Witnesses 56 */ 57 58/* 59 * Special rules concerning Giant and lock orders: 60 * 61 * 1) Giant must be acquired before any other mutexes. Stated another way, 62 * no other mutex may be held when Giant is acquired. 63 * 64 * 2) Giant must be released when blocking on a sleepable lock. 65 * 66 * This rule is less obvious, but is a result of Giant providing the same 67 * semantics as spl(). Basically, when a thread sleeps, it must release 68 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule 69 * 2). 70 * 71 * 3) Giant may be acquired before or after sleepable locks. 72 * 73 * This rule is also not quite as obvious. Giant may be acquired after 74 * a sleepable lock because it is a non-sleepable lock and non-sleepable 75 * locks may always be acquired while holding a sleepable lock. The second 76 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose 77 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 78 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and 79 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to 80 * execute. 
Thus, acquiring Giant both before and after a sleepable lock 81 * will not result in a lock order reversal. 82 */ 83 84#include <sys/cdefs.h> 85__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 131884 2004-07-09 17:46:27Z jhb $"); 86 87#include "opt_ddb.h" 88#include "opt_witness.h" 89 90#include <sys/param.h> 91#include <sys/bus.h> 92#include <sys/kernel.h> 93#include <sys/ktr.h> 94#include <sys/lock.h> 95#include <sys/malloc.h> 96#include <sys/mutex.h> 97#include <sys/proc.h> 98#include <sys/sysctl.h> 99#include <sys/systm.h> 100 101#include <ddb/ddb.h> 102 103#include <machine/stdarg.h> 104 105/* Define this to check for blessed mutexes */ 106#undef BLESSING 107 108#define WITNESS_COUNT 200 109#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) 110/* 111 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads 112 * will hold LOCK_NCHILDREN * 2 locks. We handle failure ok, and we should 113 * probably be safe for the most part, but it's still a SWAG. 114 */ 115#define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2 116 117#define WITNESS_NCHILDREN 6 118 119struct witness_child_list_entry; 120 121struct witness { 122 const char *w_name; 123 struct lock_class *w_class; 124 STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ 125 STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 126 struct witness_child_list_entry *w_children; /* Great evilness... 
*/ 127 const char *w_file; 128 int w_line; 129 u_int w_level; 130 u_int w_refcount; 131 u_char w_Giant_squawked:1; 132 u_char w_other_squawked:1; 133 u_char w_same_squawked:1; 134 u_char w_displayed:1; 135}; 136 137struct witness_child_list_entry { 138 struct witness_child_list_entry *wcl_next; 139 struct witness *wcl_children[WITNESS_NCHILDREN]; 140 u_int wcl_count; 141}; 142 143STAILQ_HEAD(witness_list, witness); 144 145#ifdef BLESSING 146struct witness_blessed { 147 const char *b_lock1; 148 const char *b_lock2; 149}; 150#endif 151 152struct witness_order_list_entry { 153 const char *w_name; 154 struct lock_class *w_class; 155}; 156 157#ifdef BLESSING 158static int blessed(struct witness *, struct witness *); 159#endif 160static int depart(struct witness *w); 161static struct witness *enroll(const char *description, 162 struct lock_class *lock_class); 163static int insertchild(struct witness *parent, struct witness *child); 164static int isitmychild(struct witness *parent, struct witness *child); 165static int isitmydescendant(struct witness *parent, struct witness *child); 166static int itismychild(struct witness *parent, struct witness *child); 167static int rebalancetree(struct witness_list *list); 168static void removechild(struct witness *parent, struct witness *child); 169static int reparentchildren(struct witness *newparent, 170 struct witness *oldparent); 171static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); 172static void witness_displaydescendants(void(*)(const char *fmt, ...), 173 struct witness *, int indent); 174static const char *fixup_filename(const char *file); 175static void witness_leveldescendents(struct witness *parent, int level); 176static void witness_levelall(void); 177static struct witness *witness_get(void); 178static void witness_free(struct witness *m); 179static struct witness_child_list_entry *witness_child_get(void); 180static void witness_child_free(struct witness_child_list_entry *wcl); 181static struct lock_list_entry 
*witness_lock_list_get(void); 182static void witness_lock_list_free(struct lock_list_entry *lle); 183static struct lock_instance *find_instance(struct lock_list_entry *lock_list, 184 struct lock_object *lock); 185static void witness_list_lock(struct lock_instance *instance); 186#ifdef DDB 187static void witness_list(struct thread *td); 188static void witness_display_list(void(*prnt)(const char *fmt, ...), 189 struct witness_list *list); 190static void witness_display(void(*)(const char *fmt, ...)); 191#endif 192 193MALLOC_DEFINE(M_WITNESS, "witness", "witness structure"); 194 195/* 196 * If set to 0, witness is disabled. If set to 1, witness performs full lock 197 * order checking for all locks. If set to 2 or higher, then witness skips 198 * the full lock order check if the lock being acquired is at a higher level 199 * (i.e. farther down in the tree) than the current lock. This last mode is 200 * somewhat experimental and not considered fully safe. At runtime, this 201 * value may be set to 0 to turn off witness. witness is not allowed be 202 * turned on once it is turned off, however. 203 */ 204static int witness_watch = 1; 205TUNABLE_INT("debug.witness_watch", &witness_watch); 206SYSCTL_PROC(_debug, OID_AUTO, witness_watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0, 207 sysctl_debug_witness_watch, "I", "witness is watching lock operations"); 208 209#ifdef DDB 210/* 211 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 212 * drop into kdebug() when: 213 * - a lock heirarchy violation occurs 214 * - locks are held when going to sleep. 
*/
#ifdef WITNESS_DDB
int	witness_ddb = 1;
#else
int	witness_ddb = 0;
#endif
TUNABLE_INT("debug.witness_ddb", &witness_ddb);
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");

/*
 * When DDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_trace = 1;
TUNABLE_INT("debug.witness_trace", &witness_trace);
SYSCTL_INT(_debug, OID_AUTO, witness_trace, CTLFLAG_RW, &witness_trace, 0, "");
#endif /* DDB */

/*
 * Defaults to 1 when the kernel is built with WITNESS_SKIPSPIN, else 0.
 * Exported read-only (CTLFLAG_RDTUN), so it can only be set as a tunable.
 */
#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
TUNABLE_INT("debug.witness_skipspin", &witness_skipspin);
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0,
    "");

/* Spin mutex guarding the witness order data (initialized at boot). */
static struct mtx w_mtx;
/* Free list and lists of enrolled witnesses (all / spin / sleep). */
static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
static struct witness_child_list_entry *w_child_free = NULL;
static struct lock_list_entry *w_lock_list_free = NULL;

/*
 * Static backing storage; witness_initialize() hands every element to the
 * matching *_free() routine.
 */
static struct witness w_data[WITNESS_COUNT];
static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];

/*
 * Static lock orders.  Each run of entries up to a { NULL, NULL } marker is
 * one chain: witness_initialize() enrolls every entry and makes it a child
 * of the entry before it in the same run.  Two consecutive { NULL, NULL }
 * markers terminate the table.
 */
static struct witness_order_list_entry order_lists[] = {
	{ "proctree", &lock_class_sx },
	{ "allproc", &lock_class_sx },
	{ "Giant", &lock_class_mtx_sleep },
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "pipe mutex", &lock_class_mtx_sleep },
	{ "sigio lock", &lock_class_mtx_sleep },
	{ "process group", &lock_class_mtx_sleep },
	{ "process lock", &lock_class_mtx_sleep },
	{ "session", &lock_class_mtx_sleep },
	{ "uidinfo hash", &lock_class_mtx_sleep },
	{ "uidinfo struct", &lock_class_mtx_sleep },
	{ "allprison", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Sockets
	 */
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "accept", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "sellck", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Routing
	 */
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "radix node head", &lock_class_mtx_sleep },
	{ "rtentry", &lock_class_mtx_sleep },
	{ "ifaddr", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UNIX Domain Sockets
	 */
	{ "unp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UDP/IP
	 */
	{ "udp", &lock_class_mtx_sleep },
	{ "udpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * TCP/IP
	 */
	{ "tcp", &lock_class_mtx_sleep },
	{ "tcpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * SLIP
	 */
	{ "slip_mtx", &lock_class_mtx_sleep },
	{ "slip sc_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * spin locks
	 */
#ifdef SMP
	{ "ap boot", &lock_class_mtx_spin },
#endif
	{ "sio", &lock_class_mtx_spin },
#ifdef __i386__
	{ "cy", &lock_class_mtx_spin },
#endif
	{ "uart_hwmtx", &lock_class_mtx_spin },
	{ "sabtty", &lock_class_mtx_spin },
	{ "zstty", &lock_class_mtx_spin },
	{ "ng_node", &lock_class_mtx_spin },
	{ "ng_worklist", &lock_class_mtx_spin },
	{ "taskqueue_fast", &lock_class_mtx_spin },
	{ "intr table", &lock_class_mtx_spin },
	{ "ithread table lock", &lock_class_mtx_spin },
	{ "sleepq chain", &lock_class_mtx_spin },
	{ "sched lock", &lock_class_mtx_spin },
	{ "turnstile chain", &lock_class_mtx_spin },
	{ "td_contested", &lock_class_mtx_spin },
	{ "callout", &lock_class_mtx_spin },
	{ "entropy harvest", &lock_class_mtx_spin },
	{ "entropy harvest buffers", &lock_class_mtx_spin },
	/*
	 * leaf locks
	 */
	{ "allpmaps", &lock_class_mtx_spin },
	{ "vm page queue free mutex", &lock_class_mtx_spin },
	{ "icu", &lock_class_mtx_spin },
#ifdef SMP
	{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) || defined(__amd64__)
	{ "tlb", &lock_class_mtx_spin },
	{ "lazypmap", &lock_class_mtx_spin },
#endif
#ifdef __sparc64__
	{ "ipi", &lock_class_mtx_spin },
#endif
#endif
	{ "clk", &lock_class_mtx_spin },
	{ "mutex profiling lock", &lock_class_mtx_spin },
	{ "kse zombie lock", &lock_class_mtx_spin },
	{ "ALD Queue", &lock_class_mtx_spin },
#ifdef __ia64__
	{ "MCA spin lock", &lock_class_mtx_spin },
#endif
#if defined(__i386__) || defined(__amd64__)
	{ "pcicfg", &lock_class_mtx_spin },
#endif
	{ NULL, NULL },
	{ NULL, NULL }
};

#ifdef BLESSING
/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count =
	sizeof(blessed_list) / sizeof(struct witness_blessed);
#endif

/*
 * List of all locks in the system.
 */
TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks);

/*
 * Mutex protecting all_locks.  It is initialized statically (note
 * LO_INITIALIZED) and linked onto all_locks by witness_initialize().
 */
static struct mtx all_mtx = {
	{ &lock_class_mtx_sleep,	/* mtx_object.lo_class */
	  "All locks list",		/* mtx_object.lo_name */
	  "All locks list",		/* mtx_object.lo_type */
	  LO_INITIALIZED,		/* mtx_object.lo_flags */
	  { NULL, NULL },		/* mtx_object.lo_list */
	  NULL },			/* mtx_object.lo_witness */
	MTX_UNOWNED, 0			/* mtx_lock, mtx_recurse */
};

/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * Global variables for book keeping.
*/
static int lock_cur_cnt;	/* Locks currently on all_locks. */
static int lock_max_cnt;	/* Largest value lock_cur_cnt has reached. */

/*
 * The WITNESS-enabled diagnostic code.
 */

/*
 * SYSINIT hook that brings the witness code up: it seeds the free pools
 * from the static arrays, enrolls the static lock orders from order_lists[],
 * attaches a witness to every lock already on all_locks, and finally clears
 * witness_cold.  Giant is dropped on entry and re-taken before returning.
 */
static void
witness_initialize(void *dummy __unused)
{
	struct lock_object *lock;
	struct witness_order_list_entry *order;
	struct witness *w, *w1;
	int i;

	/*
	 * We have to release Giant before initializing its witness
	 * structure so that WITNESS doesn't get confused.
	 */
	mtx_unlock(&Giant);
	mtx_assert(&Giant, MA_NOTOWNED);

	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
	/* all_mtx is statically initialized, so link it by hand. */
	TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list);
	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
	    MTX_NOWITNESS);
	/* Hand every statically allocated element to its free routine. */
	for (i = 0; i < WITNESS_COUNT; i++)
		witness_free(&w_data[i]);
	for (i = 0; i < WITNESS_CHILDCOUNT; i++)
		witness_child_free(&w_childdata[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);

	/* First add in all the specified order lists. */
	for (order = order_lists; order->w_name != NULL; order++) {
		/* 'order' is the head of a chain. */
		w = enroll(order->w_name, order->w_class);
		if (w == NULL)
			continue;
		w->w_file = "order list";
		/*
		 * Walk the rest of the chain, making each entry a child of
		 * the previous one.  The inner loop stops on the chain's
		 * { NULL, NULL } marker; the outer loop's increment then
		 * steps past that marker.
		 */
		for (order++; order->w_name != NULL; order++) {
			w1 = enroll(order->w_name, order->w_class);
			if (w1 == NULL)
				continue;
			w1->w_file = "order list";
			if (!itismychild(w, w1))
				panic("Not enough memory for static orders!");
			w = w1;
		}
	}

	/* Iterate through all locks and add them to witness. */
	mtx_lock(&all_mtx);
	TAILQ_FOREACH(lock, &all_locks, lo_list) {
		if (lock->lo_flags & LO_WITNESS)
			lock->lo_witness = enroll(lock->lo_type,
			    lock->lo_class);
		else
			lock->lo_witness = NULL;
	}
	mtx_unlock(&all_mtx);

	/* Mark the witness code as being ready for use. 
*/ 463 atomic_store_rel_int(&witness_cold, 0); 464 465 mtx_lock(&Giant); 466} 467SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) 468 469static int 470sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) 471{ 472 int error, value; 473 474 value = witness_watch; 475 error = sysctl_handle_int(oidp, &value, 0, req); 476 if (error != 0 || req->newptr == NULL) 477 return (error); 478 error = suser(req->td); 479 if (error != 0) 480 return (error); 481 if (value == witness_watch) 482 return (0); 483 if (value != 0) 484 return (EINVAL); 485 witness_watch = 0; 486 return (0); 487} 488 489void 490witness_init(struct lock_object *lock) 491{ 492 struct lock_class *class; 493 494 class = lock->lo_class; 495 if (lock->lo_flags & LO_INITIALIZED) 496 panic("%s: lock (%s) %s is already initialized", __func__, 497 class->lc_name, lock->lo_name); 498 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 499 (class->lc_flags & LC_RECURSABLE) == 0) 500 panic("%s: lock (%s) %s can not be recursable", __func__, 501 class->lc_name, lock->lo_name); 502 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 503 (class->lc_flags & LC_SLEEPABLE) == 0) 504 panic("%s: lock (%s) %s can not be sleepable", __func__, 505 class->lc_name, lock->lo_name); 506 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 507 (class->lc_flags & LC_UPGRADABLE) == 0) 508 panic("%s: lock (%s) %s can not be upgradable", __func__, 509 class->lc_name, lock->lo_name); 510 511 mtx_lock(&all_mtx); 512 TAILQ_INSERT_TAIL(&all_locks, lock, lo_list); 513 lock->lo_flags |= LO_INITIALIZED; 514 lock_cur_cnt++; 515 if (lock_cur_cnt > lock_max_cnt) 516 lock_max_cnt = lock_cur_cnt; 517 mtx_unlock(&all_mtx); 518 if (!witness_cold && witness_watch != 0 && panicstr == NULL && 519 (lock->lo_flags & LO_WITNESS) != 0) 520 lock->lo_witness = enroll(lock->lo_type, class); 521 else 522 lock->lo_witness = NULL; 523} 524 525void 526witness_destroy(struct lock_object *lock) 527{ 528 struct witness *w; 529 530 if (witness_cold) 531 panic("lock 
(%s) %s destroyed while witness_cold", 532 lock->lo_class->lc_name, lock->lo_name); 533 if ((lock->lo_flags & LO_INITIALIZED) == 0) 534 panic("%s: lock (%s) %s is not initialized", __func__, 535 lock->lo_class->lc_name, lock->lo_name); 536 537 /* XXX: need to verify that no one holds the lock */ 538 w = lock->lo_witness; 539 if (w != NULL) { 540 mtx_lock_spin(&w_mtx); 541 MPASS(w->w_refcount > 0); 542 w->w_refcount--; 543 544 /* 545 * Lock is already released if we have an allocation failure 546 * and depart() fails. 547 */ 548 if (w->w_refcount != 0 || depart(w)) 549 mtx_unlock_spin(&w_mtx); 550 } 551 552 mtx_lock(&all_mtx); 553 lock_cur_cnt--; 554 TAILQ_REMOVE(&all_locks, lock, lo_list); 555 lock->lo_flags &= ~LO_INITIALIZED; 556 mtx_unlock(&all_mtx); 557} 558 559#ifdef DDB 560static void 561witness_display_list(void(*prnt)(const char *fmt, ...), 562 struct witness_list *list) 563{ 564 struct witness *w; 565 566 STAILQ_FOREACH(w, list, w_typelist) { 567 if (w->w_file == NULL || w->w_level > 0) 568 continue; 569 /* 570 * This lock has no anscestors, display its descendants. 571 */ 572 witness_displaydescendants(prnt, w, 0); 573 } 574} 575 576static void 577witness_display(void(*prnt)(const char *fmt, ...)) 578{ 579 struct witness *w; 580 581 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 582 witness_levelall(); 583 584 /* Clear all the displayed flags. */ 585 STAILQ_FOREACH(w, &w_all, w_list) { 586 w->w_displayed = 0; 587 } 588 589 /* 590 * First, handle sleep locks which have been acquired at least 591 * once. 592 */ 593 prnt("Sleep locks:\n"); 594 witness_display_list(prnt, &w_sleep); 595 596 /* 597 * Now do spin locks which have been acquired at least once. 598 */ 599 prnt("\nSpin locks:\n"); 600 witness_display_list(prnt, &w_spin); 601 602 /* 603 * Finally, any locks which have not been acquired yet. 
604 */ 605 prnt("\nLocks which were never acquired:\n"); 606 STAILQ_FOREACH(w, &w_all, w_list) { 607 if (w->w_file != NULL || w->w_refcount == 0) 608 continue; 609 prnt("%s\n", w->w_name); 610 } 611} 612#endif /* DDB */ 613 614/* Trim useless garbage from filenames. */ 615static const char * 616fixup_filename(const char *file) 617{ 618 619 if (file == NULL) 620 return (NULL); 621 while (strncmp(file, "../", 3) == 0) 622 file += 3; 623 return (file); 624} 625 626int 627witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 628{ 629 630 if (witness_watch == 0 || panicstr != NULL) 631 return (0); 632 633 /* Require locks that witness knows about. */ 634 if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 635 lock2->lo_witness == NULL) 636 return (EINVAL); 637 638 MPASS(!mtx_owned(&w_mtx)); 639 mtx_lock_spin(&w_mtx); 640 641 /* 642 * If we already have either an explicit or implied lock order that 643 * is the other way around, then return an error. 644 */ 645 if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 646 mtx_unlock_spin(&w_mtx); 647 return (EDOOFUS); 648 } 649 650 /* Try to add the new order. 
*/ 651 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 652 lock2->lo_type, lock1->lo_type); 653 if (!itismychild(lock1->lo_witness, lock2->lo_witness)) 654 return (ENOMEM); 655 mtx_unlock_spin(&w_mtx); 656 return (0); 657} 658 659void 660witness_checkorder(struct lock_object *lock, int flags, const char *file, 661 int line) 662{ 663 struct lock_list_entry **lock_list, *lle; 664 struct lock_instance *lock1, *lock2; 665 struct lock_class *class; 666 struct witness *w, *w1; 667 struct thread *td; 668 int i, j; 669 670 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 671 panicstr != NULL) 672 return; 673 674 /* 675 * Try locks do not block if they fail to acquire the lock, thus 676 * there is no danger of deadlocks or of switching while holding a 677 * spin lock if we acquire a lock via a try operation. This 678 * function shouldn't even be called for try locks, so panic if 679 * that happens. 680 */ 681 if (flags & LOP_TRYLOCK) 682 panic("%s should not be called for try lock operations", 683 __func__); 684 685 w = lock->lo_witness; 686 class = lock->lo_class; 687 td = curthread; 688 file = fixup_filename(file); 689 690 if (class->lc_flags & LC_SLEEPLOCK) { 691 /* 692 * Since spin locks include a critical section, this check 693 * implicitly enforces a lock order of all sleep locks before 694 * all spin locks. 695 */ 696 if (td->td_critnest != 0) 697 panic("blockable sleep lock (%s) %s @ %s:%d", 698 class->lc_name, lock->lo_name, file, line); 699 700 /* 701 * If this is the first lock acquired then just return as 702 * no order checking is needed. 703 */ 704 if (td->td_sleeplocks == NULL) 705 return; 706 lock_list = &td->td_sleeplocks; 707 } else { 708 /* 709 * If this is the first lock, just return as no order 710 * checking is needed. We check this in both if clauses 711 * here as unifying the check would require us to use a 712 * critical section to ensure we don't migrate while doing 713 * the check. 
Note that if this is not the first lock, we
		 * are already in a critical section and are safe for the
		 * rest of the check.
		 */
		if (PCPU_GET(spinlocks) == NULL)
			return;
		lock_list = PCPU_PTR(spinlocks);
	}

	/*
	 * Check to see if we are recursing on a lock we already own.  If
	 * so, make sure that we don't mismatch exclusive and shared lock
	 * acquires.
	 */
	lock1 = find_instance(*lock_list, lock);
	if (lock1 != NULL) {
		/* Held exclusively, now requested shared. */
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("shared lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while exclusively locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("share->excl");
		}
		/* Held shared, now requested exclusively. */
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("exclusive lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while share locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("excl->share");
		}
		/* A matching recursion needs no order check. */
		return;
	}

	/*
	 * Try locks do not block if they fail to acquire the lock, thus
	 * there is no danger of deadlocks or of switching while holding a
	 * spin lock if we acquire a lock via a try operation.
	 *
	 * NOTE(review): this test looks unreachable, because the function
	 * already panics on LOP_TRYLOCK near its top -- confirm before
	 * removing.
	 */
	if (flags & LOP_TRYLOCK)
		return;

	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	w1 = lock1->li_lock->lo_witness;
	if (w1 == w) {
		/* Report once per witness, and never for LO_DUPOK locks. */
		if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK))
			return;
		w->w_same_squawked = 1;
		printf("acquiring duplicate lock of same type: \"%s\"\n",
			lock->lo_type);
		printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
		    lock1->li_file, lock1->li_line);
		printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
#ifdef DDB
		goto debugger;
#else
		return;
#endif
	}
	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);
	/*
	 * If we have a known higher number just say ok
	 */
	if (witness_watch > 1 && w->w_level > w1->w_level) {
		mtx_unlock_spin(&w_mtx);
		return;
	}
	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmydescendant(w1, w)) {
		mtx_unlock_spin(&w_mtx);
		return;
	}
	/*
	 * Walk every held lock, newest first within each list entry; j
	 * counts the locks examined so far.
	 */
	for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			MPASS(j < WITNESS_COUNT);
			lock1 = &lle->ll_children[i];
			w1 = lock1->li_lock->lo_witness;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			if (w1 == NULL) {
				KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
				    ("lock missing witness structure"));
				continue;
			}
			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock1->li_lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (!((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
			    /*
			     * Check the lock order hierarchy for a reversal.
			     */
			    if (!isitmydescendant(w, w1))
				continue;
			/*
			 * We have a lock order violation, check to see if it
			 * is allowed or has already been yelled about.
			 */
			mtx_unlock_spin(&w_mtx);
#ifdef BLESSING
			/*
			 * If the lock order is blessed, just bail.  We don't
			 * look for other lock order violations though, which
			 * may be a bug.
			 */
			if (blessed(w, w1))
				return;
#endif
			/* Each witness complains at most once per category. */
			if (lock1->li_lock == &Giant.mtx_object) {
				if (w1->w_Giant_squawked)
					return;
				else
					w1->w_Giant_squawked = 1;
			} else {
				if (w1->w_other_squawked)
					return;
				else
					w1->w_other_squawked = 1;
			}
			/*
			 * Ok, yell about it.
			 */
			printf("lock order reversal\n");
			/*
			 * Try to locate an earlier lock with
			 * witness w in our list.  This reuses (and clobbers)
			 * the loop variables lle and i; that is safe because
			 * every path below leaves the function.  On exit,
			 * i < 0 means no such lock was found.
			 */
			do {
				lock2 = &lle->ll_children[i];
				MPASS(lock2->li_lock != NULL);
				if (lock2->li_lock->lo_witness == w)
					break;
				if (i == 0 && lle->ll_next != NULL) {
					lle = lle->ll_next;
					i = lle->ll_count - 1;
					MPASS(i >= 0 && i < LOCK_NCHILDREN);
				} else
					i--;
			} while (i >= 0);
			if (i < 0) {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    lock1->li_lock->lo_type, lock1->li_file,
				    lock1->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, lock->lo_type, file, line);
			} else {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock2->li_lock, lock2->li_lock->lo_name,
				    lock2->li_lock->lo_type, lock2->li_file,
				    lock2->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    lock1->li_lock->lo_type, lock1->li_file,
				    lock1->li_line);
				printf(" 3rd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, lock->lo_type, file, line);
			}
#ifdef DDB
			goto debugger;
#else
			return;
#endif
		}
	}
	/* No violation found; w_mtx is still held here. */
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	/*
	 * If requested, build a new lock order.  However, don't build a new
	 * relationship between a sleepable lock and Giant if it is in the
	 * wrong direction.  The correct lock order is that sleepable locks
	 * always come before Giant.
	 */
	if (flags & LOP_NEWORDER &&
	    !(lock1->li_lock == &Giant.mtx_object &&
	    (lock->lo_flags & LO_SLEEPABLE) != 0)) {
		CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
		    lock->lo_type, lock1->li_lock->lo_type);
		if (!itismychild(lock1->li_lock->lo_witness, w))
			/* Witness is dead. 
*/ 924 return; 925 } 926 mtx_unlock_spin(&w_mtx); 927 return; 928 929#ifdef DDB 930debugger: 931 if (witness_trace) 932 backtrace(); 933 if (witness_ddb) 934 Debugger(__func__); 935#endif 936} 937 938void 939witness_lock(struct lock_object *lock, int flags, const char *file, int line) 940{ 941 struct lock_list_entry **lock_list, *lle; 942 struct lock_instance *instance; 943 struct witness *w; 944 struct thread *td; 945 946 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 947 panicstr != NULL) 948 return; 949 w = lock->lo_witness; 950 td = curthread; 951 file = fixup_filename(file); 952 953 /* Determine lock list for this lock. */ 954 if (lock->lo_class->lc_flags & LC_SLEEPLOCK) 955 lock_list = &td->td_sleeplocks; 956 else 957 lock_list = PCPU_PTR(spinlocks); 958 959 /* Check to see if we are recursing on a lock we already own. */ 960 instance = find_instance(*lock_list, lock); 961 if (instance != NULL) { 962 instance->li_flags++; 963 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 964 td->td_proc->p_pid, lock->lo_name, 965 instance->li_flags & LI_RECURSEMASK); 966 instance->li_file = file; 967 instance->li_line = line; 968 return; 969 } 970 971 /* Update per-witness last file and line acquire. */ 972 w->w_file = file; 973 w->w_line = line; 974 975 /* Find the next open lock instance in the list and fill it. 
*/ 976 lle = *lock_list; 977 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 978 lle = witness_lock_list_get(); 979 if (lle == NULL) 980 return; 981 lle->ll_next = *lock_list; 982 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, 983 td->td_proc->p_pid, lle); 984 *lock_list = lle; 985 } 986 instance = &lle->ll_children[lle->ll_count++]; 987 instance->li_lock = lock; 988 instance->li_line = line; 989 instance->li_file = file; 990 if ((flags & LOP_EXCLUSIVE) != 0) 991 instance->li_flags = LI_EXCLUSIVE; 992 else 993 instance->li_flags = 0; 994 CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, 995 td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); 996} 997 998void 999witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) 1000{ 1001 struct lock_instance *instance; 1002 struct lock_class *class; 1003 1004 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1005 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1006 return; 1007 class = lock->lo_class; 1008 file = fixup_filename(file); 1009 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1010 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", 1011 class->lc_name, lock->lo_name, file, line); 1012 if ((flags & LOP_TRYLOCK) == 0) 1013 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, 1014 lock->lo_name, file, line); 1015 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1016 panic("upgrade of non-sleep lock (%s) %s @ %s:%d", 1017 class->lc_name, lock->lo_name, file, line); 1018 instance = find_instance(curthread->td_sleeplocks, lock); 1019 if (instance == NULL) 1020 panic("upgrade of unlocked lock (%s) %s @ %s:%d", 1021 class->lc_name, lock->lo_name, file, line); 1022 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1023 panic("upgrade of exclusive lock (%s) %s @ %s:%d", 1024 class->lc_name, lock->lo_name, file, line); 1025 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1026 panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", 
1027 class->lc_name, lock->lo_name, 1028 instance->li_flags & LI_RECURSEMASK, file, line); 1029 instance->li_flags |= LI_EXCLUSIVE; 1030} 1031 1032void 1033witness_downgrade(struct lock_object *lock, int flags, const char *file, 1034 int line) 1035{ 1036 struct lock_instance *instance; 1037 struct lock_class *class; 1038 1039 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1040 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1041 return; 1042 class = lock->lo_class; 1043 file = fixup_filename(file); 1044 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1045 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", 1046 class->lc_name, lock->lo_name, file, line); 1047 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1048 panic("downgrade of non-sleep lock (%s) %s @ %s:%d", 1049 class->lc_name, lock->lo_name, file, line); 1050 instance = find_instance(curthread->td_sleeplocks, lock); 1051 if (instance == NULL) 1052 panic("downgrade of unlocked lock (%s) %s @ %s:%d", 1053 class->lc_name, lock->lo_name, file, line); 1054 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 1055 panic("downgrade of shared lock (%s) %s @ %s:%d", 1056 class->lc_name, lock->lo_name, file, line); 1057 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1058 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", 1059 class->lc_name, lock->lo_name, 1060 instance->li_flags & LI_RECURSEMASK, file, line); 1061 instance->li_flags &= ~LI_EXCLUSIVE; 1062} 1063 1064void 1065witness_unlock(struct lock_object *lock, int flags, const char *file, int line) 1066{ 1067 struct lock_list_entry **lock_list, *lle; 1068 struct lock_instance *instance; 1069 struct lock_class *class; 1070 struct thread *td; 1071 register_t s; 1072 int i, j; 1073 1074 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1075 panicstr != NULL) 1076 return; 1077 td = curthread; 1078 class = lock->lo_class; 1079 file = fixup_filename(file); 1080 1081 /* Find lock instance associated with this 
lock. */ 1082 if (class->lc_flags & LC_SLEEPLOCK) 1083 lock_list = &td->td_sleeplocks; 1084 else 1085 lock_list = PCPU_PTR(spinlocks); 1086 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1087 for (i = 0; i < (*lock_list)->ll_count; i++) { 1088 instance = &(*lock_list)->ll_children[i]; 1089 if (instance->li_lock == lock) 1090 goto found; 1091 } 1092 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1093 file, line); 1094found: 1095 1096 /* First, check for shared/exclusive mismatches. */ 1097 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1098 (flags & LOP_EXCLUSIVE) == 0) { 1099 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1100 lock->lo_name, file, line); 1101 printf("while exclusively locked from %s:%d\n", 1102 instance->li_file, instance->li_line); 1103 panic("excl->ushare"); 1104 } 1105 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1106 (flags & LOP_EXCLUSIVE) != 0) { 1107 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1108 lock->lo_name, file, line); 1109 printf("while share locked from %s:%d\n", instance->li_file, 1110 instance->li_line); 1111 panic("share->uexcl"); 1112 } 1113 1114 /* If we are recursed, unrecurse. */ 1115 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1116 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1117 td->td_proc->p_pid, instance->li_lock->lo_name, 1118 instance->li_flags); 1119 instance->li_flags--; 1120 return; 1121 } 1122 1123 /* Otherwise, remove this item from the list. */ 1124 s = intr_disable(); 1125 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1126 td->td_proc->p_pid, instance->li_lock->lo_name, 1127 (*lock_list)->ll_count - 1); 1128 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1129 (*lock_list)->ll_children[j] = 1130 (*lock_list)->ll_children[j + 1]; 1131 (*lock_list)->ll_count--; 1132 intr_restore(s); 1133 1134 /* If this lock list entry is now empty, free it. 
*/ 1135 if ((*lock_list)->ll_count == 0) { 1136 lle = *lock_list; 1137 *lock_list = lle->ll_next; 1138 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1139 td->td_proc->p_pid, lle); 1140 witness_lock_list_free(lle); 1141 } 1142} 1143 1144/* 1145 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1146 * exempt Giant and sleepable locks from the checks as well. If any 1147 * non-exempt locks are held, then a supplied message is printed to the 1148 * console along with a list of the offending locks. If indicated in the 1149 * flags then a failure results in a panic as well. 1150 */ 1151int 1152witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1153{ 1154 struct lock_list_entry *lle; 1155 struct lock_instance *lock1; 1156 struct thread *td; 1157 va_list ap; 1158 int i, n; 1159 1160 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1161 return (0); 1162 n = 0; 1163 td = curthread; 1164 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1165 for (i = lle->ll_count - 1; i >= 0; i--) { 1166 lock1 = &lle->ll_children[i]; 1167 if (lock1->li_lock == lock) 1168 continue; 1169 if (flags & WARN_GIANTOK && 1170 lock1->li_lock == &Giant.mtx_object) 1171 continue; 1172 if (flags & WARN_SLEEPOK && 1173 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1174 continue; 1175 if (n == 0) { 1176 va_start(ap, fmt); 1177 vprintf(fmt, ap); 1178 va_end(ap); 1179 printf(" with the following"); 1180 if (flags & WARN_SLEEPOK) 1181 printf(" non-sleepable"); 1182 printf(" locks held:\n"); 1183 } 1184 n++; 1185 witness_list_lock(lock1); 1186 } 1187 if (PCPU_GET(spinlocks) != NULL) { 1188 /* 1189 * Since we already hold a spinlock preemption is 1190 * already blocked. 
1191 */ 1192 if (n == 0) { 1193 va_start(ap, fmt); 1194 vprintf(fmt, ap); 1195 va_end(ap); 1196 printf(" with the following"); 1197 if (flags & WARN_SLEEPOK) 1198 printf(" non-sleepable"); 1199 printf(" locks held:\n"); 1200 } 1201 n += witness_list_locks(PCPU_PTR(spinlocks)); 1202 } 1203 if (flags & WARN_PANIC && n) 1204 panic("witness_warn"); 1205#ifdef DDB 1206 else if (witness_ddb && n) 1207 Debugger(__func__); 1208 else if (witness_trace && n) 1209 backtrace(); 1210#endif 1211 return (n); 1212} 1213 1214const char * 1215witness_file(struct lock_object *lock) 1216{ 1217 struct witness *w; 1218 1219 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1220 return ("?"); 1221 w = lock->lo_witness; 1222 return (w->w_file); 1223} 1224 1225int 1226witness_line(struct lock_object *lock) 1227{ 1228 struct witness *w; 1229 1230 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1231 return (0); 1232 w = lock->lo_witness; 1233 return (w->w_line); 1234} 1235 1236static struct witness * 1237enroll(const char *description, struct lock_class *lock_class) 1238{ 1239 struct witness *w; 1240 1241 if (witness_watch == 0 || panicstr != NULL) 1242 return (NULL); 1243 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1244 return (NULL); 1245 mtx_lock_spin(&w_mtx); 1246 STAILQ_FOREACH(w, &w_all, w_list) { 1247 if (w->w_name == description || (w->w_refcount > 0 && 1248 strcmp(description, w->w_name) == 0)) { 1249 w->w_refcount++; 1250 mtx_unlock_spin(&w_mtx); 1251 if (lock_class != w->w_class) 1252 panic( 1253 "lock (%s) %s does not match earlier (%s) lock", 1254 description, lock_class->lc_name, 1255 w->w_class->lc_name); 1256 return (w); 1257 } 1258 } 1259 /* 1260 * This isn't quite right, as witness_cold is still 0 while we 1261 * enroll all the locks initialized before witness_initialize(). 
1262 */ 1263 if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) { 1264 mtx_unlock_spin(&w_mtx); 1265 panic("spin lock %s not in order list", description); 1266 } 1267 if ((w = witness_get()) == NULL) 1268 return (NULL); 1269 w->w_name = description; 1270 w->w_class = lock_class; 1271 w->w_refcount = 1; 1272 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1273 if (lock_class->lc_flags & LC_SPINLOCK) 1274 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1275 else if (lock_class->lc_flags & LC_SLEEPLOCK) 1276 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1277 else { 1278 mtx_unlock_spin(&w_mtx); 1279 panic("lock class %s is not sleep or spin", 1280 lock_class->lc_name); 1281 } 1282 mtx_unlock_spin(&w_mtx); 1283 return (w); 1284} 1285 1286/* Don't let the door bang you on the way out... */ 1287static int 1288depart(struct witness *w) 1289{ 1290 struct witness_child_list_entry *wcl, *nwcl; 1291 struct witness_list *list; 1292 struct witness *parent; 1293 1294 MPASS(w->w_refcount == 0); 1295 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1296 list = &w_sleep; 1297 else 1298 list = &w_spin; 1299 /* 1300 * First, we run through the entire tree looking for any 1301 * witnesses that the outgoing witness is a child of. For 1302 * each parent that we find, we reparent all the direct 1303 * children of the outgoing witness to its parent. 1304 */ 1305 STAILQ_FOREACH(parent, list, w_typelist) { 1306 if (!isitmychild(parent, w)) 1307 continue; 1308 removechild(parent, w); 1309 if (!reparentchildren(parent, w)) 1310 return (0); 1311 } 1312 1313 /* 1314 * Now we go through and free up the child list of the 1315 * outgoing witness. 1316 */ 1317 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1318 nwcl = wcl->wcl_next; 1319 witness_child_free(wcl); 1320 } 1321 1322 /* 1323 * Detach from various lists and free. 1324 */ 1325 STAILQ_REMOVE(list, w, witness, w_typelist); 1326 STAILQ_REMOVE(&w_all, w, witness, w_list); 1327 witness_free(w); 1328 1329 /* Finally, fixup the tree. 
*/ 1330 return (rebalancetree(list)); 1331} 1332 1333/* 1334 * Prune an entire lock order tree. We look for cases where a lock 1335 * is now both a descendant and a direct child of a given lock. In 1336 * that case, we want to remove the direct child link from the tree. 1337 * 1338 * Returns false if insertchild() fails. 1339 */ 1340static int 1341rebalancetree(struct witness_list *list) 1342{ 1343 struct witness *child, *parent; 1344 1345 STAILQ_FOREACH(child, list, w_typelist) { 1346 STAILQ_FOREACH(parent, list, w_typelist) { 1347 if (!isitmychild(parent, child)) 1348 continue; 1349 removechild(parent, child); 1350 if (isitmydescendant(parent, child)) 1351 continue; 1352 if (!insertchild(parent, child)) 1353 return (0); 1354 } 1355 } 1356 witness_levelall(); 1357 return (1); 1358} 1359 1360/* 1361 * Add "child" as a direct child of "parent". Returns false if 1362 * we fail due to out of memory. 1363 */ 1364static int 1365insertchild(struct witness *parent, struct witness *child) 1366{ 1367 struct witness_child_list_entry **wcl; 1368 1369 MPASS(child != NULL && parent != NULL); 1370 1371 /* 1372 * Insert "child" after "parent" 1373 */ 1374 wcl = &parent->w_children; 1375 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1376 wcl = &(*wcl)->wcl_next; 1377 if (*wcl == NULL) { 1378 *wcl = witness_child_get(); 1379 if (*wcl == NULL) 1380 return (0); 1381 } 1382 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1383 1384 return (1); 1385} 1386 1387/* 1388 * Make all the direct descendants of oldparent be direct descendants 1389 * of newparent. 1390 */ 1391static int 1392reparentchildren(struct witness *newparent, struct witness *oldparent) 1393{ 1394 struct witness_child_list_entry *wcl; 1395 int i; 1396 1397 /* Avoid making a witness a child of itself. 
*/ 1398 MPASS(!isitmychild(oldparent, newparent)); 1399 1400 for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1401 for (i = 0; i < wcl->wcl_count; i++) 1402 if (!insertchild(newparent, wcl->wcl_children[i])) 1403 return (0); 1404 return (1); 1405} 1406 1407static int 1408itismychild(struct witness *parent, struct witness *child) 1409{ 1410 struct witness_list *list; 1411 1412 MPASS(child != NULL && parent != NULL); 1413 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1414 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1415 panic( 1416 "%s: parent (%s) and child (%s) are not the same lock type", 1417 __func__, parent->w_class->lc_name, 1418 child->w_class->lc_name); 1419 1420 if (!insertchild(parent, child)) 1421 return (0); 1422 1423 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1424 list = &w_sleep; 1425 else 1426 list = &w_spin; 1427 return (rebalancetree(list)); 1428} 1429 1430static void 1431removechild(struct witness *parent, struct witness *child) 1432{ 1433 struct witness_child_list_entry **wcl, *wcl1; 1434 int i; 1435 1436 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1437 for (i = 0; i < (*wcl)->wcl_count; i++) 1438 if ((*wcl)->wcl_children[i] == child) 1439 goto found; 1440 return; 1441found: 1442 (*wcl)->wcl_count--; 1443 if ((*wcl)->wcl_count > i) 1444 (*wcl)->wcl_children[i] = 1445 (*wcl)->wcl_children[(*wcl)->wcl_count]; 1446 MPASS((*wcl)->wcl_children[i] != NULL); 1447 if ((*wcl)->wcl_count != 0) 1448 return; 1449 wcl1 = *wcl; 1450 *wcl = wcl1->wcl_next; 1451 witness_child_free(wcl1); 1452} 1453 1454static int 1455isitmychild(struct witness *parent, struct witness *child) 1456{ 1457 struct witness_child_list_entry *wcl; 1458 int i; 1459 1460 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1461 for (i = 0; i < wcl->wcl_count; i++) { 1462 if (wcl->wcl_children[i] == child) 1463 return (1); 1464 } 1465 } 1466 return (0); 1467} 1468 1469static int 
1470isitmydescendant(struct witness *parent, struct witness *child) 1471{ 1472 struct witness_child_list_entry *wcl; 1473 int i, j; 1474 1475 if (isitmychild(parent, child)) 1476 return (1); 1477 j = 0; 1478 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1479 MPASS(j < 1000); 1480 for (i = 0; i < wcl->wcl_count; i++) { 1481 if (isitmydescendant(wcl->wcl_children[i], child)) 1482 return (1); 1483 } 1484 j++; 1485 } 1486 return (0); 1487} 1488 1489static void 1490witness_levelall (void) 1491{ 1492 struct witness_list *list; 1493 struct witness *w, *w1; 1494 1495 /* 1496 * First clear all levels. 1497 */ 1498 STAILQ_FOREACH(w, &w_all, w_list) { 1499 w->w_level = 0; 1500 } 1501 1502 /* 1503 * Look for locks with no parent and level all their descendants. 1504 */ 1505 STAILQ_FOREACH(w, &w_all, w_list) { 1506 /* 1507 * This is just an optimization, technically we could get 1508 * away just walking the all list each time. 1509 */ 1510 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1511 list = &w_sleep; 1512 else 1513 list = &w_spin; 1514 STAILQ_FOREACH(w1, list, w_typelist) { 1515 if (isitmychild(w1, w)) 1516 goto skip; 1517 } 1518 witness_leveldescendents(w, 0); 1519 skip: 1520 ; /* silence GCC 3.x */ 1521 } 1522} 1523 1524static void 1525witness_leveldescendents(struct witness *parent, int level) 1526{ 1527 struct witness_child_list_entry *wcl; 1528 int i; 1529 1530 if (parent->w_level < level) 1531 parent->w_level = level; 1532 level++; 1533 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1534 for (i = 0; i < wcl->wcl_count; i++) 1535 witness_leveldescendents(wcl->wcl_children[i], level); 1536} 1537 1538static void 1539witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1540 struct witness *parent, int indent) 1541{ 1542 struct witness_child_list_entry *wcl; 1543 int i, level; 1544 1545 level = parent->w_level; 1546 prnt("%-2d", level); 1547 for (i = 0; i < indent; i++) 1548 prnt(" "); 1549 if (parent->w_refcount > 0) 1550 
prnt("%s", parent->w_name); 1551 else 1552 prnt("(dead)"); 1553 if (parent->w_displayed) { 1554 prnt(" -- (already displayed)\n"); 1555 return; 1556 } 1557 parent->w_displayed = 1; 1558 if (parent->w_refcount > 0) { 1559 if (parent->w_file != NULL) 1560 prnt(" -- last acquired @ %s:%d", parent->w_file, 1561 parent->w_line); 1562 } 1563 prnt("\n"); 1564 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1565 for (i = 0; i < wcl->wcl_count; i++) 1566 witness_displaydescendants(prnt, 1567 wcl->wcl_children[i], indent + 1); 1568} 1569 1570#ifdef BLESSING 1571static int 1572blessed(struct witness *w1, struct witness *w2) 1573{ 1574 int i; 1575 struct witness_blessed *b; 1576 1577 for (i = 0; i < blessed_count; i++) { 1578 b = &blessed_list[i]; 1579 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1580 if (strcmp(w2->w_name, b->b_lock2) == 0) 1581 return (1); 1582 continue; 1583 } 1584 if (strcmp(w1->w_name, b->b_lock2) == 0) 1585 if (strcmp(w2->w_name, b->b_lock1) == 0) 1586 return (1); 1587 } 1588 return (0); 1589} 1590#endif 1591 1592static struct witness * 1593witness_get(void) 1594{ 1595 struct witness *w; 1596 1597 if (witness_watch == 0) { 1598 mtx_unlock_spin(&w_mtx); 1599 return (NULL); 1600 } 1601 if (STAILQ_EMPTY(&w_free)) { 1602 witness_watch = 0; 1603 mtx_unlock_spin(&w_mtx); 1604 printf("%s: witness exhausted\n", __func__); 1605 return (NULL); 1606 } 1607 w = STAILQ_FIRST(&w_free); 1608 STAILQ_REMOVE_HEAD(&w_free, w_list); 1609 bzero(w, sizeof(*w)); 1610 return (w); 1611} 1612 1613static void 1614witness_free(struct witness *w) 1615{ 1616 1617 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1618} 1619 1620static struct witness_child_list_entry * 1621witness_child_get(void) 1622{ 1623 struct witness_child_list_entry *wcl; 1624 1625 if (witness_watch == 0) { 1626 mtx_unlock_spin(&w_mtx); 1627 return (NULL); 1628 } 1629 wcl = w_child_free; 1630 if (wcl == NULL) { 1631 witness_watch = 0; 1632 mtx_unlock_spin(&w_mtx); 1633 printf("%s: witness exhausted\n", 
__func__); 1634 return (NULL); 1635 } 1636 w_child_free = wcl->wcl_next; 1637 bzero(wcl, sizeof(*wcl)); 1638 return (wcl); 1639} 1640 1641static void 1642witness_child_free(struct witness_child_list_entry *wcl) 1643{ 1644 1645 wcl->wcl_next = w_child_free; 1646 w_child_free = wcl; 1647} 1648 1649static struct lock_list_entry * 1650witness_lock_list_get(void) 1651{ 1652 struct lock_list_entry *lle; 1653 1654 if (witness_watch == 0) 1655 return (NULL); 1656 mtx_lock_spin(&w_mtx); 1657 lle = w_lock_list_free; 1658 if (lle == NULL) { 1659 witness_watch = 0; 1660 mtx_unlock_spin(&w_mtx); 1661 printf("%s: witness exhausted\n", __func__); 1662 return (NULL); 1663 } 1664 w_lock_list_free = lle->ll_next; 1665 mtx_unlock_spin(&w_mtx); 1666 bzero(lle, sizeof(*lle)); 1667 return (lle); 1668} 1669 1670static void 1671witness_lock_list_free(struct lock_list_entry *lle) 1672{ 1673 1674 mtx_lock_spin(&w_mtx); 1675 lle->ll_next = w_lock_list_free; 1676 w_lock_list_free = lle; 1677 mtx_unlock_spin(&w_mtx); 1678} 1679 1680static struct lock_instance * 1681find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1682{ 1683 struct lock_list_entry *lle; 1684 struct lock_instance *instance; 1685 int i; 1686 1687 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1688 for (i = lle->ll_count - 1; i >= 0; i--) { 1689 instance = &lle->ll_children[i]; 1690 if (instance->li_lock == lock) 1691 return (instance); 1692 } 1693 return (NULL); 1694} 1695 1696static void 1697witness_list_lock(struct lock_instance *instance) 1698{ 1699 struct lock_object *lock; 1700 1701 lock = instance->li_lock; 1702 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 
1703 "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name); 1704 if (lock->lo_type != lock->lo_name) 1705 printf(" (%s)", lock->lo_type); 1706 printf(" r = %d (%p) locked @ %s:%d\n", 1707 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1708 instance->li_line); 1709} 1710 1711int 1712witness_list_locks(struct lock_list_entry **lock_list) 1713{ 1714 struct lock_list_entry *lle; 1715 int i, nheld; 1716 1717 nheld = 0; 1718 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1719 for (i = lle->ll_count - 1; i >= 0; i--) { 1720 witness_list_lock(&lle->ll_children[i]); 1721 nheld++; 1722 } 1723 return (nheld); 1724} 1725 1726/* 1727 * This is a bit risky at best. We call this function when we have timed 1728 * out acquiring a spin lock, and we assume that the other CPU is stuck 1729 * with this lock held. So, we go groveling around in the other CPU's 1730 * per-cpu data to try to find the lock instance for this spin lock to 1731 * see when it was last acquired. 1732 */ 1733void 1734witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1735{ 1736 struct lock_instance *instance; 1737 struct pcpu *pc; 1738 1739 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1740 return; 1741 pc = pcpu_find(owner->td_oncpu); 1742 instance = find_instance(pc->pc_spinlocks, lock); 1743 if (instance != NULL) 1744 witness_list_lock(instance); 1745} 1746 1747void 1748witness_save(struct lock_object *lock, const char **filep, int *linep) 1749{ 1750 struct lock_instance *instance; 1751 1752 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1753 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1754 return; 1755 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1756 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1757 lock->lo_class->lc_name, lock->lo_name); 1758 instance = find_instance(curthread->td_sleeplocks, lock); 1759 if (instance == NULL) 1760 panic("%s: lock (%s) %s not locked", __func__, 1761 
lock->lo_class->lc_name, lock->lo_name); 1762 *filep = instance->li_file; 1763 *linep = instance->li_line; 1764} 1765 1766void 1767witness_restore(struct lock_object *lock, const char *file, int line) 1768{ 1769 struct lock_instance *instance; 1770 1771 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1772 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1773 return; 1774 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1775 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1776 lock->lo_class->lc_name, lock->lo_name); 1777 instance = find_instance(curthread->td_sleeplocks, lock); 1778 if (instance == NULL) 1779 panic("%s: lock (%s) %s not locked", __func__, 1780 lock->lo_class->lc_name, lock->lo_name); 1781 lock->lo_witness->w_file = file; 1782 lock->lo_witness->w_line = line; 1783 instance->li_file = file; 1784 instance->li_line = line; 1785} 1786 1787void 1788witness_assert(struct lock_object *lock, int flags, const char *file, int line) 1789{ 1790#ifdef INVARIANT_SUPPORT 1791 struct lock_instance *instance; 1792 1793 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1794 return; 1795 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0) 1796 instance = find_instance(curthread->td_sleeplocks, lock); 1797 else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0) 1798 instance = find_instance(PCPU_GET(spinlocks), lock); 1799 else { 1800 panic("Lock (%s) %s is not sleep or spin!", 1801 lock->lo_class->lc_name, lock->lo_name); 1802 } 1803 file = fixup_filename(file); 1804 switch (flags) { 1805 case LA_UNLOCKED: 1806 if (instance != NULL) 1807 panic("Lock (%s) %s locked @ %s:%d.", 1808 lock->lo_class->lc_name, lock->lo_name, file, line); 1809 break; 1810 case LA_LOCKED: 1811 case LA_LOCKED | LA_RECURSED: 1812 case LA_LOCKED | LA_NOTRECURSED: 1813 case LA_SLOCKED: 1814 case LA_SLOCKED | LA_RECURSED: 1815 case LA_SLOCKED | LA_NOTRECURSED: 1816 case LA_XLOCKED: 1817 case LA_XLOCKED | LA_RECURSED: 1818 case 
LA_XLOCKED | LA_NOTRECURSED: 1819 if (instance == NULL) { 1820 panic("Lock (%s) %s not locked @ %s:%d.", 1821 lock->lo_class->lc_name, lock->lo_name, file, line); 1822 break; 1823 } 1824 if ((flags & LA_XLOCKED) != 0 && 1825 (instance->li_flags & LI_EXCLUSIVE) == 0) 1826 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1827 lock->lo_class->lc_name, lock->lo_name, file, line); 1828 if ((flags & LA_SLOCKED) != 0 && 1829 (instance->li_flags & LI_EXCLUSIVE) != 0) 1830 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1831 lock->lo_class->lc_name, lock->lo_name, file, line); 1832 if ((flags & LA_RECURSED) != 0 && 1833 (instance->li_flags & LI_RECURSEMASK) == 0) 1834 panic("Lock (%s) %s not recursed @ %s:%d.", 1835 lock->lo_class->lc_name, lock->lo_name, file, line); 1836 if ((flags & LA_NOTRECURSED) != 0 && 1837 (instance->li_flags & LI_RECURSEMASK) != 0) 1838 panic("Lock (%s) %s recursed @ %s:%d.", 1839 lock->lo_class->lc_name, lock->lo_name, file, line); 1840 break; 1841 default: 1842 panic("Invalid lock assertion at %s:%d.", file, line); 1843 1844 } 1845#endif /* INVARIANT_SUPPORT */ 1846} 1847 1848#ifdef DDB 1849static void 1850witness_list(struct thread *td) 1851{ 1852 1853 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1854 KASSERT(db_active, ("%s: not in the debugger", __func__)); 1855 1856 if (witness_watch == 0) 1857 return; 1858 1859 witness_list_locks(&td->td_sleeplocks); 1860 1861 /* 1862 * We only handle spinlocks if td == curthread. This is somewhat broken 1863 * if td is currently executing on some other CPU and holds spin locks 1864 * as we won't display those locks. If we had a MI way of getting 1865 * the per-cpu data for a given cpu then we could use 1866 * td->td_oncpu to get the list of spinlocks for this thread 1867 * and "fix" this. 1868 * 1869 * That still wouldn't really fix this unless we locked sched_lock 1870 * or stopped the other CPU to make sure it wasn't changing the list 1871 * out from under us. 
It is probably best to just not try to handle 1872 * threads on other CPU's for now. 1873 */ 1874 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1875 witness_list_locks(PCPU_PTR(spinlocks)); 1876} 1877 1878DB_SHOW_COMMAND(locks, db_witness_list) 1879{ 1880 struct thread *td; 1881 pid_t pid; 1882 struct proc *p; 1883 1884 if (have_addr) { 1885 pid = (addr % 16) + ((addr >> 4) % 16) * 10 + 1886 ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + 1887 ((addr >> 16) % 16) * 10000; 1888 /* sx_slock(&allproc_lock); */ 1889 FOREACH_PROC_IN_SYSTEM(p) { 1890 if (p->p_pid == pid) 1891 break; 1892 } 1893 /* sx_sunlock(&allproc_lock); */ 1894 if (p == NULL) { 1895 db_printf("pid %d not found\n", pid); 1896 return; 1897 } 1898 FOREACH_THREAD_IN_PROC(p, td) { 1899 witness_list(td); 1900 } 1901 } else { 1902 td = curthread; 1903 witness_list(td); 1904 } 1905} 1906 1907DB_SHOW_COMMAND(witness, db_witness_display) 1908{ 1909 1910 witness_display(db_printf); 1911} 1912#endif 1913