subr_witness.c revision 130031
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Implementation of the `witness' lock verifier.  Originally implemented for
 * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 */

/*
 *	Main Entry: witness
 *	Pronunciation: 'wit-n&s
 *	Function: noun
 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
 *	    testimony, witness, from 2wit
 *	Date: before 12th century
 *	1 : attestation of a fact or event : TESTIMONY
 *	2 : one that gives evidence; specifically : one who testifies in
 *	    a cause or before a judicial tribunal
 *	3 : one asked to be present at a transaction so as to be able to
 *	    testify to its having taken place
 *	4 : one who has personal knowledge of something
 *	5 a : something serving as evidence or proof : SIGN
 *	  b : public affirmation by word or example of usually
 *	    religious faith or conviction <the heroic witness to divine
 *	    life -- Pilot>
 *	6 capitalized : a member of the Jehovah's Witnesses
 */

/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes.  Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 * This rule is less obvious, but is a result of Giant providing the same
 * semantics as spl().  Basically, when a thread sleeps, it must release
 * Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence rule
 * 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 * This rule is also not quite as obvious.  Giant may be acquired after
 * a sleepable lock because it is a non-sleepable lock and non-sleepable
 * locks may always be acquired while holding a sleepable lock.  The second
 * case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
 * you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
 * acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
 * blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
 * execute.  Thus, acquiring Giant both before and after a sleepable lock
 * will not result in a lock order reversal.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 130031 2004-06-03 20:07:44Z jhb $");

#include "opt_ddb.h"
#include "opt_witness.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <ddb/ddb.h>

#include <machine/stdarg.h>

/* Define this to check for blessed mutexes */
#undef BLESSING

#define	WITNESS_COUNT		200
#define	WITNESS_CHILDCOUNT	(WITNESS_COUNT * 4)
/*
 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads
 * will hold LOCK_NCHILDREN * 2 locks.  We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 */
#define	LOCK_CHILDCOUNT		(MAXCPU + 1024) * 2

#define	WITNESS_NCHILDREN	6

struct witness_child_list_entry;

struct witness {
	const char	*w_name;
	struct lock_class *w_class;
	STAILQ_ENTRY(witness) w_list;		/* List of all witnesses. */
	STAILQ_ENTRY(witness) w_typelist;	/* Witnesses of a type. */
	struct witness_child_list_entry *w_children;	/* Great evilness... */
	const char	*w_file;
	int		w_line;
	u_int		w_level;
	u_int		w_refcount;
	u_char		w_Giant_squawked:1;
	u_char		w_other_squawked:1;
	u_char		w_same_squawked:1;
	u_char		w_displayed:1;
};

struct witness_child_list_entry {
	struct witness_child_list_entry *wcl_next;
	struct witness *wcl_children[WITNESS_NCHILDREN];
	u_int		wcl_count;
};

STAILQ_HEAD(witness_list, witness);

#ifdef BLESSING
struct witness_blessed {
	const char	*b_lock1;
	const char	*b_lock2;
};
#endif

struct witness_order_list_entry {
	const char	*w_name;
	struct lock_class *w_class;
};

#ifdef BLESSING
static int	blessed(struct witness *, struct witness *);
#endif
static int	depart(struct witness *w);
static struct	witness *enroll(const char *description,
		    struct lock_class *lock_class);
static int	insertchild(struct witness *parent, struct witness *child);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent, struct witness *child);
static int	itismychild(struct witness *parent, struct witness *child);
static int	rebalancetree(struct witness_list *list);
static void	removechild(struct witness *parent, struct witness *child);
static int	reparentchildren(struct witness *newparent,
		    struct witness *oldparent);
static int	sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS);
static void	witness_displaydescendants(void(*)(const char *fmt, ...),
		    struct witness *, int indent);
static const char *fixup_filename(const char *file);
static void	witness_leveldescendents(struct witness *parent, int level);
static void	witness_levelall(void);
static struct	witness *witness_get(void);
static void	witness_free(struct witness *m);
static struct	witness_child_list_entry *witness_child_get(void);
static void	witness_child_free(struct witness_child_list_entry *wcl);
static struct	lock_list_entry *witness_lock_list_get(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct	lock_instance *find_instance(struct lock_list_entry *lock_list,
		    struct lock_object *lock);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef DDB
static void	witness_list(struct thread *td);
static void	witness_display_list(void(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_display(void(*)(const char *fmt, ...));
#endif

MALLOC_DEFINE(M_WITNESS, "witness", "witness structure");

/*
 * If set to 0, witness is disabled.  If set to 1, witness performs full lock
 * order checking for all locks.  If set to 2 or higher, then witness skips
 * the full lock order check if the lock being acquired is at a higher level
 * (i.e. farther down in the tree) than the current lock.  This last mode is
 * somewhat experimental and not considered fully safe.  At runtime, this
 * value may be set to 0 to turn off witness.  witness is not allowed to be
 * turned on again once it is turned off, however.
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness_watch", &witness_watch);
SYSCTL_PROC(_debug, OID_AUTO, witness_watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0,
    sysctl_debug_witness_watch, "I", "witness is watching lock operations");

#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_DDB
int	witness_ddb = 1;
#else
int	witness_ddb = 0;
#endif
TUNABLE_INT("debug.witness_ddb", &witness_ddb);
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");

/*
 * When DDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
229 */ 230int witness_trace = 1; 231TUNABLE_INT("debug.witness_trace", &witness_trace); 232SYSCTL_INT(_debug, OID_AUTO, witness_trace, CTLFLAG_RW, &witness_trace, 0, ""); 233#endif /* DDB */ 234 235#ifdef WITNESS_SKIPSPIN 236int witness_skipspin = 1; 237#else 238int witness_skipspin = 0; 239#endif 240TUNABLE_INT("debug.witness_skipspin", &witness_skipspin); 241SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0, 242 ""); 243 244static struct mtx w_mtx; 245static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); 246static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); 247static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); 248static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); 249static struct witness_child_list_entry *w_child_free = NULL; 250static struct lock_list_entry *w_lock_list_free = NULL; 251 252static struct witness w_data[WITNESS_COUNT]; 253static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT]; 254static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; 255 256static struct witness_order_list_entry order_lists[] = { 257 { "proctree", &lock_class_sx }, 258 { "allproc", &lock_class_sx }, 259 { "Giant", &lock_class_mtx_sleep }, 260 { "filedesc structure", &lock_class_mtx_sleep }, 261 { "pipe mutex", &lock_class_mtx_sleep }, 262 { "sigio lock", &lock_class_mtx_sleep }, 263 { "process group", &lock_class_mtx_sleep }, 264 { "process lock", &lock_class_mtx_sleep }, 265 { "session", &lock_class_mtx_sleep }, 266 { "uidinfo hash", &lock_class_mtx_sleep }, 267 { "uidinfo struct", &lock_class_mtx_sleep }, 268 { "allprison", &lock_class_mtx_sleep }, 269 { NULL, NULL }, 270 /* 271 * Sockets 272 */ 273 { "filedesc structure", &lock_class_mtx_sleep }, 274 { "accept", &lock_class_mtx_sleep }, 275 { "sellck", &lock_class_mtx_sleep }, 276 { NULL, NULL }, 277 /* 278 * Routing 279 */ 280 { "radix node head", &lock_class_mtx_sleep }, 281 { "rtentry", &lock_class_mtx_sleep }, 282 { "ifaddr", &lock_class_mtx_sleep }, 283 { NULL, NULL }, 284 /* 285 * UNIX Domain Sockets 286 { NULL, NULL }, 287 */ 288 /* 289 * UDP/IP 290 */ 291 { "udp", &lock_class_mtx_sleep }, 292 { "udpinp", &lock_class_mtx_sleep }, 293 { NULL, NULL }, 294 /* 295 * TCP/IP 296 */ 297 { "tcp", &lock_class_mtx_sleep }, 298 { "tcpinp", &lock_class_mtx_sleep }, 299 { NULL, NULL }, 300 /* 301 * SLIP 302 */ 303 { "slip_mtx", &lock_class_mtx_sleep }, 304 { "slip sc_mtx", &lock_class_mtx_sleep }, 305 { NULL, NULL }, 306 /* 307 * spin locks 308 */ 309#ifdef SMP 310 { "ap boot", &lock_class_mtx_spin }, 311#endif 312 { "sio", &lock_class_mtx_spin }, 313#ifdef __i386__ 314 { "cy", &lock_class_mtx_spin }, 315#endif 316 { "uart_hwmtx", &lock_class_mtx_spin }, 317 { "sabtty", &lock_class_mtx_spin }, 318 { "zstty", &lock_class_mtx_spin }, 319 { "ng_node", &lock_class_mtx_spin }, 320 { "ng_worklist", &lock_class_mtx_spin }, 321 { "taskqueue_fast", &lock_class_mtx_spin }, 322 { "intr table", &lock_class_mtx_spin }, 323 { "ithread table lock", &lock_class_mtx_spin }, 324 { "sleepq chain", &lock_class_mtx_spin }, 325 { "sched lock", &lock_class_mtx_spin }, 326 { "turnstile chain", &lock_class_mtx_spin }, 327 { "td_contested", &lock_class_mtx_spin }, 328 { "callout", &lock_class_mtx_spin }, 329 { "entropy harvest", &lock_class_mtx_spin }, 330 { "entropy harvest buffers", &lock_class_mtx_spin }, 331 /* 332 * leaf locks 333 */ 334 { "allpmaps", &lock_class_mtx_spin }, 335 { "vm page queue free mutex", &lock_class_mtx_spin }, 336 { "icu", 
&lock_class_mtx_spin }, 337#ifdef SMP 338 { "smp rendezvous", &lock_class_mtx_spin }, 339#if defined(__i386__) || defined(__amd64__) 340 { "tlb", &lock_class_mtx_spin }, 341 { "lazypmap", &lock_class_mtx_spin }, 342#endif 343#ifdef __sparc64__ 344 { "ipi", &lock_class_mtx_spin }, 345#endif 346#endif 347 { "clk", &lock_class_mtx_spin }, 348 { "mutex profiling lock", &lock_class_mtx_spin }, 349 { "kse zombie lock", &lock_class_mtx_spin }, 350 { "ALD Queue", &lock_class_mtx_spin }, 351#ifdef __ia64__ 352 { "MCA spin lock", &lock_class_mtx_spin }, 353#endif 354#if defined(__i386__) || defined(__amd64__) 355 { "pcicfg", &lock_class_mtx_spin }, 356#endif 357 { NULL, NULL }, 358 { NULL, NULL } 359}; 360 361#ifdef BLESSING 362/* 363 * Pairs of locks which have been blessed 364 * Don't complain about order problems with blessed locks 365 */ 366static struct witness_blessed blessed_list[] = { 367}; 368static int blessed_count = 369 sizeof(blessed_list) / sizeof(struct witness_blessed); 370#endif 371 372/* 373 * List of all locks in the system. 374 */ 375TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks); 376 377static struct mtx all_mtx = { 378 { &lock_class_mtx_sleep, /* mtx_object.lo_class */ 379 "All locks list", /* mtx_object.lo_name */ 380 "All locks list", /* mtx_object.lo_type */ 381 LO_INITIALIZED, /* mtx_object.lo_flags */ 382 { NULL, NULL }, /* mtx_object.lo_list */ 383 NULL }, /* mtx_object.lo_witness */ 384 MTX_UNOWNED, 0 /* mtx_lock, mtx_recurse */ 385}; 386 387/* 388 * This global is set to 0 once it becomes safe to use the witness code. 389 */ 390static int witness_cold = 1; 391 392/* 393 * Global variables for book keeping. 394 */ 395static int lock_cur_cnt; 396static int lock_max_cnt; 397 398/* 399 * The WITNESS-enabled diagnostic code. 400 */ 401static void 402witness_initialize(void *dummy __unused) 403{ 404 struct lock_object *lock; 405 struct witness_order_list_entry *order; 406 struct witness *w, *w1; 407 int i; 408 409 /* 410 * We have to release Giant before initializing its witness 411 * structure so that WITNESS doesn't get confused. 412 */ 413 mtx_unlock(&Giant); 414 mtx_assert(&Giant, MA_NOTOWNED); 415 416 CTR1(KTR_WITNESS, "%s: initializing witness", __func__); 417 TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list); 418 mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | 419 MTX_NOWITNESS); 420 for (i = 0; i < WITNESS_COUNT; i++) 421 witness_free(&w_data[i]); 422 for (i = 0; i < WITNESS_CHILDCOUNT; i++) 423 witness_child_free(&w_childdata[i]); 424 for (i = 0; i < LOCK_CHILDCOUNT; i++) 425 witness_lock_list_free(&w_locklistdata[i]); 426 427 /* First add in all the specified order lists. */ 428 for (order = order_lists; order->w_name != NULL; order++) { 429 w = enroll(order->w_name, order->w_class); 430 if (w == NULL) 431 continue; 432 w->w_file = "order list"; 433 for (order++; order->w_name != NULL; order++) { 434 w1 = enroll(order->w_name, order->w_class); 435 if (w1 == NULL) 436 continue; 437 w1->w_file = "order list"; 438 if (!itismychild(w, w1)) 439 panic("Not enough memory for static orders!"); 440 w = w1; 441 } 442 } 443 444 /* Iterate through all locks and add them to witness. */ 445 mtx_lock(&all_mtx); 446 TAILQ_FOREACH(lock, &all_locks, lo_list) { 447 if (lock->lo_flags & LO_WITNESS) 448 lock->lo_witness = enroll(lock->lo_type, 449 lock->lo_class); 450 else 451 lock->lo_witness = NULL; 452 } 453 mtx_unlock(&all_mtx); 454 455 /* Mark the witness code as being ready for use. 
*/ 456 atomic_store_rel_int(&witness_cold, 0); 457 458 mtx_lock(&Giant); 459} 460SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) 461 462static int 463sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) 464{ 465 int error, value; 466 467 value = witness_watch; 468 error = sysctl_handle_int(oidp, &value, 0, req); 469 if (error != 0 || req->newptr == NULL) 470 return (error); 471 error = suser(req->td); 472 if (error != 0) 473 return (error); 474 if (value == witness_watch) 475 return (0); 476 if (value != 0) 477 return (EINVAL); 478 witness_watch = 0; 479 return (0); 480} 481 482void 483witness_init(struct lock_object *lock) 484{ 485 struct lock_class *class; 486 487 class = lock->lo_class; 488 if (lock->lo_flags & LO_INITIALIZED) 489 panic("%s: lock (%s) %s is already initialized", __func__, 490 class->lc_name, lock->lo_name); 491 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 492 (class->lc_flags & LC_RECURSABLE) == 0) 493 panic("%s: lock (%s) %s can not be recursable", __func__, 494 class->lc_name, lock->lo_name); 495 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 496 (class->lc_flags & LC_SLEEPABLE) == 0) 497 panic("%s: lock (%s) %s can not be sleepable", __func__, 498 class->lc_name, lock->lo_name); 499 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 500 (class->lc_flags & LC_UPGRADABLE) == 0) 501 panic("%s: lock (%s) %s can not be upgradable", __func__, 502 class->lc_name, lock->lo_name); 503 504 mtx_lock(&all_mtx); 505 TAILQ_INSERT_TAIL(&all_locks, lock, lo_list); 506 lock->lo_flags |= LO_INITIALIZED; 507 lock_cur_cnt++; 508 if (lock_cur_cnt > lock_max_cnt) 509 lock_max_cnt = lock_cur_cnt; 510 mtx_unlock(&all_mtx); 511 if (!witness_cold && witness_watch != 0 && panicstr == NULL && 512 (lock->lo_flags & LO_WITNESS) != 0) 513 lock->lo_witness = enroll(lock->lo_type, class); 514 else 515 lock->lo_witness = NULL; 516} 517 518void 519witness_destroy(struct lock_object *lock) 520{ 521 struct witness *w; 522 523 if (witness_cold) 524 panic("lock (%s) %s destroyed while witness_cold", 525 lock->lo_class->lc_name, lock->lo_name); 526 if ((lock->lo_flags & LO_INITIALIZED) == 0) 527 panic("%s: lock (%s) %s is not initialized", __func__, 528 lock->lo_class->lc_name, lock->lo_name); 529 530 /* XXX: need to verify that no one holds the lock */ 531 w = lock->lo_witness; 532 if (w != NULL) { 533 mtx_lock_spin(&w_mtx); 534 MPASS(w->w_refcount > 0); 535 w->w_refcount--; 536 537 /* 538 * Lock is already released if we have an allocation failure 539 * and depart() fails. 540 */ 541 if (w->w_refcount != 0 || depart(w)) 542 mtx_unlock_spin(&w_mtx); 543 } 544 545 mtx_lock(&all_mtx); 546 lock_cur_cnt--; 547 TAILQ_REMOVE(&all_locks, lock, lo_list); 548 lock->lo_flags &= ~LO_INITIALIZED; 549 mtx_unlock(&all_mtx); 550} 551 552#ifdef DDB 553static void 554witness_display_list(void(*prnt)(const char *fmt, ...), 555 struct witness_list *list) 556{ 557 struct witness *w; 558 559 STAILQ_FOREACH(w, list, w_typelist) { 560 if (w->w_file == NULL || w->w_level > 0) 561 continue; 562 /* 563 * This lock has no anscestors, display its descendants. 564 */ 565 witness_displaydescendants(prnt, w, 0); 566 } 567} 568 569static void 570witness_display(void(*prnt)(const char *fmt, ...)) 571{ 572 struct witness *w; 573 574 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 575 witness_levelall(); 576 577 /* Clear all the displayed flags. */ 578 STAILQ_FOREACH(w, &w_all, w_list) { 579 w->w_displayed = 0; 580 } 581 582 /* 583 * First, handle sleep locks which have been acquired at least 584 * once. 
 */
	prnt("Sleep locks:\n");
	witness_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	STAILQ_FOREACH(w, &w_all, w_list) {
		if (w->w_file != NULL || w->w_refcount == 0)
			continue;
		prnt("%s\n", w->w_name);
	}
}
#endif /* DDB */

/* Trim useless garbage from filenames. */
static const char *
fixup_filename(const char *file)
{

	if (file == NULL)
		return (NULL);
	while (strncmp(file, "../", 3) == 0)
		file += 3;
	return (file);
}

int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch == 0 || panicstr != NULL)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_unlock_spin(&w_mtx);
		return (EDOOFUS);
	}

	/* Try to add the new order. */
	CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
	    lock2->lo_type, lock1->lo_type);
	if (!itismychild(lock1->lo_witness, lock2->lo_witness))
		return (ENOMEM);
	mtx_unlock_spin(&w_mtx);
	return (0);
}

void
witness_checkorder(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *lock1, *lock2;
	struct lock_class *class;
	struct witness *w, *w1;
	struct thread *td;
	int i, j;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;

	/*
	 * Try locks do not block if they fail to acquire the lock, thus
	 * there is no danger of deadlocks or of switching while holding a
	 * spin lock if we acquire a lock via a try operation.  This
	 * function shouldn't even be called for try locks, so panic if
	 * that happens.
	 */
	if (flags & LOP_TRYLOCK)
		panic("%s should not be called for try lock operations",
		    __func__);

	w = lock->lo_witness;
	class = lock->lo_class;
	td = curthread;
	file = fixup_filename(file);

	if (class->lc_flags & LC_SLEEPLOCK) {
		/*
		 * Since spin locks include a critical section, this check
		 * implicitly enforces a lock order of all sleep locks before
		 * all spin locks.
		 */
		if (td->td_critnest != 0)
			panic("blockable sleep lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		lock_list = &td->td_sleeplocks;
	} else
		lock_list = PCPU_PTR(spinlocks);

	/*
	 * Is this the first lock acquired?  If so, then no order checking
	 * is needed.
	 */
	if (*lock_list == NULL)
		return;

	/*
	 * Check to see if we are recursing on a lock we already own.  If
	 * so, make sure that we don't mismatch exclusive and shared lock
	 * acquires.
	 */
	lock1 = find_instance(*lock_list, lock);
	if (lock1 != NULL) {
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("shared lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while exclusively locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("share->excl");
		}
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("exclusive lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while share locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("excl->share");
		}
		return;
	}

	/*
	 * Try locks do not block if they fail to acquire the lock, thus
	 * there is no danger of deadlocks or of switching while holding a
	 * spin lock if we acquire a lock via a try operation.
	 */
	if (flags & LOP_TRYLOCK)
		return;

	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	w1 = lock1->li_lock->lo_witness;
	if (w1 == w) {
		if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK))
			return;
		w->w_same_squawked = 1;
		printf("acquiring duplicate lock of same type: \"%s\"\n",
		    lock->lo_type);
		printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
		    lock1->li_file, lock1->li_line);
		printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
#ifdef DDB
		goto debugger;
#else
		return;
#endif
	}
	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);
	/*
	 * If we have a known higher number just say ok
	 */
	if (witness_watch > 1 && w->w_level > w1->w_level) {
		mtx_unlock_spin(&w_mtx);
		return;
	}
	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmydescendant(w1, w)) {
		mtx_unlock_spin(&w_mtx);
		return;
	}
	for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			MPASS(j < WITNESS_COUNT);
			lock1 = &lle->ll_children[i];
			w1 = lock1->li_lock->lo_witness;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			if (w1 == NULL) {
				KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
				    ("lock missing witness structure"));
				continue;
			}
			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock1->li_lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (!((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				/*
				 * Check the lock order hierarchy for a reversal.
817 */ 818 if (!isitmydescendant(w, w1)) 819 continue; 820 /* 821 * We have a lock order violation, check to see if it 822 * is allowed or has already been yelled about. 823 */ 824 mtx_unlock_spin(&w_mtx); 825#ifdef BLESSING 826 /* 827 * If the lock order is blessed, just bail. We don't 828 * look for other lock order violations though, which 829 * may be a bug. 830 */ 831 if (blessed(w, w1)) 832 return; 833#endif 834 if (lock1->li_lock == &Giant.mtx_object) { 835 if (w1->w_Giant_squawked) 836 return; 837 else 838 w1->w_Giant_squawked = 1; 839 } else { 840 if (w1->w_other_squawked) 841 return; 842 else 843 w1->w_other_squawked = 1; 844 } 845 /* 846 * Ok, yell about it. 847 */ 848 printf("lock order reversal\n"); 849 /* 850 * Try to locate an earlier lock with 851 * witness w in our list. 852 */ 853 do { 854 lock2 = &lle->ll_children[i]; 855 MPASS(lock2->li_lock != NULL); 856 if (lock2->li_lock->lo_witness == w) 857 break; 858 if (i == 0 && lle->ll_next != NULL) { 859 lle = lle->ll_next; 860 i = lle->ll_count - 1; 861 MPASS(i >= 0 && i < LOCK_NCHILDREN); 862 } else 863 i--; 864 } while (i >= 0); 865 if (i < 0) { 866 printf(" 1st %p %s (%s) @ %s:%d\n", 867 lock1->li_lock, lock1->li_lock->lo_name, 868 lock1->li_lock->lo_type, lock1->li_file, 869 lock1->li_line); 870 printf(" 2nd %p %s (%s) @ %s:%d\n", lock, 871 lock->lo_name, lock->lo_type, file, line); 872 } else { 873 printf(" 1st %p %s (%s) @ %s:%d\n", 874 lock2->li_lock, lock2->li_lock->lo_name, 875 lock2->li_lock->lo_type, lock2->li_file, 876 lock2->li_line); 877 printf(" 2nd %p %s (%s) @ %s:%d\n", 878 lock1->li_lock, lock1->li_lock->lo_name, 879 lock1->li_lock->lo_type, lock1->li_file, 880 lock1->li_line); 881 printf(" 3rd %p %s (%s) @ %s:%d\n", lock, 882 lock->lo_name, lock->lo_type, file, line); 883 } 884#ifdef DDB 885 goto debugger; 886#else 887 return; 888#endif 889 } 890 } 891 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 892 /* 893 * If requested, build a new lock order. However, don't build a new 894 * relationship between a sleepable lock and Giant if it is in the 895 * wrong direction. The correct lock order is that sleepable locks 896 * always come before Giant. 897 */ 898 if (flags & LOP_NEWORDER && 899 !(lock1->li_lock == &Giant.mtx_object && 900 (lock->lo_flags & LO_SLEEPABLE) != 0)) { 901 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 902 lock->lo_type, lock1->li_lock->lo_type); 903 if (!itismychild(lock1->li_lock->lo_witness, w)) 904 /* Witness is dead. */ 905 return; 906 } 907 mtx_unlock_spin(&w_mtx); 908 return; 909 910#ifdef DDB 911debugger: 912 if (witness_trace) 913 backtrace(); 914 if (witness_ddb) 915 Debugger(__func__); 916#endif 917} 918 919void 920witness_lock(struct lock_object *lock, int flags, const char *file, int line) 921{ 922 struct lock_list_entry **lock_list, *lle; 923 struct lock_instance *instance; 924 struct witness *w; 925 struct thread *td; 926 927 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 928 panicstr != NULL) 929 return; 930 w = lock->lo_witness; 931 td = curthread; 932 file = fixup_filename(file); 933 934 /* Determine lock list for this lock. */ 935 if (lock->lo_class->lc_flags & LC_SLEEPLOCK) 936 lock_list = &td->td_sleeplocks; 937 else 938 lock_list = PCPU_PTR(spinlocks); 939 940 /* Check to see if we are recursing on a lock we already own. 
*/ 941 instance = find_instance(*lock_list, lock); 942 if (instance != NULL) { 943 instance->li_flags++; 944 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 945 td->td_proc->p_pid, lock->lo_name, 946 instance->li_flags & LI_RECURSEMASK); 947 instance->li_file = file; 948 instance->li_line = line; 949 return; 950 } 951 952 /* Update per-witness last file and line acquire. */ 953 w->w_file = file; 954 w->w_line = line; 955 956 /* Find the next open lock instance in the list and fill it. */ 957 lle = *lock_list; 958 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 959 lle = witness_lock_list_get(); 960 if (lle == NULL) 961 return; 962 lle->ll_next = *lock_list; 963 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, 964 td->td_proc->p_pid, lle); 965 *lock_list = lle; 966 } 967 instance = &lle->ll_children[lle->ll_count++]; 968 instance->li_lock = lock; 969 instance->li_line = line; 970 instance->li_file = file; 971 if ((flags & LOP_EXCLUSIVE) != 0) 972 instance->li_flags = LI_EXCLUSIVE; 973 else 974 instance->li_flags = 0; 975 CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, 976 td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); 977} 978 979void 980witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) 981{ 982 struct lock_instance *instance; 983 struct lock_class *class; 984 985 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 986 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 987 return; 988 class = lock->lo_class; 989 file = fixup_filename(file); 990 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 991 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", 992 class->lc_name, lock->lo_name, file, line); 993 if ((flags & LOP_TRYLOCK) == 0) 994 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, 995 lock->lo_name, file, line); 996 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 997 panic("upgrade of non-sleep lock (%s) %s @ %s:%d", 998 class->lc_name, lock->lo_name, file, line); 999 instance = find_instance(curthread->td_sleeplocks, lock); 1000 if (instance == NULL) 1001 panic("upgrade of unlocked lock (%s) %s @ %s:%d", 1002 class->lc_name, lock->lo_name, file, line); 1003 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1004 panic("upgrade of exclusive lock (%s) %s @ %s:%d", 1005 class->lc_name, lock->lo_name, file, line); 1006 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1007 panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", 1008 class->lc_name, lock->lo_name, 1009 instance->li_flags & LI_RECURSEMASK, file, line); 1010 instance->li_flags |= LI_EXCLUSIVE; 1011} 1012 1013void 1014witness_downgrade(struct lock_object *lock, int flags, const char *file, 1015 int line) 1016{ 1017 struct lock_instance *instance; 1018 struct lock_class *class; 1019 1020 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1021 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1022 return; 1023 class = lock->lo_class; 1024 file = fixup_filename(file); 1025 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1026 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", 1027 class->lc_name, lock->lo_name, file, line); 1028 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1029 panic("downgrade of non-sleep lock (%s) %s @ %s:%d", 1030 class->lc_name, lock->lo_name, file, line); 1031 instance = find_instance(curthread->td_sleeplocks, lock); 1032 if (instance == NULL) 1033 panic("downgrade of unlocked lock (%s) %s @ %s:%d", 1034 class->lc_name, lock->lo_name, file, line); 1035 if 
((instance->li_flags & LI_EXCLUSIVE) == 0) 1036 panic("downgrade of shared lock (%s) %s @ %s:%d", 1037 class->lc_name, lock->lo_name, file, line); 1038 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1039 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", 1040 class->lc_name, lock->lo_name, 1041 instance->li_flags & LI_RECURSEMASK, file, line); 1042 instance->li_flags &= ~LI_EXCLUSIVE; 1043} 1044 1045void 1046witness_unlock(struct lock_object *lock, int flags, const char *file, int line) 1047{ 1048 struct lock_list_entry **lock_list, *lle; 1049 struct lock_instance *instance; 1050 struct lock_class *class; 1051 struct thread *td; 1052 register_t s; 1053 int i, j; 1054 1055 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1056 panicstr != NULL) 1057 return; 1058 td = curthread; 1059 class = lock->lo_class; 1060 file = fixup_filename(file); 1061 1062 /* Find lock instance associated with this lock. */ 1063 if (class->lc_flags & LC_SLEEPLOCK) 1064 lock_list = &td->td_sleeplocks; 1065 else 1066 lock_list = PCPU_PTR(spinlocks); 1067 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1068 for (i = 0; i < (*lock_list)->ll_count; i++) { 1069 instance = &(*lock_list)->ll_children[i]; 1070 if (instance->li_lock == lock) 1071 goto found; 1072 } 1073 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1074 file, line); 1075found: 1076 1077 /* First, check for shared/exclusive mismatches. */ 1078 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1079 (flags & LOP_EXCLUSIVE) == 0) { 1080 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1081 lock->lo_name, file, line); 1082 printf("while exclusively locked from %s:%d\n", 1083 instance->li_file, instance->li_line); 1084 panic("excl->ushare"); 1085 } 1086 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1087 (flags & LOP_EXCLUSIVE) != 0) { 1088 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1089 lock->lo_name, file, line); 1090 printf("while share locked from %s:%d\n", instance->li_file, 1091 instance->li_line); 1092 panic("share->uexcl"); 1093 } 1094 1095 /* If we are recursed, unrecurse. */ 1096 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1097 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1098 td->td_proc->p_pid, instance->li_lock->lo_name, 1099 instance->li_flags); 1100 instance->li_flags--; 1101 return; 1102 } 1103 1104 /* Otherwise, remove this item from the list. */ 1105 s = intr_disable(); 1106 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1107 td->td_proc->p_pid, instance->li_lock->lo_name, 1108 (*lock_list)->ll_count - 1); 1109 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1110 (*lock_list)->ll_children[j] = 1111 (*lock_list)->ll_children[j + 1]; 1112 (*lock_list)->ll_count--; 1113 intr_restore(s); 1114 1115 /* If this lock list entry is now empty, free it. */ 1116 if ((*lock_list)->ll_count == 0) { 1117 lle = *lock_list; 1118 *lock_list = lle->ll_next; 1119 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1120 td->td_proc->p_pid, lle); 1121 witness_lock_list_free(lle); 1122 } 1123} 1124 1125/* 1126 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1127 * exempt Giant and sleepable locks from the checks as well. If any 1128 * non-exempt locks are held, then a supplied message is printed to the 1129 * console along with a list of the offending locks. If indicated in the 1130 * flags then a failure results in a panic as well. 
1131 */ 1132int 1133witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1134{ 1135 struct lock_list_entry *lle; 1136 struct lock_instance *lock1; 1137 struct thread *td; 1138 va_list ap; 1139 int i, n; 1140 1141 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1142 return (0); 1143 n = 0; 1144 td = curthread; 1145 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1146 for (i = lle->ll_count - 1; i >= 0; i--) { 1147 lock1 = &lle->ll_children[i]; 1148 if (lock1->li_lock == lock) 1149 continue; 1150 if (flags & WARN_GIANTOK && 1151 lock1->li_lock == &Giant.mtx_object) 1152 continue; 1153 if (flags & WARN_SLEEPOK && 1154 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1155 continue; 1156 if (n == 0) { 1157 va_start(ap, fmt); 1158 vprintf(fmt, ap); 1159 va_end(ap); 1160 printf(" with the following"); 1161 if (flags & WARN_SLEEPOK) 1162 printf(" non-sleepable"); 1163 printf(" locks held:\n"); 1164 } 1165 n++; 1166 witness_list_lock(lock1); 1167 } 1168 if (PCPU_GET(spinlocks) != NULL) { 1169 /* 1170 * Since we already hold a spinlock preemption is 1171 * already blocked. 1172 */ 1173 if (n == 0) { 1174 va_start(ap, fmt); 1175 vprintf(fmt, ap); 1176 va_end(ap); 1177 printf(" with the following"); 1178 if (flags & WARN_SLEEPOK) 1179 printf(" non-sleepable"); 1180 printf(" locks held:\n"); 1181 } 1182 n += witness_list_locks(PCPU_PTR(spinlocks)); 1183 } 1184 if (flags & WARN_PANIC && n) 1185 panic("witness_warn"); 1186#ifdef DDB 1187 else if (witness_ddb && n) 1188 Debugger(__func__); 1189 else if (witness_trace && n) 1190 backtrace(); 1191#endif 1192 return (n); 1193} 1194 1195const char * 1196witness_file(struct lock_object *lock) 1197{ 1198 struct witness *w; 1199 1200 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1201 return ("?"); 1202 w = lock->lo_witness; 1203 return (w->w_file); 1204} 1205 1206int 1207witness_line(struct lock_object *lock) 1208{ 1209 struct witness *w; 1210 1211 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1212 return (0); 1213 w = lock->lo_witness; 1214 return (w->w_line); 1215} 1216 1217static struct witness * 1218enroll(const char *description, struct lock_class *lock_class) 1219{ 1220 struct witness *w; 1221 1222 if (witness_watch == 0 || panicstr != NULL) 1223 return (NULL); 1224 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1225 return (NULL); 1226 mtx_lock_spin(&w_mtx); 1227 STAILQ_FOREACH(w, &w_all, w_list) { 1228 if (w->w_name == description || (w->w_refcount > 0 && 1229 strcmp(description, w->w_name) == 0)) { 1230 w->w_refcount++; 1231 mtx_unlock_spin(&w_mtx); 1232 if (lock_class != w->w_class) 1233 panic( 1234 "lock (%s) %s does not match earlier (%s) lock", 1235 description, lock_class->lc_name, 1236 w->w_class->lc_name); 1237 return (w); 1238 } 1239 } 1240 /* 1241 * This isn't quite right, as witness_cold is still 0 while we 1242 * enroll all the locks initialized before witness_initialize(). 
1243 */ 1244 if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) { 1245 mtx_unlock_spin(&w_mtx); 1246 panic("spin lock %s not in order list", description); 1247 } 1248 if ((w = witness_get()) == NULL) 1249 return (NULL); 1250 w->w_name = description; 1251 w->w_class = lock_class; 1252 w->w_refcount = 1; 1253 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1254 if (lock_class->lc_flags & LC_SPINLOCK) 1255 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1256 else if (lock_class->lc_flags & LC_SLEEPLOCK) 1257 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1258 else { 1259 mtx_unlock_spin(&w_mtx); 1260 panic("lock class %s is not sleep or spin", 1261 lock_class->lc_name); 1262 } 1263 mtx_unlock_spin(&w_mtx); 1264 return (w); 1265} 1266 1267/* Don't let the door bang you on the way out... */ 1268static int 1269depart(struct witness *w) 1270{ 1271 struct witness_child_list_entry *wcl, *nwcl; 1272 struct witness_list *list; 1273 struct witness *parent; 1274 1275 MPASS(w->w_refcount == 0); 1276 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1277 list = &w_sleep; 1278 else 1279 list = &w_spin; 1280 /* 1281 * First, we run through the entire tree looking for any 1282 * witnesses that the outgoing witness is a child of. For 1283 * each parent that we find, we reparent all the direct 1284 * children of the outgoing witness to its parent. 1285 */ 1286 STAILQ_FOREACH(parent, list, w_typelist) { 1287 if (!isitmychild(parent, w)) 1288 continue; 1289 removechild(parent, w); 1290 if (!reparentchildren(parent, w)) 1291 return (0); 1292 } 1293 1294 /* 1295 * Now we go through and free up the child list of the 1296 * outgoing witness. 1297 */ 1298 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1299 nwcl = wcl->wcl_next; 1300 witness_child_free(wcl); 1301 } 1302 1303 /* 1304 * Detach from various lists and free. 1305 */ 1306 STAILQ_REMOVE(list, w, witness, w_typelist); 1307 STAILQ_REMOVE(&w_all, w, witness, w_list); 1308 witness_free(w); 1309 1310 /* Finally, fixup the tree. */ 1311 return (rebalancetree(list)); 1312} 1313 1314/* 1315 * Prune an entire lock order tree. We look for cases where a lock 1316 * is now both a descendant and a direct child of a given lock. In 1317 * that case, we want to remove the direct child link from the tree. 1318 * 1319 * Returns false if insertchild() fails. 1320 */ 1321static int 1322rebalancetree(struct witness_list *list) 1323{ 1324 struct witness *child, *parent; 1325 1326 STAILQ_FOREACH(child, list, w_typelist) { 1327 STAILQ_FOREACH(parent, list, w_typelist) { 1328 if (!isitmychild(parent, child)) 1329 continue; 1330 removechild(parent, child); 1331 if (isitmydescendant(parent, child)) 1332 continue; 1333 if (!insertchild(parent, child)) 1334 return (0); 1335 } 1336 } 1337 witness_levelall(); 1338 return (1); 1339} 1340 1341/* 1342 * Add "child" as a direct child of "parent". Returns false if 1343 * we fail due to out of memory. 
1344 */ 1345static int 1346insertchild(struct witness *parent, struct witness *child) 1347{ 1348 struct witness_child_list_entry **wcl; 1349 1350 MPASS(child != NULL && parent != NULL); 1351 1352 /* 1353 * Insert "child" after "parent" 1354 */ 1355 wcl = &parent->w_children; 1356 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1357 wcl = &(*wcl)->wcl_next; 1358 if (*wcl == NULL) { 1359 *wcl = witness_child_get(); 1360 if (*wcl == NULL) 1361 return (0); 1362 } 1363 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1364 1365 return (1); 1366} 1367 1368/* 1369 * Make all the direct descendants of oldparent be direct descendants 1370 * of newparent. 1371 */ 1372static int 1373reparentchildren(struct witness *newparent, struct witness *oldparent) 1374{ 1375 struct witness_child_list_entry *wcl; 1376 int i; 1377 1378 /* Avoid making a witness a child of itself. */ 1379 MPASS(!isitmychild(oldparent, newparent)); 1380 1381 for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1382 for (i = 0; i < wcl->wcl_count; i++) 1383 if (!insertchild(newparent, wcl->wcl_children[i])) 1384 return (0); 1385 return (1); 1386} 1387 1388static int 1389itismychild(struct witness *parent, struct witness *child) 1390{ 1391 struct witness_list *list; 1392 1393 MPASS(child != NULL && parent != NULL); 1394 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1395 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1396 panic( 1397 "%s: parent (%s) and child (%s) are not the same lock type", 1398 __func__, parent->w_class->lc_name, 1399 child->w_class->lc_name); 1400 1401 if (!insertchild(parent, child)) 1402 return (0); 1403 1404 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1405 list = &w_sleep; 1406 else 1407 list = &w_spin; 1408 return (rebalancetree(list)); 1409} 1410 1411static void 1412removechild(struct witness *parent, struct witness *child) 1413{ 1414 struct witness_child_list_entry **wcl, *wcl1; 1415 int i; 1416 1417 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1418 for (i = 0; i < (*wcl)->wcl_count; i++) 1419 if ((*wcl)->wcl_children[i] == child) 1420 goto found; 1421 return; 1422found: 1423 (*wcl)->wcl_count--; 1424 if ((*wcl)->wcl_count > i) 1425 (*wcl)->wcl_children[i] = 1426 (*wcl)->wcl_children[(*wcl)->wcl_count]; 1427 MPASS((*wcl)->wcl_children[i] != NULL); 1428 if ((*wcl)->wcl_count != 0) 1429 return; 1430 wcl1 = *wcl; 1431 *wcl = wcl1->wcl_next; 1432 witness_child_free(wcl1); 1433} 1434 1435static int 1436isitmychild(struct witness *parent, struct witness *child) 1437{ 1438 struct witness_child_list_entry *wcl; 1439 int i; 1440 1441 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1442 for (i = 0; i < wcl->wcl_count; i++) { 1443 if (wcl->wcl_children[i] == child) 1444 return (1); 1445 } 1446 } 1447 return (0); 1448} 1449 1450static int 1451isitmydescendant(struct witness *parent, struct witness *child) 1452{ 1453 struct witness_child_list_entry *wcl; 1454 int i, j; 1455 1456 if (isitmychild(parent, child)) 1457 return (1); 1458 j = 0; 1459 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1460 MPASS(j < 1000); 1461 for (i = 0; i < wcl->wcl_count; i++) { 1462 if (isitmydescendant(wcl->wcl_children[i], child)) 1463 return (1); 1464 } 1465 j++; 1466 } 1467 return (0); 1468} 1469 1470static void 1471witness_levelall (void) 1472{ 1473 struct witness_list *list; 1474 struct witness *w, *w1; 1475 1476 /* 1477 * First clear all levels. 
1478 */ 1479 STAILQ_FOREACH(w, &w_all, w_list) { 1480 w->w_level = 0; 1481 } 1482 1483 /* 1484 * Look for locks with no parent and level all their descendants. 1485 */ 1486 STAILQ_FOREACH(w, &w_all, w_list) { 1487 /* 1488 * This is just an optimization, technically we could get 1489 * away just walking the all list each time. 1490 */ 1491 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1492 list = &w_sleep; 1493 else 1494 list = &w_spin; 1495 STAILQ_FOREACH(w1, list, w_typelist) { 1496 if (isitmychild(w1, w)) 1497 goto skip; 1498 } 1499 witness_leveldescendents(w, 0); 1500 skip: 1501 ; /* silence GCC 3.x */ 1502 } 1503} 1504 1505static void 1506witness_leveldescendents(struct witness *parent, int level) 1507{ 1508 struct witness_child_list_entry *wcl; 1509 int i; 1510 1511 if (parent->w_level < level) 1512 parent->w_level = level; 1513 level++; 1514 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1515 for (i = 0; i < wcl->wcl_count; i++) 1516 witness_leveldescendents(wcl->wcl_children[i], level); 1517} 1518 1519static void 1520witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1521 struct witness *parent, int indent) 1522{ 1523 struct witness_child_list_entry *wcl; 1524 int i, level; 1525 1526 level = parent->w_level; 1527 prnt("%-2d", level); 1528 for (i = 0; i < indent; i++) 1529 prnt(" "); 1530 if (parent->w_refcount > 0) 1531 prnt("%s", parent->w_name); 1532 else 1533 prnt("(dead)"); 1534 if (parent->w_displayed) { 1535 prnt(" -- (already displayed)\n"); 1536 return; 1537 } 1538 parent->w_displayed = 1; 1539 if (parent->w_refcount > 0) { 1540 if (parent->w_file != NULL) 1541 prnt(" -- last acquired @ %s:%d", parent->w_file, 1542 parent->w_line); 1543 } 1544 prnt("\n"); 1545 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1546 for (i = 0; i < wcl->wcl_count; i++) 1547 witness_displaydescendants(prnt, 1548 wcl->wcl_children[i], indent + 1); 1549} 1550 1551#ifdef BLESSING 1552static int 1553blessed(struct witness *w1, struct witness *w2) 1554{ 1555 int i; 1556 struct witness_blessed *b; 1557 1558 for (i = 0; i < blessed_count; i++) { 1559 b = &blessed_list[i]; 1560 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1561 if (strcmp(w2->w_name, b->b_lock2) == 0) 1562 return (1); 1563 continue; 1564 } 1565 if (strcmp(w1->w_name, b->b_lock2) == 0) 1566 if (strcmp(w2->w_name, b->b_lock1) == 0) 1567 return (1); 1568 } 1569 return (0); 1570} 1571#endif 1572 1573static struct witness * 1574witness_get(void) 1575{ 1576 struct witness *w; 1577 1578 if (witness_watch == 0) { 1579 mtx_unlock_spin(&w_mtx); 1580 return (NULL); 1581 } 1582 if (STAILQ_EMPTY(&w_free)) { 1583 witness_watch = 0; 1584 mtx_unlock_spin(&w_mtx); 1585 printf("%s: witness exhausted\n", __func__); 1586 return (NULL); 1587 } 1588 w = STAILQ_FIRST(&w_free); 1589 STAILQ_REMOVE_HEAD(&w_free, w_list); 1590 bzero(w, sizeof(*w)); 1591 return (w); 1592} 1593 1594static void 1595witness_free(struct witness *w) 1596{ 1597 1598 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1599} 1600 1601static struct witness_child_list_entry * 1602witness_child_get(void) 1603{ 1604 struct witness_child_list_entry *wcl; 1605 1606 if (witness_watch == 0) { 1607 mtx_unlock_spin(&w_mtx); 1608 return (NULL); 1609 } 1610 wcl = w_child_free; 1611 if (wcl == NULL) { 1612 witness_watch = 0; 1613 mtx_unlock_spin(&w_mtx); 1614 printf("%s: witness exhausted\n", __func__); 1615 return (NULL); 1616 } 1617 w_child_free = wcl->wcl_next; 1618 bzero(wcl, sizeof(*wcl)); 1619 return (wcl); 1620} 1621 1622static void 1623witness_child_free(struct 
witness_child_list_entry *wcl) 1624{ 1625 1626 wcl->wcl_next = w_child_free; 1627 w_child_free = wcl; 1628} 1629 1630static struct lock_list_entry * 1631witness_lock_list_get(void) 1632{ 1633 struct lock_list_entry *lle; 1634 1635 if (witness_watch == 0) 1636 return (NULL); 1637 mtx_lock_spin(&w_mtx); 1638 lle = w_lock_list_free; 1639 if (lle == NULL) { 1640 witness_watch = 0; 1641 mtx_unlock_spin(&w_mtx); 1642 printf("%s: witness exhausted\n", __func__); 1643 return (NULL); 1644 } 1645 w_lock_list_free = lle->ll_next; 1646 mtx_unlock_spin(&w_mtx); 1647 bzero(lle, sizeof(*lle)); 1648 return (lle); 1649} 1650 1651static void 1652witness_lock_list_free(struct lock_list_entry *lle) 1653{ 1654 1655 mtx_lock_spin(&w_mtx); 1656 lle->ll_next = w_lock_list_free; 1657 w_lock_list_free = lle; 1658 mtx_unlock_spin(&w_mtx); 1659} 1660 1661static struct lock_instance * 1662find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1663{ 1664 struct lock_list_entry *lle; 1665 struct lock_instance *instance; 1666 int i; 1667 1668 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1669 for (i = lle->ll_count - 1; i >= 0; i--) { 1670 instance = &lle->ll_children[i]; 1671 if (instance->li_lock == lock) 1672 return (instance); 1673 } 1674 return (NULL); 1675} 1676 1677static void 1678witness_list_lock(struct lock_instance *instance) 1679{ 1680 struct lock_object *lock; 1681 1682 lock = instance->li_lock; 1683 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 1684 "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name); 1685 if (lock->lo_type != lock->lo_name) 1686 printf(" (%s)", lock->lo_type); 1687 printf(" r = %d (%p) locked @ %s:%d\n", 1688 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1689 instance->li_line); 1690} 1691 1692int 1693witness_list_locks(struct lock_list_entry **lock_list) 1694{ 1695 struct lock_list_entry *lle; 1696 int i, nheld; 1697 1698 nheld = 0; 1699 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1700 for (i = lle->ll_count - 1; i >= 0; i--) { 1701 witness_list_lock(&lle->ll_children[i]); 1702 nheld++; 1703 } 1704 return (nheld); 1705} 1706 1707/* 1708 * This is a bit risky at best. We call this function when we have timed 1709 * out acquiring a spin lock, and we assume that the other CPU is stuck 1710 * with this lock held. So, we go groveling around in the other CPU's 1711 * per-cpu data to try to find the lock instance for this spin lock to 1712 * see when it was last acquired. 
1713 */ 1714void 1715witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1716{ 1717 struct lock_instance *instance; 1718 struct pcpu *pc; 1719 1720 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1721 return; 1722 pc = pcpu_find(owner->td_oncpu); 1723 instance = find_instance(pc->pc_spinlocks, lock); 1724 if (instance != NULL) 1725 witness_list_lock(instance); 1726} 1727 1728void 1729witness_save(struct lock_object *lock, const char **filep, int *linep) 1730{ 1731 struct lock_instance *instance; 1732 1733 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1734 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1735 return; 1736 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1737 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1738 lock->lo_class->lc_name, lock->lo_name); 1739 instance = find_instance(curthread->td_sleeplocks, lock); 1740 if (instance == NULL) 1741 panic("%s: lock (%s) %s not locked", __func__, 1742 lock->lo_class->lc_name, lock->lo_name); 1743 *filep = instance->li_file; 1744 *linep = instance->li_line; 1745} 1746 1747void 1748witness_restore(struct lock_object *lock, const char *file, int line) 1749{ 1750 struct lock_instance *instance; 1751 1752 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1753 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1754 return; 1755 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1756 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1757 lock->lo_class->lc_name, lock->lo_name); 1758 instance = find_instance(curthread->td_sleeplocks, lock); 1759 if (instance == NULL) 1760 panic("%s: lock (%s) %s not locked", __func__, 1761 lock->lo_class->lc_name, lock->lo_name); 1762 lock->lo_witness->w_file = file; 1763 lock->lo_witness->w_line = line; 1764 instance->li_file = file; 1765 instance->li_line = line; 1766} 1767 1768void 1769witness_assert(struct lock_object *lock, int flags, const char *file, int line) 1770{ 1771#ifdef INVARIANT_SUPPORT 1772 struct lock_instance *instance; 1773 1774 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1775 return; 1776 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0) 1777 instance = find_instance(curthread->td_sleeplocks, lock); 1778 else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0) 1779 instance = find_instance(PCPU_GET(spinlocks), lock); 1780 else { 1781 panic("Lock (%s) %s is not sleep or spin!", 1782 lock->lo_class->lc_name, lock->lo_name); 1783 } 1784 file = fixup_filename(file); 1785 switch (flags) { 1786 case LA_UNLOCKED: 1787 if (instance != NULL) 1788 panic("Lock (%s) %s locked @ %s:%d.", 1789 lock->lo_class->lc_name, lock->lo_name, file, line); 1790 break; 1791 case LA_LOCKED: 1792 case LA_LOCKED | LA_RECURSED: 1793 case LA_LOCKED | LA_NOTRECURSED: 1794 case LA_SLOCKED: 1795 case LA_SLOCKED | LA_RECURSED: 1796 case LA_SLOCKED | LA_NOTRECURSED: 1797 case LA_XLOCKED: 1798 case LA_XLOCKED | LA_RECURSED: 1799 case LA_XLOCKED | LA_NOTRECURSED: 1800 if (instance == NULL) { 1801 panic("Lock (%s) %s not locked @ %s:%d.", 1802 lock->lo_class->lc_name, lock->lo_name, file, line); 1803 break; 1804 } 1805 if ((flags & LA_XLOCKED) != 0 && 1806 (instance->li_flags & LI_EXCLUSIVE) == 0) 1807 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1808 lock->lo_class->lc_name, lock->lo_name, file, line); 1809 if ((flags & LA_SLOCKED) != 0 && 1810 (instance->li_flags & LI_EXCLUSIVE) != 0) 1811 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1812 lock->lo_class->lc_name, 
lock->lo_name, file, line); 1813 if ((flags & LA_RECURSED) != 0 && 1814 (instance->li_flags & LI_RECURSEMASK) == 0) 1815 panic("Lock (%s) %s not recursed @ %s:%d.", 1816 lock->lo_class->lc_name, lock->lo_name, file, line); 1817 if ((flags & LA_NOTRECURSED) != 0 && 1818 (instance->li_flags & LI_RECURSEMASK) != 0) 1819 panic("Lock (%s) %s recursed @ %s:%d.", 1820 lock->lo_class->lc_name, lock->lo_name, file, line); 1821 break; 1822 default: 1823 panic("Invalid lock assertion at %s:%d.", file, line); 1824 1825 } 1826#endif /* INVARIANT_SUPPORT */ 1827} 1828 1829#ifdef DDB 1830static void 1831witness_list(struct thread *td) 1832{ 1833 1834 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1835 KASSERT(db_active, ("%s: not in the debugger", __func__)); 1836 1837 if (witness_watch == 0) 1838 return; 1839 1840 witness_list_locks(&td->td_sleeplocks); 1841 1842 /* 1843 * We only handle spinlocks if td == curthread. This is somewhat broken 1844 * if td is currently executing on some other CPU and holds spin locks 1845 * as we won't display those locks. If we had a MI way of getting 1846 * the per-cpu data for a given cpu then we could use 1847 * td->td_oncpu to get the list of spinlocks for this thread 1848 * and "fix" this. 1849 * 1850 * That still wouldn't really fix this unless we locked sched_lock 1851 * or stopped the other CPU to make sure it wasn't changing the list 1852 * out from under us. It is probably best to just not try to handle 1853 * threads on other CPU's for now. 1854 */ 1855 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1856 witness_list_locks(PCPU_PTR(spinlocks)); 1857} 1858 1859DB_SHOW_COMMAND(locks, db_witness_list) 1860{ 1861 struct thread *td; 1862 pid_t pid; 1863 struct proc *p; 1864 1865 if (have_addr) { 1866 pid = (addr % 16) + ((addr >> 4) % 16) * 10 + 1867 ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + 1868 ((addr >> 16) % 16) * 10000; 1869 /* sx_slock(&allproc_lock); */ 1870 FOREACH_PROC_IN_SYSTEM(p) { 1871 if (p->p_pid == pid) 1872 break; 1873 } 1874 /* sx_sunlock(&allproc_lock); */ 1875 if (p == NULL) { 1876 db_printf("pid %d not found\n", pid); 1877 return; 1878 } 1879 FOREACH_THREAD_IN_PROC(p, td) { 1880 witness_list(td); 1881 } 1882 } else { 1883 td = curthread; 1884 witness_list(td); 1885 } 1886} 1887 1888DB_SHOW_COMMAND(witness, db_witness_display) 1889{ 1890 1891 witness_display(db_printf); 1892} 1893#endif 1894