1/* $OpenBSD: subr_witness.c,v 1.53 2024/06/03 14:34:19 claudio Exp $ */ 2 3/*- 4 * Copyright (c) 2008 Isilon Systems, Inc. 5 * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com> 6 * Copyright (c) 1998 Berkeley Software Design, Inc. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Berkeley Software Design Inc's name may not be used to endorse or 18 * promote products derived from this software without specific prior 19 * written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from BSDI Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp 34 * and BSDI Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp 35 */ 36 37/* 38 * Implementation of the `witness' lock verifier. Originally implemented for 39 * mutexes in BSD/OS. Extended to handle generic lock objects and lock 40 * classes in FreeBSD. 41 */ 42 43/* 44 * Main Entry: witness 45 * Pronunciation: 'wit-n&s 46 * Function: noun 47 * Etymology: Middle English witnesse, from Old English witnes knowledge, 48 * testimony, witness, from 2wit 49 * Date: before 12th century 50 * 1 : attestation of a fact or event : TESTIMONY 51 * 2 : one that gives evidence; specifically : one who testifies in 52 * a cause or before a judicial tribunal 53 * 3 : one asked to be present at a transaction so as to be able to 54 * testify to its having taken place 55 * 4 : one who has personal knowledge of something 56 * 5 a : something serving as evidence or proof : SIGN 57 * b : public affirmation by word or example of usually 58 * religious faith or conviction <the heroic witness to divine 59 * life -- Pilot> 60 * 6 capitalized : a member of the Jehovah's Witnesses 61 */ 62 63/* 64 * Special rules concerning Giant and lock orders: 65 * 66 * 1) Giant must be acquired before any other mutexes. Stated another way, 67 * no other mutex may be held when Giant is acquired. 68 * 69 * 2) Giant must be released when blocking on a sleepable lock. 70 * 71 * This rule is less obvious, but is a result of Giant providing the same 72 * semantics as spl(). Basically, when a thread sleeps, it must release 73 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule 74 * 2). 75 * 76 * 3) Giant may be acquired before or after sleepable locks. 77 * 78 * This rule is also not quite as obvious. Giant may be acquired after 79 * a sleepable lock because it is a non-sleepable lock and non-sleepable 80 * locks may always be acquired while holding a sleepable lock. The second 81 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose 82 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 83 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and 84 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to 85 * execute. Thus, acquiring Giant both before and after a sleepable lock 86 * will not result in a lock order reversal. 87 */ 88 89#include <sys/param.h> 90#include <sys/systm.h> 91#include <sys/kernel.h> 92#include <sys/malloc.h> 93#ifdef MULTIPROCESSOR 94#include <sys/mplock.h> 95#endif 96#include <sys/mutex.h> 97#include <sys/percpu.h> 98#include <sys/proc.h> 99#include <sys/sched.h> 100#include <sys/stacktrace.h> 101#include <sys/stdint.h> 102#include <sys/sysctl.h> 103#include <sys/syslog.h> 104#include <sys/witness.h> 105 106#include <machine/cpu.h> 107 108#include <uvm/uvm_extern.h> /* uvm_pageboot_alloc */ 109 110#ifndef DDB 111#error "DDB is required for WITNESS" 112#endif 113 114#include <machine/db_machdep.h> 115 116#include <ddb/db_access.h> 117#include <ddb/db_var.h> 118#include <ddb/db_output.h> 119 120#define LI_RECURSEMASK 0x0000ffff /* Recursion depth of lock instance. */ 121#define LI_EXCLUSIVE 0x00010000 /* Exclusive lock instance. */ 122#define LI_NORELEASE 0x00020000 /* Lock not allowed to be released. */ 123 124#ifndef WITNESS_COUNT 125#define WITNESS_COUNT 1536 126#endif 127#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ 128#define WITNESS_PENDLIST (1024 + MAXCPUS) 129 130/* Allocate 256 KB of stack data space */ 131#define WITNESS_LO_DATA_COUNT 2048 132 133/* Prime, gives load factor of ~2 at full load */ 134#define WITNESS_LO_HASH_SIZE 1021 135 136/* 137 * XXX: This is somewhat bogus, as we assume here that at most 2048 threads 138 * will hold LOCK_NCHILDREN locks. We handle failure ok, and we should 139 * probably be safe for the most part, but it's still a SWAG. 140 */ 141#define LOCK_NCHILDREN 5 142#define LOCK_CHILDCOUNT 2048 143 144#define FULLGRAPH_SBUF_SIZE 512 145 146/* 147 * These flags go in the witness relationship matrix and describe the 148 * relationship between any two struct witness objects. 149 */ 150#define WITNESS_UNRELATED 0x00 /* No lock order relation. */ 151#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ 152#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ 153#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ 154#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ 155#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) 156#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) 157#define WITNESS_RELATED_MASK \ 158 (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) 159#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been 160 * observed. */ 161#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ 162#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ 163#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */ 164 165/* Descendant to ancestor flags */ 166#define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2) 167 168/* Ancestor to descendant flags */ 169#define WITNESS_ATOD(x) (((x) & WITNESS_RELATED_MASK) << 2) 170 171#define WITNESS_INDEX_ASSERT(i) \ 172 KASSERT((i) > 0 && (i) <= w_max_used_index && (i) < witness_count) 173 174/* 175 * Lock classes. Each lock has a class which describes characteristics 176 * common to all types of locks of a given class. 177 * 178 * Spin locks in general must always protect against preemption, as it is 179 * an error to perform any type of context switch while holding a spin lock. 180 * Also, for an individual lock to be recursable, its class must allow 181 * recursion and the lock itself must explicitly allow recursion. 182 */ 183 184struct lock_class { 185 const char *lc_name; 186 u_int lc_flags; 187}; 188 189union lock_stack { 190 union lock_stack *ls_next; 191 struct stacktrace ls_stack; 192}; 193 194#define LC_SLEEPLOCK 0x00000001 /* Sleep lock. */ 195#define LC_SPINLOCK 0x00000002 /* Spin lock. */ 196#define LC_SLEEPABLE 0x00000004 /* Sleeping allowed with this lock. */ 197#define LC_RECURSABLE 0x00000008 /* Locks of this type may recurse. */ 198#define LC_UPGRADABLE 0x00000010 /* Upgrades and downgrades permitted. */ 199 200/* 201 * Lock instances. A lock instance is the data associated with a lock while 202 * it is held by witness. For example, a lock instance will hold the 203 * recursion count of a lock. Lock instances are held in lists. Spin locks 204 * are held in a per-cpu list while sleep locks are held in per-thread list. 205 */ 206struct lock_instance { 207 struct lock_object *li_lock; 208 union lock_stack *li_stack; 209 u_int li_flags; 210}; 211 212/* 213 * A simple list type used to build the list of locks held by a thread 214 * or CPU. We can't simply embed the list in struct lock_object since a 215 * lock may be held by more than one thread if it is a shared lock. Locks 216 * are added to the head of the list, so we fill up each list entry from 217 * "the back" logically. To ease some of the arithmetic, we actually fill 218 * in each list entry the normal way (children[0] then children[1], etc.) but 219 * when we traverse the list we read children[count-1] as the first entry 220 * down to children[0] as the final entry. 221 */ 222struct lock_list_entry { 223 struct lock_list_entry *ll_next; 224 struct lock_instance ll_children[LOCK_NCHILDREN]; 225 int ll_count; 226}; 227 228/* 229 * The main witness structure. One of these per named lock type in the system 230 * (for example, "vnode interlock"). 231 */ 232struct witness { 233 const struct lock_type *w_type; 234 const char *w_subtype; 235 uint32_t w_index; /* Index in the relationship matrix */ 236 struct lock_class *w_class; 237 SLIST_ENTRY(witness) w_list; /* List of all witnesses. */ 238 SLIST_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 239 SLIST_ENTRY(witness) w_hash_next; /* Linked list in 240 * hash buckets. */ 241 uint16_t w_num_ancestors; /* direct/indirect 242 * ancestor count */ 243 uint16_t w_num_descendants; /* direct/indirect 244 * descendant count */ 245 int16_t w_ddb_level; 246 unsigned w_acquired:1; 247 unsigned w_displayed:1; 248 unsigned w_reversed:1; 249}; 250 251SLIST_HEAD(witness_list, witness); 252 253/* 254 * The witness hash table. Keys are witness names (const char *), elements are 255 * witness objects (struct witness *). 256 */ 257struct witness_hash { 258 struct witness_list wh_array[WITNESS_HASH_SIZE]; 259 uint32_t wh_size; 260 uint32_t wh_count; 261}; 262 263/* 264 * Key type for the lock order data hash table. 265 */ 266struct witness_lock_order_key { 267 uint16_t from; 268 uint16_t to; 269}; 270 271struct witness_lock_order_data { 272 struct stacktrace wlod_stack; 273 struct witness_lock_order_key wlod_key; 274 struct witness_lock_order_data *wlod_next; 275}; 276 277/* 278 * The witness lock order data hash table. Keys are witness index tuples 279 * (struct witness_lock_order_key), elements are lock order data objects 280 * (struct witness_lock_order_data). 281 */ 282struct witness_lock_order_hash { 283 struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE]; 284 u_int wloh_size; 285 u_int wloh_count; 286}; 287 288struct witness_pendhelp { 289 const struct lock_type *wh_type; 290 struct lock_object *wh_lock; 291}; 292 293struct witness_cpu { 294 struct lock_list_entry *wc_spinlocks; 295 struct lock_list_entry *wc_lle_cache; 296 union lock_stack *wc_stk_cache; 297 unsigned int wc_lle_count; 298 unsigned int wc_stk_count; 299} __aligned(CACHELINESIZE); 300 301#define WITNESS_LLE_CACHE_MAX 8 302#define WITNESS_STK_CACHE_MAX (WITNESS_LLE_CACHE_MAX * LOCK_NCHILDREN) 303 304struct witness_cpu witness_cpu[MAXCPUS]; 305 306/* 307 * Returns 0 if one of the locks is a spin lock and the other is not. 308 * Returns 1 otherwise. 309 */ 310static __inline int 311witness_lock_type_equal(struct witness *w1, struct witness *w2) 312{ 313 314 return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) == 315 (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); 316} 317 318static __inline int 319witness_lock_order_key_equal(const struct witness_lock_order_key *a, 320 const struct witness_lock_order_key *b) 321{ 322 323 return (a->from == b->from && a->to == b->to); 324} 325 326static int _isitmyx(struct witness *w1, struct witness *w2, int rmask, 327 const char *fname); 328static void adopt(struct witness *parent, struct witness *child); 329static struct witness *enroll(const struct lock_type *, const char *, 330 struct lock_class *); 331static struct lock_instance *find_instance(struct lock_list_entry *list, 332 const struct lock_object *lock); 333static int isitmychild(struct witness *parent, struct witness *child); 334static int isitmydescendant(struct witness *parent, struct witness *child); 335static void itismychild(struct witness *parent, struct witness *child); 336#ifdef DDB 337static void db_witness_add_fullgraph(struct witness *parent); 338static void witness_ddb_compute_levels(void); 339static void witness_ddb_display(int(*)(const char *fmt, ...)); 340static void witness_ddb_display_descendants(int(*)(const char *fmt, ...), 341 struct witness *, int indent); 342static void witness_ddb_display_list(int(*prnt)(const char *fmt, ...), 343 struct witness_list *list); 344static void witness_ddb_level_descendants(struct witness *parent, int l); 345static void witness_ddb_list(struct proc *td); 346#endif 347static int witness_alloc_stacks(void); 348static void witness_debugger(int dump); 349static void witness_free(struct witness *m); 350static struct witness *witness_get(void); 351static uint32_t witness_hash_djb2(const uint8_t *key, uint32_t size); 352static struct witness *witness_hash_get(const struct lock_type *, 353 const char *); 354static void witness_hash_put(struct witness *w); 355static void witness_init_hash_tables(void); 356static void witness_increment_graph_generation(void); 357static int witness_list_locks(struct lock_list_entry **, 358 int (*)(const char *, ...)); 359static void witness_lock_list_free(struct lock_list_entry *lle); 360static struct lock_list_entry *witness_lock_list_get(void); 361static void witness_lock_stack_free(union lock_stack *stack); 362static union lock_stack *witness_lock_stack_get(void); 363static int witness_lock_order_add(struct witness *parent, 364 struct witness *child); 365static int witness_lock_order_check(struct witness *parent, 366 struct witness *child); 367static struct witness_lock_order_data *witness_lock_order_get( 368 struct witness *parent, 369 struct witness *child); 370static void witness_list_lock(struct lock_instance *instance, 371 int (*prnt)(const char *fmt, ...)); 372static void witness_print_cycle(int (*prnt)(const char *fmt, ...), 373 struct witness *parent, struct witness *child); 374static void witness_print_cycle_edge(int (*prnt)(const char *fmt, ...), 375 struct witness *parent, struct witness *child, 376 int step, int last); 377static int witness_search(struct witness *w, struct witness *target, 378 struct witness **path, int depth, int *remaining); 379static void witness_setflag(struct lock_object *lock, int flag, int set); 380 381/* 382 * If set to 0, lock order checking is disabled. If set to -1, 383 * witness is completely disabled. Otherwise witness performs full 384 * lock order checking for all locks. At runtime, lock order checking 385 * may be toggled. However, witness cannot be reenabled once it is 386 * completely disabled. 387 */ 388#ifdef WITNESS_WATCH 389static int witness_watch = 3; 390#else 391static int witness_watch = 2; 392#endif 393 394#ifdef WITNESS_LOCKTRACE 395static int witness_locktrace = 1; 396#else 397static int witness_locktrace = 0; 398#endif 399 400int witness_count = WITNESS_COUNT; 401int witness_uninitialized_report = 5; 402 403static struct mutex w_mtx; 404static struct rwlock w_ctlock = RWLOCK_INITIALIZER("w_ctlock"); 405 406/* w_list */ 407static struct witness_list w_free = SLIST_HEAD_INITIALIZER(w_free); 408static struct witness_list w_all = SLIST_HEAD_INITIALIZER(w_all); 409 410/* w_typelist */ 411static struct witness_list w_spin = SLIST_HEAD_INITIALIZER(w_spin); 412static struct witness_list w_sleep = SLIST_HEAD_INITIALIZER(w_sleep); 413 414/* lock list */ 415static struct lock_list_entry *w_lock_list_free = NULL; 416static struct witness_pendhelp pending_locks[WITNESS_PENDLIST]; 417static u_int pending_cnt; 418 419static int w_free_cnt, w_spin_cnt, w_sleep_cnt; 420 421static struct witness *w_data; 422static uint8_t **w_rmatrix; 423static struct lock_list_entry *w_locklistdata; 424static struct witness_hash w_hash; /* The witness hash table. */ 425 426/* The lock order data hash */ 427static struct witness_lock_order_data *w_lodata; 428static struct witness_lock_order_data *w_lofree = NULL; 429static struct witness_lock_order_hash w_lohash; 430static int w_max_used_index = 0; 431static unsigned int w_generation = 0; 432 433static union lock_stack *w_lock_stack_free; 434static unsigned int w_lock_stack_num; 435 436static struct lock_class lock_class_kernel_lock = { 437 .lc_name = "kernel_lock", 438 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE 439}; 440 441static struct lock_class lock_class_mutex = { 442 .lc_name = "mutex", 443 .lc_flags = LC_SPINLOCK 444}; 445 446static struct lock_class lock_class_rwlock = { 447 .lc_name = "rwlock", 448 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_UPGRADABLE 449}; 450 451static struct lock_class lock_class_rrwlock = { 452 .lc_name = "rrwlock", 453 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE | 454 LC_UPGRADABLE 455}; 456 457static struct lock_class *lock_classes[] = { 458 &lock_class_kernel_lock, 459 &lock_class_mutex, 460 &lock_class_rwlock, 461 &lock_class_rrwlock, 462}; 463 464/* 465 * This global is set to 0 once it becomes safe to use the witness code. 466 */ 467static int witness_cold = 1; 468 469/* 470 * This global is set to 1 once the static lock orders have been enrolled 471 * so that a warning can be issued for any spin locks enrolled later. 472 */ 473static int witness_spin_warn = 0; 474 475/* 476 * The WITNESS-enabled diagnostic code. Note that the witness code does 477 * assume that the early boot is single-threaded at least until after this 478 * routine is completed. 479 */ 480void 481witness_initialize(void) 482{ 483 struct lock_object *lock; 484 union lock_stack *stacks; 485 struct witness *w; 486 int i, s; 487 488 w_data = (void *)uvm_pageboot_alloc(sizeof(struct witness) * 489 witness_count); 490 memset(w_data, 0, sizeof(struct witness) * witness_count); 491 492 w_rmatrix = (void *)uvm_pageboot_alloc(sizeof(*w_rmatrix) * 493 (witness_count + 1)); 494 495 for (i = 0; i < witness_count + 1; i++) { 496 w_rmatrix[i] = (void *)uvm_pageboot_alloc( 497 sizeof(*w_rmatrix[i]) * (witness_count + 1)); 498 memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * 499 (witness_count + 1)); 500 } 501 502 mtx_init_flags(&w_mtx, IPL_HIGH, "witness lock", MTX_NOWITNESS); 503 for (i = witness_count - 1; i >= 0; i--) { 504 w = &w_data[i]; 505 memset(w, 0, sizeof(*w)); 506 w_data[i].w_index = i; /* Witness index never changes. */ 507 witness_free(w); 508 } 509 KASSERTMSG(SLIST_FIRST(&w_free)->w_index == 0, 510 "%s: Invalid list of free witness objects", __func__); 511 512 /* Witness with index 0 is not used to aid in debugging. */ 513 SLIST_REMOVE_HEAD(&w_free, w_list); 514 w_free_cnt--; 515 516 for (i = 0; i < witness_count; i++) { 517 memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * 518 (witness_count + 1)); 519 } 520 521 if (witness_locktrace) { 522 w_lock_stack_num = LOCK_CHILDCOUNT * LOCK_NCHILDREN; 523 stacks = (void *)uvm_pageboot_alloc(sizeof(*stacks) * 524 w_lock_stack_num); 525 } 526 527 w_locklistdata = (void *)uvm_pageboot_alloc( 528 sizeof(struct lock_list_entry) * LOCK_CHILDCOUNT); 529 memset(w_locklistdata, 0, sizeof(struct lock_list_entry) * 530 LOCK_CHILDCOUNT); 531 532 s = splhigh(); 533 for (i = 0; i < w_lock_stack_num; i++) 534 witness_lock_stack_free(&stacks[i]); 535 for (i = 0; i < LOCK_CHILDCOUNT; i++) 536 witness_lock_list_free(&w_locklistdata[i]); 537 splx(s); 538 witness_init_hash_tables(); 539 witness_spin_warn = 1; 540 541 /* Iterate through all locks and add them to witness. */ 542 for (i = 0; pending_locks[i].wh_lock != NULL; i++) { 543 lock = pending_locks[i].wh_lock; 544 KASSERTMSG(lock->lo_flags & LO_WITNESS, 545 "%s: lock %s is on pending list but not LO_WITNESS", 546 __func__, lock->lo_name); 547 lock->lo_witness = enroll(pending_locks[i].wh_type, 548 lock->lo_name, LOCK_CLASS(lock)); 549 } 550 551 /* Mark the witness code as being ready for use. */ 552 witness_cold = 0; 553} 554 555void 556witness_init(struct lock_object *lock, const struct lock_type *type) 557{ 558 struct lock_class *class; 559 560 /* Various sanity checks. */ 561 class = LOCK_CLASS(lock); 562 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 563 (class->lc_flags & LC_RECURSABLE) == 0) 564 panic("%s: lock (%s) %s can not be recursable", 565 __func__, class->lc_name, lock->lo_name); 566 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 567 (class->lc_flags & LC_SLEEPABLE) == 0) 568 panic("%s: lock (%s) %s can not be sleepable", 569 __func__, class->lc_name, lock->lo_name); 570 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 571 (class->lc_flags & LC_UPGRADABLE) == 0) 572 panic("%s: lock (%s) %s can not be upgradable", 573 __func__, class->lc_name, lock->lo_name); 574 575 /* 576 * If we shouldn't watch this lock, then just clear lo_witness. 577 * Record the type in case the lock becomes watched later. 578 * Otherwise, if witness_cold is set, then it is too early to 579 * enroll this lock, so defer it to witness_initialize() by adding 580 * it to the pending_locks list. If it is not too early, then enroll 581 * the lock now. 582 */ 583 if (witness_watch < 1 || panicstr != NULL || db_active || 584 (lock->lo_flags & LO_WITNESS) == 0) { 585 lock->lo_witness = NULL; 586 lock->lo_type = type; 587 } else if (witness_cold) { 588 pending_locks[pending_cnt].wh_lock = lock; 589 pending_locks[pending_cnt++].wh_type = type; 590 if (pending_cnt > WITNESS_PENDLIST) 591 panic("%s: pending locks list is too small, " 592 "increase WITNESS_PENDLIST", 593 __func__); 594 } else 595 lock->lo_witness = enroll(type, lock->lo_name, class); 596} 597 598static inline int 599is_kernel_lock(const struct lock_object *lock) 600{ 601#ifdef MULTIPROCESSOR 602 return (lock == &kernel_lock.mpl_lock_obj); 603#else 604 return (0); 605#endif 606} 607 608#ifdef DDB 609static void 610witness_ddb_compute_levels(void) 611{ 612 struct witness *w; 613 614 /* 615 * First clear all levels. 616 */ 617 SLIST_FOREACH(w, &w_all, w_list) 618 w->w_ddb_level = -1; 619 620 /* 621 * Look for locks with no parents and level all their descendants. 622 */ 623 SLIST_FOREACH(w, &w_all, w_list) { 624 /* If the witness has ancestors (is not a root), skip it. */ 625 if (w->w_num_ancestors > 0) 626 continue; 627 witness_ddb_level_descendants(w, 0); 628 } 629} 630 631static void 632witness_ddb_level_descendants(struct witness *w, int l) 633{ 634 int i; 635 636 if (w->w_ddb_level >= l) 637 return; 638 639 w->w_ddb_level = l; 640 l++; 641 642 for (i = 1; i <= w_max_used_index; i++) { 643 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) 644 witness_ddb_level_descendants(&w_data[i], l); 645 } 646} 647 648static void 649witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...), 650 struct witness *w, int indent) 651{ 652 int i; 653 654 for (i = 0; i < indent; i++) 655 prnt(" "); 656 prnt("%s (%s) (type: %s, depth: %d)", 657 w->w_subtype, w->w_type->lt_name, 658 w->w_class->lc_name, w->w_ddb_level); 659 if (w->w_displayed) { 660 prnt(" -- (already displayed)\n"); 661 return; 662 } 663 w->w_displayed = 1; 664 if (!w->w_acquired) 665 prnt(" -- never acquired\n"); 666 else 667 prnt("\n"); 668 indent++; 669 WITNESS_INDEX_ASSERT(w->w_index); 670 for (i = 1; i <= w_max_used_index; i++) { 671 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) 672 witness_ddb_display_descendants(prnt, &w_data[i], 673 indent); 674 } 675} 676 677static void 678witness_ddb_display_list(int(*prnt)(const char *fmt, ...), 679 struct witness_list *list) 680{ 681 struct witness *w; 682 683 SLIST_FOREACH(w, list, w_typelist) { 684 if (!w->w_acquired || w->w_ddb_level > 0) 685 continue; 686 687 /* This lock has no ancestors - display its descendants. */ 688 witness_ddb_display_descendants(prnt, w, 0); 689 } 690} 691 692static void 693witness_ddb_display(int(*prnt)(const char *fmt, ...)) 694{ 695 struct witness *w; 696 697 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 698 witness_ddb_compute_levels(); 699 700 /* Clear all the displayed flags. */ 701 SLIST_FOREACH(w, &w_all, w_list) 702 w->w_displayed = 0; 703 704 /* 705 * First, handle sleep locks which have been acquired at least 706 * once. 707 */ 708 prnt("Sleep locks:\n"); 709 witness_ddb_display_list(prnt, &w_sleep); 710 711 /* 712 * Now do spin locks which have been acquired at least once. 713 */ 714 prnt("\nSpin locks:\n"); 715 witness_ddb_display_list(prnt, &w_spin); 716 717 /* 718 * Finally, any locks which have not been acquired yet. 719 */ 720 prnt("\nLocks which were never acquired:\n"); 721 SLIST_FOREACH(w, &w_all, w_list) { 722 if (w->w_acquired) 723 continue; 724 prnt("%s (%s) (type: %s, depth: %d)\n", 725 w->w_subtype, w->w_type->lt_name, 726 w->w_class->lc_name, w->w_ddb_level); 727 } 728} 729#endif /* DDB */ 730 731int 732witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 733{ 734 735 if (witness_watch < 0 || panicstr != NULL || db_active) 736 return (0); 737 738 /* Require locks that witness knows about. */ 739 if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 740 lock2->lo_witness == NULL) 741 return (EINVAL); 742 743 MUTEX_ASSERT_UNLOCKED(&w_mtx); 744 mtx_enter(&w_mtx); 745 746 /* 747 * If we already have either an explicit or implied lock order that 748 * is the other way around, then return an error. 749 */ 750 if (witness_watch && 751 isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 752 mtx_leave(&w_mtx); 753 return (EINVAL); 754 } 755 756 /* Try to add the new order. */ 757 itismychild(lock1->lo_witness, lock2->lo_witness); 758 mtx_leave(&w_mtx); 759 return (0); 760} 761 762void 763witness_checkorder(struct lock_object *lock, int flags, 764 struct lock_object *interlock) 765{ 766 struct lock_list_entry *lock_list, *lle; 767 struct lock_instance *lock1, *lock2, *plock; 768 struct lock_class *class, *iclass; 769 struct proc *p; 770 struct witness *w, *w1; 771 int i, j, s; 772 773 if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active) 774 return; 775 776 if ((lock->lo_flags & LO_INITIALIZED) == 0) { 777 if (witness_uninitialized_report > 0) { 778 witness_uninitialized_report--; 779 printf("witness: lock_object uninitialized: %p\n", lock); 780 witness_debugger(1); 781 } 782 lock->lo_flags |= LO_INITIALIZED; 783 } 784 785 if ((lock->lo_flags & LO_WITNESS) == 0) 786 return; 787 788 w = lock->lo_witness; 789 class = LOCK_CLASS(lock); 790 791 if (w == NULL) 792 w = lock->lo_witness = 793 enroll(lock->lo_type, lock->lo_name, class); 794 795 p = curproc; 796 797 if (class->lc_flags & LC_SLEEPLOCK) { 798 /* 799 * Since spin locks include a critical section, this check 800 * implicitly enforces a lock order of all sleep locks before 801 * all spin locks. 802 */ 803 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 804 if (lock_list != NULL && lock_list->ll_count > 0) { 805 panic("acquiring blockable sleep lock with " 806 "spinlock or critical section held (%s) %s", 807 class->lc_name, lock->lo_name); 808 } 809 810 /* 811 * If this is the first lock acquired then just return as 812 * no order checking is needed. 813 */ 814 lock_list = p->p_sleeplocks; 815 if (lock_list == NULL || lock_list->ll_count == 0) 816 return; 817 } else { 818 819 /* 820 * If this is the first lock, just return as no order 821 * checking is needed. 822 */ 823 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 824 if (lock_list == NULL || lock_list->ll_count == 0) 825 return; 826 } 827 828 s = splhigh(); 829 830 /* 831 * Check to see if we are recursing on a lock we already own. If 832 * so, make sure that we don't mismatch exclusive and shared lock 833 * acquires. 834 */ 835 lock1 = find_instance(lock_list, lock); 836 if (lock1 != NULL) { 837 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 838 (flags & LOP_EXCLUSIVE) == 0) { 839 printf("witness: shared lock of (%s) %s " 840 "while exclusively locked\n", 841 class->lc_name, lock->lo_name); 842 panic("excl->share"); 843 } 844 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 845 (flags & LOP_EXCLUSIVE) != 0) { 846 printf("witness: exclusive lock of (%s) %s " 847 "while share locked\n", 848 class->lc_name, lock->lo_name); 849 panic("share->excl"); 850 } 851 goto out_splx; 852 } 853 854 /* Warn if the interlock is not locked exactly once. */ 855 if (interlock != NULL) { 856 iclass = LOCK_CLASS(interlock); 857 lock1 = find_instance(lock_list, interlock); 858 if (lock1 == NULL) 859 panic("interlock (%s) %s not locked", 860 iclass->lc_name, interlock->lo_name); 861 else if ((lock1->li_flags & LI_RECURSEMASK) != 0) 862 panic("interlock (%s) %s recursed", 863 iclass->lc_name, interlock->lo_name); 864 } 865 866 /* 867 * Find the previously acquired lock, but ignore interlocks. 868 */ 869 plock = &lock_list->ll_children[lock_list->ll_count - 1]; 870 if (interlock != NULL && plock->li_lock == interlock) { 871 if (lock_list->ll_count > 1) 872 plock = 873 &lock_list->ll_children[lock_list->ll_count - 2]; 874 else { 875 lle = lock_list->ll_next; 876 877 /* 878 * The interlock is the only lock we hold, so 879 * simply return. 880 */ 881 if (lle == NULL) 882 goto out_splx; 883 plock = &lle->ll_children[lle->ll_count - 1]; 884 } 885 } 886 887 /* 888 * Try to perform most checks without a lock. If this succeeds we 889 * can skip acquiring the lock and return success. Otherwise we redo 890 * the check with the lock held to handle races with concurrent updates. 891 */ 892 w1 = plock->li_lock->lo_witness; 893 if (witness_lock_order_check(w1, w)) 894 goto out_splx; 895 896 mtx_enter(&w_mtx); 897 if (witness_lock_order_check(w1, w)) 898 goto out; 899 900 witness_lock_order_add(w1, w); 901 902 /* 903 * Check for duplicate locks of the same type. Note that we only 904 * have to check for this on the last lock we just acquired. Any 905 * other cases will be caught as lock order violations. 906 */ 907 if (w1 == w) { 908 i = w->w_index; 909 if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) && 910 !(w_rmatrix[i][i] & WITNESS_REVERSAL)) { 911 w_rmatrix[i][i] |= WITNESS_REVERSAL; 912 w->w_reversed = 1; 913 mtx_leave(&w_mtx); 914 printf("witness: acquiring duplicate lock of " 915 "same type: \"%s\"\n", w->w_type->lt_name); 916 printf(" 1st %s\n", plock->li_lock->lo_name); 917 printf(" 2nd %s\n", lock->lo_name); 918 witness_debugger(1); 919 } else 920 mtx_leave(&w_mtx); 921 goto out_splx; 922 } 923 MUTEX_ASSERT_LOCKED(&w_mtx); 924 925 /* 926 * If we know that the lock we are acquiring comes after 927 * the lock we most recently acquired in the lock order tree, 928 * then there is no need for any further checks. 929 */ 930 if (isitmychild(w1, w)) 931 goto out; 932 933 for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) { 934 for (i = lle->ll_count - 1; i >= 0; i--, j++) { 935 936 KASSERT(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN); 937 lock1 = &lle->ll_children[i]; 938 939 /* 940 * Ignore the interlock. 941 */ 942 if (interlock == lock1->li_lock) 943 continue; 944 945 /* 946 * If this lock doesn't undergo witness checking, 947 * then skip it. 948 */ 949 w1 = lock1->li_lock->lo_witness; 950 if (w1 == NULL) { 951 KASSERTMSG((lock1->li_lock->lo_flags & 952 LO_WITNESS) == 0, 953 "lock missing witness structure"); 954 continue; 955 } 956 957 /* 958 * If we are locking Giant and this is a sleepable 959 * lock, then skip it. 960 */ 961 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 962 is_kernel_lock(lock)) 963 continue; 964 965 /* 966 * If we are locking a sleepable lock and this lock 967 * is Giant, then skip it. 968 */ 969 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 970 is_kernel_lock(lock1->li_lock)) 971 continue; 972 973 /* 974 * If we are locking a sleepable lock and this lock 975 * isn't sleepable, we want to treat it as a lock 976 * order violation to enforce a general lock order of 977 * sleepable locks before non-sleepable locks. 978 */ 979 if (((lock->lo_flags & LO_SLEEPABLE) != 0 && 980 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 981 goto reversal; 982 983 /* 984 * If we are locking Giant and this is a non-sleepable 985 * lock, then treat it as a reversal. 986 */ 987 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && 988 is_kernel_lock(lock)) 989 goto reversal; 990 991 /* 992 * Check the lock order hierarchy for a reveresal. 993 */ 994 if (!isitmydescendant(w, w1)) 995 continue; 996 reversal: 997 998 /* 999 * We have a lock order violation, check to see if it 1000 * is allowed or has already been yelled about. 1001 */ 1002 1003 /* Bail if this violation is known */ 1004 if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL) 1005 goto out; 1006 1007 /* Record this as a violation */ 1008 w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL; 1009 w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL; 1010 w->w_reversed = w1->w_reversed = 1; 1011 witness_increment_graph_generation(); 1012 mtx_leave(&w_mtx); 1013 1014 /* 1015 * There are known LORs between VNODE locks. They are 1016 * not an indication of a bug. VNODE locks are flagged 1017 * as such (LO_IS_VNODE) and we don't yell if the LOR 1018 * is between 2 VNODE locks. 1019 */ 1020 if ((lock->lo_flags & LO_IS_VNODE) != 0 && 1021 (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0) 1022 goto out_splx; 1023 1024 /* 1025 * Ok, yell about it. 1026 */ 1027 printf("witness: "); 1028 if (((lock->lo_flags & LO_SLEEPABLE) != 0 && 1029 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 1030 printf("lock order reversal: " 1031 "(sleepable after non-sleepable)\n"); 1032 else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 1033 && is_kernel_lock(lock)) 1034 printf("lock order reversal: " 1035 "(Giant after non-sleepable)\n"); 1036 else 1037 printf("lock order reversal:\n"); 1038 1039 /* 1040 * Try to locate an earlier lock with 1041 * witness w in our list. 1042 */ 1043 do { 1044 lock2 = &lle->ll_children[i]; 1045 KASSERT(lock2->li_lock != NULL); 1046 if (lock2->li_lock->lo_witness == w) 1047 break; 1048 if (i == 0 && lle->ll_next != NULL) { 1049 lle = lle->ll_next; 1050 i = lle->ll_count - 1; 1051 KASSERT(i >= 0 && i < LOCK_NCHILDREN); 1052 } else 1053 i--; 1054 } while (i >= 0); 1055 if (i < 0) { 1056 printf(" 1st %p %s (%s)\n", 1057 lock1->li_lock, lock1->li_lock->lo_name, 1058 w1->w_type->lt_name); 1059 printf(" 2nd %p %s (%s)\n", 1060 lock, lock->lo_name, w->w_type->lt_name); 1061 } else { 1062 printf(" 1st %p %s (%s)\n", 1063 lock2->li_lock, lock2->li_lock->lo_name, 1064 lock2->li_lock->lo_witness->w_type-> 1065 lt_name); 1066 printf(" 2nd %p %s (%s)\n", 1067 lock1->li_lock, lock1->li_lock->lo_name, 1068 w1->w_type->lt_name); 1069 printf(" 3rd %p %s (%s)\n", lock, 1070 lock->lo_name, w->w_type->lt_name); 1071 } 1072 if (witness_watch > 1) 1073 witness_print_cycle(printf, w1, w); 1074 witness_debugger(0); 1075 goto out_splx; 1076 } 1077 } 1078 1079 /* 1080 * If requested, build a new lock order. However, don't build a new 1081 * relationship between a sleepable lock and Giant if it is in the 1082 * wrong direction. The correct lock order is that sleepable locks 1083 * always come before Giant. 1084 */ 1085 if (flags & LOP_NEWORDER && 1086 !(is_kernel_lock(plock->li_lock) && 1087 (lock->lo_flags & LO_SLEEPABLE) != 0)) 1088 itismychild(plock->li_lock->lo_witness, w); 1089out: 1090 mtx_leave(&w_mtx); 1091out_splx: 1092 splx(s); 1093} 1094 1095void 1096witness_lock(struct lock_object *lock, int flags) 1097{ 1098 struct lock_list_entry **lock_list, *lle; 1099 struct lock_instance *instance; 1100 struct proc *p; 1101 struct witness *w; 1102 int s; 1103 1104 if (witness_cold || witness_watch < 0 || panicstr != NULL || 1105 db_active || (lock->lo_flags & LO_WITNESS) == 0) 1106 return; 1107 1108 w = lock->lo_witness; 1109 if (w == NULL) 1110 w = lock->lo_witness = 1111 enroll(lock->lo_type, lock->lo_name, LOCK_CLASS(lock)); 1112 1113 p = curproc; 1114 1115 /* Determine lock list for this lock. */ 1116 if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) 1117 lock_list = &p->p_sleeplocks; 1118 else 1119 lock_list = &witness_cpu[cpu_number()].wc_spinlocks; 1120 1121 s = splhigh(); 1122 1123 /* Check to see if we are recursing on a lock we already own. */ 1124 instance = find_instance(*lock_list, lock); 1125 if (instance != NULL) { 1126 instance->li_flags++; 1127 goto out; 1128 } 1129 1130 w->w_acquired = 1; 1131 1132 /* Find the next open lock instance in the list and fill it. */ 1133 lle = *lock_list; 1134 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 1135 lle = witness_lock_list_get(); 1136 if (lle == NULL) 1137 goto out; 1138 lle->ll_next = *lock_list; 1139 *lock_list = lle; 1140 } 1141 instance = &lle->ll_children[lle->ll_count++]; 1142 instance->li_lock = lock; 1143 if ((flags & LOP_EXCLUSIVE) != 0) 1144 instance->li_flags = LI_EXCLUSIVE; 1145 else 1146 instance->li_flags = 0; 1147 instance->li_stack = NULL; 1148 if (witness_locktrace) { 1149 instance->li_stack = witness_lock_stack_get(); 1150 if (instance->li_stack != NULL) 1151 stacktrace_save(&instance->li_stack->ls_stack); 1152 } 1153out: 1154 splx(s); 1155} 1156 1157void 1158witness_upgrade(struct lock_object *lock, int flags) 1159{ 1160 struct lock_instance *instance; 1161 struct lock_class *class; 1162 int s; 1163 1164 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 1165 if (lock->lo_witness == NULL || witness_watch < 0 || 1166 panicstr != NULL || db_active) 1167 return; 1168 class = LOCK_CLASS(lock); 1169 if (witness_watch) { 1170 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1171 panic("upgrade of non-upgradable lock (%s) %s", 1172 class->lc_name, lock->lo_name); 1173 if ((class->lc_flags & LC_SLEEPLOCK) == 0) 1174 panic("upgrade of non-sleep lock (%s) %s", 1175 class->lc_name, lock->lo_name); 1176 } 1177 s = splhigh(); 1178 instance = find_instance(curproc->p_sleeplocks, lock); 1179 if (instance == NULL) { 1180 panic("upgrade of unlocked lock (%s) %s", 1181 class->lc_name, lock->lo_name); 1182 goto out; 1183 } 1184 if (witness_watch) { 1185 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1186 panic("upgrade of exclusive lock (%s) %s", 1187 class->lc_name, lock->lo_name); 1188 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1189 panic("upgrade of recursed lock (%s) %s r=%d", 1190 class->lc_name, lock->lo_name, 1191 instance->li_flags & LI_RECURSEMASK); 1192 } 1193 instance->li_flags |= LI_EXCLUSIVE; 1194out: 1195 splx(s); 1196} 1197 1198void 1199witness_downgrade(struct lock_object *lock, int flags) 1200{ 1201 struct lock_instance *instance; 1202 struct lock_class *class; 1203 int s; 1204 1205 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 1206 if (lock->lo_witness == NULL || witness_watch < 0 || 1207 panicstr != NULL || db_active) 1208 return; 1209 class = LOCK_CLASS(lock); 1210 if (witness_watch) { 1211 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1212 panic( 1213 "downgrade of non-upgradable lock (%s) %s", 1214 class->lc_name, lock->lo_name); 1215 if ((class->lc_flags & LC_SLEEPLOCK) == 0) 1216 panic("downgrade of non-sleep lock (%s) %s", 1217 class->lc_name, lock->lo_name); 1218 } 1219 s = splhigh(); 1220 instance = find_instance(curproc->p_sleeplocks, lock); 1221 if (instance == NULL) { 1222 panic("downgrade of unlocked lock (%s) %s", 1223 class->lc_name, lock->lo_name); 1224 goto out; 1225 } 1226 if (witness_watch) { 1227 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 1228 panic("downgrade of shared lock (%s) %s", 1229 class->lc_name, lock->lo_name); 1230 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1231 panic("downgrade of recursed lock (%s) %s r=%d", 1232 class->lc_name, lock->lo_name, 1233 instance->li_flags & LI_RECURSEMASK); 1234 } 1235 instance->li_flags &= ~LI_EXCLUSIVE; 1236out: 1237 splx(s); 1238} 1239 1240void 1241witness_unlock(struct lock_object *lock, int flags) 1242{ 1243 struct lock_list_entry **lock_list, *lle; 1244 struct lock_instance *instance; 1245 struct lock_class *class; 1246 struct proc *p; 1247 int i, j; 1248 int s; 1249 1250 if (witness_cold || lock->lo_witness == NULL || 1251 panicstr != NULL || db_active) 1252 return; 1253 p = curproc; 1254 class = LOCK_CLASS(lock); 1255 1256 /* Find lock instance associated with this lock. */ 1257 if (class->lc_flags & LC_SLEEPLOCK) 1258 lock_list = &p->p_sleeplocks; 1259 else 1260 lock_list = &witness_cpu[cpu_number()].wc_spinlocks; 1261 1262 s = splhigh(); 1263 1264 lle = *lock_list; 1265 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1266 for (i = 0; i < (*lock_list)->ll_count; i++) { 1267 instance = &(*lock_list)->ll_children[i]; 1268 if (instance->li_lock == lock) 1269 goto found; 1270 } 1271 1272 /* 1273 * When disabling WITNESS through witness_watch we could end up in 1274 * having registered locks in the p_sleeplocks queue. 1275 * We have to make sure we flush these queues, so just search for 1276 * eventual register locks and remove them. 1277 */ 1278 if (witness_watch > 0) { 1279 panic("lock (%s) %s not locked", class->lc_name, lock->lo_name); 1280 } 1281 goto out; 1282 1283found: 1284 1285 /* First, check for shared/exclusive mismatches. */ 1286 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 && 1287 (flags & LOP_EXCLUSIVE) == 0) { 1288 printf("witness: shared unlock of (%s) %s " 1289 "while exclusively locked\n", 1290 class->lc_name, lock->lo_name); 1291 panic("excl->ushare"); 1292 } 1293 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 && 1294 (flags & LOP_EXCLUSIVE) != 0) { 1295 printf("witness: exclusive unlock of (%s) %s " 1296 "while share locked\n", class->lc_name, lock->lo_name); 1297 panic("share->uexcl"); 1298 } 1299 /* If we are recursed, unrecurse. */ 1300 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1301 instance->li_flags--; 1302 goto out; 1303 } 1304 /* The lock is now being dropped, check for NORELEASE flag */ 1305 if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) { 1306 printf("witness: forbidden unlock of (%s) %s\n", 1307 class->lc_name, lock->lo_name); 1308 panic("lock marked norelease"); 1309 } 1310 1311 /* Release the stack buffer, if any. */ 1312 if (instance->li_stack != NULL) { 1313 witness_lock_stack_free(instance->li_stack); 1314 instance->li_stack = NULL; 1315 } 1316 1317 /* Remove this item from the list. */ 1318 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1319 (*lock_list)->ll_children[j] = 1320 (*lock_list)->ll_children[j + 1]; 1321 (*lock_list)->ll_count--; 1322 1323 /* 1324 * In order to reduce contention on w_mtx, we want to keep always an 1325 * head object into lists so that frequent allocation from the 1326 * free witness pool (and subsequent locking) is avoided. 1327 * In order to maintain the current code simple, when the head 1328 * object is totally unloaded it means also that we do not have 1329 * further objects in the list, so the list ownership needs to be 1330 * hand over to another object if the current head needs to be freed. 1331 */ 1332 if ((*lock_list)->ll_count == 0) { 1333 if (*lock_list == lle) { 1334 if (lle->ll_next == NULL) 1335 goto out; 1336 } else 1337 lle = *lock_list; 1338 *lock_list = lle->ll_next; 1339 witness_lock_list_free(lle); 1340 } 1341out: 1342 splx(s); 1343} 1344 1345void 1346witness_thread_exit(struct proc *p) 1347{ 1348 struct lock_list_entry *lle; 1349 int i, n, s; 1350 1351 lle = p->p_sleeplocks; 1352 if (lle == NULL || panicstr != NULL || db_active) 1353 return; 1354 if (lle->ll_count != 0) { 1355 for (n = 0; lle != NULL; lle = lle->ll_next) 1356 for (i = lle->ll_count - 1; i >= 0; i--) { 1357 if (n == 0) 1358 printf("witness: thread %p exiting " 1359 "with the following locks held:\n", 1360 p); 1361 n++; 1362 witness_list_lock(&lle->ll_children[i], 1363 printf); 1364 } 1365 panic("thread %p cannot exit while holding sleeplocks", p); 1366 } 1367 KASSERT(lle->ll_next == NULL); 1368 s = splhigh(); 1369 witness_lock_list_free(lle); 1370 splx(s); 1371} 1372 1373/* 1374 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1375 * exempt Giant and sleepable locks from the checks as well. If any 1376 * non-exempt locks are held, then a supplied message is printed to the 1377 * output channel along with a list of the offending locks. If indicated in the 1378 * flags then a failure results in a panic as well. 1379 */ 1380int 1381witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1382{ 1383 struct lock_list_entry *lock_list, *lle; 1384 struct lock_instance *lock1; 1385 struct proc *p; 1386 va_list ap; 1387 int i, n; 1388 1389 if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active) 1390 return (0); 1391 n = 0; 1392 p = curproc; 1393 for (lle = p->p_sleeplocks; lle != NULL; lle = lle->ll_next) 1394 for (i = lle->ll_count - 1; i >= 0; i--) { 1395 lock1 = &lle->ll_children[i]; 1396 if (lock1->li_lock == lock) 1397 continue; 1398 if (flags & WARN_KERNELOK && 1399 is_kernel_lock(lock1->li_lock)) 1400 continue; 1401 if (flags & WARN_SLEEPOK && 1402 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1403 continue; 1404 if (n == 0) { 1405 printf("witness: "); 1406 va_start(ap, fmt); 1407 vprintf(fmt, ap); 1408 va_end(ap); 1409 printf(" with the following %slocks held:\n", 1410 (flags & WARN_SLEEPOK) != 0 ? 1411 "non-sleepable " : ""); 1412 } 1413 n++; 1414 witness_list_lock(lock1, printf); 1415 } 1416 1417 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 1418 if (lock_list != NULL && lock_list->ll_count != 0) { 1419 /* 1420 * We should only have one spinlock and as long as 1421 * the flags cannot match for this locks class, 1422 * check if the first spinlock is the one curproc 1423 * should hold. 1424 */ 1425 lock1 = &lock_list->ll_children[lock_list->ll_count - 1]; 1426 if (lock_list->ll_count == 1 && lock_list->ll_next == NULL && 1427 lock1->li_lock == lock && n == 0) 1428 return (0); 1429 1430 printf("witness: "); 1431 va_start(ap, fmt); 1432 vprintf(fmt, ap); 1433 va_end(ap); 1434 printf(" with the following %slocks held:\n", 1435 (flags & WARN_SLEEPOK) != 0 ? "non-sleepable " : ""); 1436 n += witness_list_locks(&lock_list, printf); 1437 } 1438 if (n > 0) { 1439 if (flags & WARN_PANIC) 1440 panic("%s", __func__); 1441 else 1442 witness_debugger(1); 1443 } 1444 return (n); 1445} 1446 1447static struct witness * 1448enroll(const struct lock_type *type, const char *subtype, 1449 struct lock_class *lock_class) 1450{ 1451 struct witness *w; 1452 struct witness_list *typelist; 1453 1454 KASSERT(type != NULL); 1455 1456 if (witness_watch < 0 || panicstr != NULL || db_active) 1457 return (NULL); 1458 if ((lock_class->lc_flags & LC_SPINLOCK)) { 1459 typelist = &w_spin; 1460 } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) { 1461 typelist = &w_sleep; 1462 } else { 1463 panic("lock class %s is not sleep or spin", 1464 lock_class->lc_name); 1465 return (NULL); 1466 } 1467 1468 mtx_enter(&w_mtx); 1469 w = witness_hash_get(type, subtype); 1470 if (w) 1471 goto found; 1472 if ((w = witness_get()) == NULL) 1473 return (NULL); 1474 w->w_type = type; 1475 w->w_subtype = subtype; 1476 w->w_class = lock_class; 1477 SLIST_INSERT_HEAD(&w_all, w, w_list); 1478 if (lock_class->lc_flags & LC_SPINLOCK) { 1479 SLIST_INSERT_HEAD(&w_spin, w, w_typelist); 1480 w_spin_cnt++; 1481 } else if (lock_class->lc_flags & LC_SLEEPLOCK) { 1482 SLIST_INSERT_HEAD(&w_sleep, w, w_typelist); 1483 w_sleep_cnt++; 1484 } 1485 1486 /* Insert new witness into the hash */ 1487 witness_hash_put(w); 1488 witness_increment_graph_generation(); 1489 mtx_leave(&w_mtx); 1490 return (w); 1491found: 1492 mtx_leave(&w_mtx); 1493 if (lock_class != w->w_class) 1494 panic("lock (%s) %s does not match earlier (%s) lock", 1495 type->lt_name, lock_class->lc_name, w->w_class->lc_name); 1496 return (w); 1497} 1498 1499static void 1500adopt(struct witness *parent, struct witness *child) 1501{ 1502 int pi, ci, i, j; 1503 1504 if (witness_cold == 0) 1505 MUTEX_ASSERT_LOCKED(&w_mtx); 1506 1507 /* If the relationship is already known, there's no work to be done. */ 1508 if (isitmychild(parent, child)) 1509 return; 1510 1511 /* When the structure of the graph changes, bump up the generation. */ 1512 witness_increment_graph_generation(); 1513 1514 /* 1515 * The hard part ... create the direct relationship, then propagate all 1516 * indirect relationships. 1517 */ 1518 pi = parent->w_index; 1519 ci = child->w_index; 1520 WITNESS_INDEX_ASSERT(pi); 1521 WITNESS_INDEX_ASSERT(ci); 1522 KASSERT(pi != ci); 1523 w_rmatrix[pi][ci] |= WITNESS_PARENT; 1524 w_rmatrix[ci][pi] |= WITNESS_CHILD; 1525 1526 /* 1527 * If parent was not already an ancestor of child, 1528 * then we increment the descendant and ancestor counters. 1529 */ 1530 if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) { 1531 parent->w_num_descendants++; 1532 child->w_num_ancestors++; 1533 } 1534 1535 /* 1536 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as 1537 * an ancestor of 'pi' during this loop. 1538 */ 1539 for (i = 1; i <= w_max_used_index; i++) { 1540 if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && 1541 (i != pi)) 1542 continue; 1543 1544 /* Find each descendant of 'i' and mark it as a descendant. */ 1545 for (j = 1; j <= w_max_used_index; j++) { 1546 1547 /* 1548 * Skip children that are already marked as 1549 * descendants of 'i'. 1550 */ 1551 if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) 1552 continue; 1553 1554 /* 1555 * We are only interested in descendants of 'ci'. Note 1556 * that 'ci' itself is counted as a descendant of 'ci'. 1557 */ 1558 if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && 1559 (j != ci)) 1560 continue; 1561 w_rmatrix[i][j] |= WITNESS_ANCESTOR; 1562 w_rmatrix[j][i] |= WITNESS_DESCENDANT; 1563 w_data[i].w_num_descendants++; 1564 w_data[j].w_num_ancestors++; 1565 1566 /* 1567 * Make sure we aren't marking a node as both an 1568 * ancestor and descendant. We should have caught 1569 * this as a lock order reversal earlier. 1570 */ 1571 if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) && 1572 (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) { 1573 printf("witness: rmatrix paradox! [%d][%d]=%d " 1574 "both ancestor and descendant\n", 1575 i, j, w_rmatrix[i][j]); 1576#ifdef DDB 1577 db_stack_dump(); 1578#endif 1579 printf("witness disabled\n"); 1580 witness_watch = -1; 1581 } 1582 if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) && 1583 (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) { 1584 printf("witness: rmatrix paradox! [%d][%d]=%d " 1585 "both ancestor and descendant\n", 1586 j, i, w_rmatrix[j][i]); 1587#ifdef DDB 1588 db_stack_dump(); 1589#endif 1590 printf("witness disabled\n"); 1591 witness_watch = -1; 1592 } 1593 } 1594 } 1595} 1596 1597static void 1598itismychild(struct witness *parent, struct witness *child) 1599{ 1600 KASSERT(child != NULL && parent != NULL); 1601 if (witness_cold == 0) 1602 MUTEX_ASSERT_LOCKED(&w_mtx); 1603 1604 if (!witness_lock_type_equal(parent, child)) { 1605 if (witness_cold == 0) 1606 mtx_leave(&w_mtx); 1607 panic( 1608 "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not " 1609 "the same lock type", __func__, parent->w_type->lt_name, 1610 parent->w_class->lc_name, child->w_type->lt_name, 1611 child->w_class->lc_name); 1612 } 1613 adopt(parent, child); 1614} 1615 1616/* 1617 * Generic code for the isitmy*() functions. The rmask parameter is the 1618 * expected relationship of w1 to w2. 1619 */ 1620static int 1621_isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname) 1622{ 1623 unsigned char r1, r2; 1624 int i1, i2; 1625 1626 i1 = w1->w_index; 1627 i2 = w2->w_index; 1628 WITNESS_INDEX_ASSERT(i1); 1629 WITNESS_INDEX_ASSERT(i2); 1630 r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK; 1631 r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK; 1632 1633 /* The flags on one better be the inverse of the flags on the other */ 1634 if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) || 1635 (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) { 1636 /* Don't squawk if we're potentially racing with an update. */ 1637 if (w_mtx.mtx_owner != curcpu()) 1638 return (0); 1639 printf("witness: %s: rmatrix mismatch between %s (index %d) " 1640 "and %s (index %d): w_rmatrix[%d][%d] == %x but " 1641 "w_rmatrix[%d][%d] == %x\n", 1642 fname, w1->w_type->lt_name, i1, w2->w_type->lt_name, 1643 i2, i1, i2, r1, 1644 i2, i1, r2); 1645#ifdef DDB 1646 db_stack_dump(); 1647#endif 1648 printf("witness disabled\n"); 1649 witness_watch = -1; 1650 } 1651 return (r1 & rmask); 1652} 1653 1654/* 1655 * Checks if @child is a direct child of @parent. 1656 */ 1657static int 1658isitmychild(struct witness *parent, struct witness *child) 1659{ 1660 1661 return (_isitmyx(parent, child, WITNESS_PARENT, __func__)); 1662} 1663 1664/* 1665 * Checks if @descendant is a direct or indirect descendant of @ancestor. 1666 */ 1667static int 1668isitmydescendant(struct witness *ancestor, struct witness *descendant) 1669{ 1670 1671 return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, 1672 __func__)); 1673} 1674 1675static struct witness * 1676witness_get(void) 1677{ 1678 struct witness *w; 1679 int index; 1680 1681 if (witness_cold == 0) 1682 MUTEX_ASSERT_LOCKED(&w_mtx); 1683 1684 if (witness_watch < 0) { 1685 mtx_leave(&w_mtx); 1686 return (NULL); 1687 } 1688 if (SLIST_EMPTY(&w_free)) { 1689 witness_watch = -1; 1690 mtx_leave(&w_mtx); 1691 printf("WITNESS: unable to allocate a new witness object\n"); 1692 return (NULL); 1693 } 1694 w = SLIST_FIRST(&w_free); 1695 SLIST_REMOVE_HEAD(&w_free, w_list); 1696 w_free_cnt--; 1697 index = w->w_index; 1698 KASSERT(index > 0 && index == w_max_used_index + 1 && 1699 index < witness_count); 1700 memset(w, 0, sizeof(*w)); 1701 w->w_index = index; 1702 if (index > w_max_used_index) 1703 w_max_used_index = index; 1704 return (w); 1705} 1706 1707static void 1708witness_free(struct witness *w) 1709{ 1710 SLIST_INSERT_HEAD(&w_free, w, w_list); 1711 w_free_cnt++; 1712} 1713 1714static struct lock_list_entry * 1715witness_lock_list_get(void) 1716{ 1717 struct lock_list_entry *lle; 1718 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1719 1720 if (witness_watch < 0) 1721 return (NULL); 1722 1723 splassert(IPL_HIGH); 1724 1725 if (wcpu->wc_lle_count > 0) { 1726 lle = wcpu->wc_lle_cache; 1727 wcpu->wc_lle_cache = lle->ll_next; 1728 wcpu->wc_lle_count--; 1729 memset(lle, 0, sizeof(*lle)); 1730 return (lle); 1731 } 1732 1733 mtx_enter(&w_mtx); 1734 lle = w_lock_list_free; 1735 if (lle == NULL) { 1736 witness_watch = -1; 1737 mtx_leave(&w_mtx); 1738 printf("%s: witness exhausted\n", __func__); 1739 return (NULL); 1740 } 1741 w_lock_list_free = lle->ll_next; 1742 mtx_leave(&w_mtx); 1743 memset(lle, 0, sizeof(*lle)); 1744 return (lle); 1745} 1746 1747static void 1748witness_lock_list_free(struct lock_list_entry *lle) 1749{ 1750 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1751 1752 splassert(IPL_HIGH); 1753 1754 if (wcpu->wc_lle_count < WITNESS_LLE_CACHE_MAX) { 1755 lle->ll_next = wcpu->wc_lle_cache; 1756 wcpu->wc_lle_cache = lle; 1757 wcpu->wc_lle_count++; 1758 return; 1759 } 1760 1761 mtx_enter(&w_mtx); 1762 lle->ll_next = w_lock_list_free; 1763 w_lock_list_free = lle; 1764 mtx_leave(&w_mtx); 1765} 1766 1767static union lock_stack * 1768witness_lock_stack_get(void) 1769{ 1770 union lock_stack *stack = NULL; 1771 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1772 1773 splassert(IPL_HIGH); 1774 1775 if (wcpu->wc_stk_count > 0) { 1776 stack = wcpu->wc_stk_cache; 1777 wcpu->wc_stk_cache = stack->ls_next; 1778 wcpu->wc_stk_count--; 1779 return (stack); 1780 } 1781 1782 mtx_enter(&w_mtx); 1783 if (w_lock_stack_free != NULL) { 1784 stack = w_lock_stack_free; 1785 w_lock_stack_free = stack->ls_next; 1786 } 1787 mtx_leave(&w_mtx); 1788 return (stack); 1789} 1790 1791static void 1792witness_lock_stack_free(union lock_stack *stack) 1793{ 1794 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1795 1796 splassert(IPL_HIGH); 1797 1798 if (wcpu->wc_stk_count < WITNESS_STK_CACHE_MAX) { 1799 stack->ls_next = wcpu->wc_stk_cache; 1800 wcpu->wc_stk_cache = stack; 1801 wcpu->wc_stk_count++; 1802 return; 1803 } 1804 1805 mtx_enter(&w_mtx); 1806 stack->ls_next = w_lock_stack_free; 1807 w_lock_stack_free = stack; 1808 mtx_leave(&w_mtx); 1809} 1810 1811static struct lock_instance * 1812find_instance(struct lock_list_entry *list, const struct lock_object *lock) 1813{ 1814 struct lock_list_entry *lle; 1815 struct lock_instance *instance; 1816 int i; 1817 1818 for (lle = list; lle != NULL; lle = lle->ll_next) { 1819 for (i = lle->ll_count - 1; i >= 0; i--) { 1820 instance = &lle->ll_children[i]; 1821 if (instance->li_lock == lock) 1822 return (instance); 1823 } 1824 } 1825 return (NULL); 1826} 1827 1828static void 1829witness_list_lock(struct lock_instance *instance, 1830 int (*prnt)(const char *fmt, ...)) 1831{ 1832 struct lock_object *lock; 1833 1834 lock = instance->li_lock; 1835 prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 1836 "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name); 1837 prnt(" r = %d (%p)\n", instance->li_flags & LI_RECURSEMASK, lock); 1838 if (instance->li_stack != NULL) 1839 stacktrace_print(&instance->li_stack->ls_stack, prnt); 1840} 1841 1842static int 1843witness_search(struct witness *w, struct witness *target, 1844 struct witness **path, int depth, int *remaining) 1845{ 1846 int i, any_remaining; 1847 1848 if (depth == 0) { 1849 *remaining = 1; 1850 return (w == target); 1851 } 1852 1853 any_remaining = 0; 1854 for (i = 1; i <= w_max_used_index; i++) { 1855 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) { 1856 if (witness_search(&w_data[i], target, path, depth - 1, 1857 remaining)) { 1858 path[depth - 1] = &w_data[i]; 1859 *remaining = 1; 1860 return 1; 1861 } 1862 if (remaining) 1863 any_remaining = 1; 1864 } 1865 } 1866 *remaining = any_remaining; 1867 return 0; 1868} 1869 1870static void 1871witness_print_cycle_edge(int(*prnt)(const char *fmt, ...), 1872 struct witness *parent, struct witness *child, int step, int last) 1873{ 1874 struct witness_lock_order_data *wlod; 1875 int next; 1876 1877 if (last) 1878 next = 1; 1879 else 1880 next = step + 1; 1881 prnt("lock order [%d] %s (%s) -> [%d] %s (%s)\n", 1882 step, parent->w_subtype, parent->w_type->lt_name, 1883 next, child->w_subtype, child->w_type->lt_name); 1884 if (witness_watch > 1) { 1885 mtx_enter(&w_mtx); 1886 wlod = witness_lock_order_get(parent, child); 1887 mtx_leave(&w_mtx); 1888 1889 if (wlod != NULL) 1890 stacktrace_print(&wlod->wlod_stack, printf); 1891 else 1892 prnt("lock order data %p -> %p is missing\n", 1893 parent->w_type->lt_name, child->w_type->lt_name); 1894 } 1895} 1896 1897static void 1898witness_print_cycle(int(*prnt)(const char *fmt, ...), 1899 struct witness *parent, struct witness *child) 1900{ 1901 struct witness *path[4]; 1902 struct witness *w; 1903 int depth, remaining; 1904 int step = 0; 1905 1906 /* 1907 * Use depth-limited search to find the shortest path 1908 * from child to parent. 1909 */ 1910 for (depth = 1; depth < nitems(path); depth++) { 1911 if (witness_search(child, parent, path, depth, &remaining)) 1912 goto found; 1913 if (!remaining) 1914 break; 1915 } 1916 prnt("witness: incomplete path, depth %d\n", depth); 1917 return; 1918 1919found: 1920 witness_print_cycle_edge(prnt, parent, child, ++step, 0); 1921 for (w = child; depth > 0; depth--) { 1922 witness_print_cycle_edge(prnt, w, path[depth - 1], ++step, 1923 depth == 1); 1924 w = path[depth - 1]; 1925 } 1926} 1927 1928#ifdef DDB 1929static int 1930witness_thread_has_locks(struct proc *p) 1931{ 1932 1933 if (p->p_sleeplocks == NULL) 1934 return (0); 1935 return (p->p_sleeplocks->ll_count != 0); 1936} 1937 1938static int 1939witness_process_has_locks(struct process *pr) 1940{ 1941 struct proc *p; 1942 1943 TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { 1944 if (witness_thread_has_locks(p)) 1945 return (1); 1946 } 1947 return (0); 1948} 1949#endif 1950 1951int 1952witness_list_locks(struct lock_list_entry **lock_list, 1953 int (*prnt)(const char *fmt, ...)) 1954{ 1955 struct lock_list_entry *lle; 1956 int i, nheld; 1957 1958 nheld = 0; 1959 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1960 for (i = lle->ll_count - 1; i >= 0; i--) { 1961 witness_list_lock(&lle->ll_children[i], prnt); 1962 nheld++; 1963 } 1964 return (nheld); 1965} 1966 1967/* 1968 * This is a bit risky at best. We call this function when we have timed 1969 * out acquiring a spin lock, and we assume that the other CPU is stuck 1970 * with this lock held. So, we go groveling around in the other CPU's 1971 * per-cpu data to try to find the lock instance for this spin lock to 1972 * see when it was last acquired. 1973 */ 1974void 1975witness_display_spinlock(struct lock_object *lock, struct proc *owner, 1976 int (*prnt)(const char *fmt, ...)) 1977{ 1978 struct lock_instance *instance; 1979 1980 if (owner->p_stat != SONPROC) 1981 return; 1982 instance = find_instance( 1983 witness_cpu[owner->p_cpu->ci_cpuid].wc_spinlocks, lock); 1984 if (instance != NULL) 1985 witness_list_lock(instance, prnt); 1986} 1987 1988void 1989witness_assert(const struct lock_object *lock, int flags) 1990{ 1991 struct lock_instance *instance; 1992 struct lock_class *class; 1993 1994 if (lock->lo_witness == NULL || witness_watch < 1 || 1995 panicstr != NULL || db_active) 1996 return; 1997 class = LOCK_CLASS(lock); 1998 if ((class->lc_flags & LC_SLEEPLOCK) != 0) 1999 instance = find_instance(curproc->p_sleeplocks, lock); 2000 else if ((class->lc_flags & LC_SPINLOCK) != 0) 2001 instance = find_instance( 2002 witness_cpu[cpu_number()].wc_spinlocks, lock); 2003 else { 2004 panic("lock (%s) %s is not sleep or spin!", 2005 class->lc_name, lock->lo_name); 2006 return; 2007 } 2008 switch (flags) { 2009 case LA_UNLOCKED: 2010 if (instance != NULL) 2011 panic("lock (%s) %s locked", 2012 class->lc_name, lock->lo_name); 2013 break; 2014 case LA_LOCKED: 2015 case LA_LOCKED | LA_RECURSED: 2016 case LA_LOCKED | LA_NOTRECURSED: 2017 case LA_SLOCKED: 2018 case LA_SLOCKED | LA_RECURSED: 2019 case LA_SLOCKED | LA_NOTRECURSED: 2020 case LA_XLOCKED: 2021 case LA_XLOCKED | LA_RECURSED: 2022 case LA_XLOCKED | LA_NOTRECURSED: 2023 if (instance == NULL) { 2024 panic("lock (%s) %s not locked", 2025 class->lc_name, lock->lo_name); 2026 break; 2027 } 2028 if ((flags & LA_XLOCKED) != 0 && 2029 (instance->li_flags & LI_EXCLUSIVE) == 0) 2030 panic( 2031 "lock (%s) %s not exclusively locked", 2032 class->lc_name, lock->lo_name); 2033 if ((flags & LA_SLOCKED) != 0 && 2034 (instance->li_flags & LI_EXCLUSIVE) != 0) 2035 panic( 2036 "lock (%s) %s exclusively locked", 2037 class->lc_name, lock->lo_name); 2038 if ((flags & LA_RECURSED) != 0 && 2039 (instance->li_flags & LI_RECURSEMASK) == 0) 2040 panic("lock (%s) %s not recursed", 2041 class->lc_name, lock->lo_name); 2042 if ((flags & LA_NOTRECURSED) != 0 && 2043 (instance->li_flags & LI_RECURSEMASK) != 0) 2044 panic("lock (%s) %s recursed", 2045 class->lc_name, lock->lo_name); 2046 break; 2047 default: 2048 panic("invalid lock assertion"); 2049 2050 } 2051} 2052 2053static void 2054witness_setflag(struct lock_object *lock, int flag, int set) 2055{ 2056 struct lock_list_entry *lock_list; 2057 struct lock_instance *instance; 2058 struct lock_class *class; 2059 2060 if (lock->lo_witness == NULL || witness_watch < 0 || 2061 panicstr != NULL || db_active) 2062 return; 2063 class = LOCK_CLASS(lock); 2064 if (class->lc_flags & LC_SLEEPLOCK) 2065 lock_list = curproc->p_sleeplocks; 2066 else 2067 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 2068 instance = find_instance(lock_list, lock); 2069 if (instance == NULL) { 2070 panic("%s: lock (%s) %s not locked", __func__, 2071 class->lc_name, lock->lo_name); 2072 return; 2073 } 2074 2075 if (set) 2076 instance->li_flags |= flag; 2077 else 2078 instance->li_flags &= ~flag; 2079} 2080 2081void 2082witness_norelease(struct lock_object *lock) 2083{ 2084 2085 witness_setflag(lock, LI_NORELEASE, 1); 2086} 2087 2088void 2089witness_releaseok(struct lock_object *lock) 2090{ 2091 2092 witness_setflag(lock, LI_NORELEASE, 0); 2093} 2094 2095#ifdef DDB 2096static void 2097witness_ddb_list(struct proc *p) 2098{ 2099 struct witness_cpu *wc = &witness_cpu[cpu_number()]; 2100 2101 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 2102 KASSERTMSG(db_active, "%s: not in the debugger", __func__); 2103 2104 if (witness_watch < 1) 2105 return; 2106 2107 witness_list_locks(&p->p_sleeplocks, db_printf); 2108 2109 /* 2110 * We only handle spinlocks if td == curproc. This is somewhat broken 2111 * if td is currently executing on some other CPU and holds spin locks 2112 * as we won't display those locks. If we had a MI way of getting 2113 * the per-cpu data for a given cpu then we could use 2114 * td->td_oncpu to get the list of spinlocks for this thread 2115 * and "fix" this. 2116 * 2117 * That still wouldn't really fix this unless we locked the scheduler 2118 * lock or stopped the other CPU to make sure it wasn't changing the 2119 * list out from under us. It is probably best to just not try to 2120 * handle threads on other CPU's for now. 2121 */ 2122 if (p == curproc && wc->wc_spinlocks != NULL) 2123 witness_list_locks(&wc->wc_spinlocks, db_printf); 2124} 2125 2126void 2127db_witness_list(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2128{ 2129 struct proc *p; 2130 2131 if (have_addr) 2132 p = (struct proc *)addr; 2133 else 2134 p = curproc; 2135 witness_ddb_list(p); 2136} 2137 2138void 2139db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2140{ 2141 CPU_INFO_ITERATOR cii; 2142 struct cpu_info *ci; 2143 struct lock_list_entry *lock_list; 2144 struct process *pr; 2145 struct proc *p; 2146 2147 CPU_INFO_FOREACH(cii, ci) { 2148 lock_list = witness_cpu[CPU_INFO_UNIT(ci)].wc_spinlocks; 2149 if (lock_list == NULL || lock_list->ll_count == 0) 2150 continue; 2151 db_printf("CPU %d:\n", CPU_INFO_UNIT(ci)); 2152 witness_list_locks(&lock_list, db_printf); 2153 } 2154 2155 /* 2156 * It would be nice to list only threads and processes that actually 2157 * held sleep locks, but that information is currently not exported 2158 * by WITNESS. 2159 */ 2160 LIST_FOREACH(pr, &allprocess, ps_list) { 2161 if (!witness_process_has_locks(pr)) 2162 continue; 2163 TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { 2164 if (!witness_thread_has_locks(p)) 2165 continue; 2166 db_printf("Process %d (%s) thread %p (%d)\n", 2167 pr->ps_pid, pr->ps_comm, p, p->p_tid); 2168 witness_ddb_list(p); 2169 } 2170 } 2171} 2172 2173void 2174witness_print_badstacks(void) 2175{ 2176 struct witness *w1, *w2; 2177 int error, generation, i, j; 2178 2179 if (witness_watch < 1) { 2180 db_printf("witness watch is disabled\n"); 2181 return; 2182 } 2183 if (witness_cold) { 2184 db_printf("witness is cold\n"); 2185 return; 2186 } 2187 error = 0; 2188 2189restart: 2190 mtx_enter(&w_mtx); 2191 generation = w_generation; 2192 mtx_leave(&w_mtx); 2193 db_printf("Number of known direct relationships is %d\n", 2194 w_lohash.wloh_count); 2195 for (i = 1; i < w_max_used_index; i++) { 2196 mtx_enter(&w_mtx); 2197 if (generation != w_generation) { 2198 mtx_leave(&w_mtx); 2199 2200 /* The graph has changed, try again. */ 2201 db_printf("Lock graph changed, restarting trace.\n"); 2202 goto restart; 2203 } 2204 2205 w1 = &w_data[i]; 2206 if (w1->w_reversed == 0) { 2207 mtx_leave(&w_mtx); 2208 continue; 2209 } 2210 mtx_leave(&w_mtx); 2211 2212 if (w1->w_reversed == 0) 2213 continue; 2214 for (j = 1; j < w_max_used_index; j++) { 2215 if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j) 2216 continue; 2217 2218 mtx_enter(&w_mtx); 2219 if (generation != w_generation) { 2220 mtx_leave(&w_mtx); 2221 2222 /* The graph has changed, try again. */ 2223 db_printf("Lock graph changed, " 2224 "restarting trace.\n"); 2225 goto restart; 2226 } 2227 2228 w2 = &w_data[j]; 2229 mtx_leave(&w_mtx); 2230 2231 db_printf("\nLock order reversal between \"%s\"(%s) " 2232 "and \"%s\"(%s)!\n", 2233 w1->w_type->lt_name, w1->w_class->lc_name, 2234 w2->w_type->lt_name, w2->w_class->lc_name); 2235 witness_print_cycle(db_printf, w1, w2); 2236 } 2237 } 2238 mtx_enter(&w_mtx); 2239 if (generation != w_generation) { 2240 mtx_leave(&w_mtx); 2241 2242 /* 2243 * The graph changed while we were printing stack data, 2244 * try again. 2245 */ 2246 db_printf("Lock graph changed, restarting trace.\n"); 2247 goto restart; 2248 } 2249 mtx_leave(&w_mtx); 2250} 2251 2252void 2253db_witness_display(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2254{ 2255 switch (modif[0]) { 2256 case 'b': 2257 witness_print_badstacks(); 2258 break; 2259 default: 2260 witness_ddb_display(db_printf); 2261 break; 2262 } 2263} 2264#endif 2265 2266void 2267db_witness_print_fullgraph(void) 2268{ 2269 struct witness *w; 2270 int error; 2271 2272 if (witness_watch < 1) { 2273 db_printf("witness watch is disabled\n"); 2274 return; 2275 } 2276 if (witness_cold) { 2277 db_printf("witness is cold\n"); 2278 return; 2279 } 2280 error = 0; 2281 2282 mtx_enter(&w_mtx); 2283 SLIST_FOREACH(w, &w_all, w_list) 2284 w->w_displayed = 0; 2285 SLIST_FOREACH(w, &w_all, w_list) 2286 db_witness_add_fullgraph(w); 2287 mtx_leave(&w_mtx); 2288} 2289 2290static void 2291db_witness_add_fullgraph(struct witness *w) 2292{ 2293 int i; 2294 2295 if (w->w_displayed != 0 || w->w_acquired == 0) 2296 return; 2297 w->w_displayed = 1; 2298 2299 WITNESS_INDEX_ASSERT(w->w_index); 2300 for (i = 1; i <= w_max_used_index; i++) { 2301 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) { 2302 db_printf("\"%s\",\"%s\"\n", w->w_type->lt_name, 2303 w_data[i].w_type->lt_name); 2304 db_witness_add_fullgraph(&w_data[i]); 2305 } 2306 } 2307} 2308 2309/* 2310 * A simple hash function. Takes a key pointer and a key size. If size == 0, 2311 * interprets the key as a string and reads until the null 2312 * terminator. Otherwise, reads the first size bytes. Returns an unsigned 32-bit 2313 * hash value computed from the key. 2314 */ 2315static uint32_t 2316witness_hash_djb2(const uint8_t *key, uint32_t size) 2317{ 2318 unsigned int hash = 5381; 2319 int i; 2320 2321 /* hash = hash * 33 + key[i] */ 2322 if (size) 2323 for (i = 0; i < size; i++) 2324 hash = ((hash << 5) + hash) + (unsigned int)key[i]; 2325 else 2326 for (i = 0; key[i] != 0; i++) 2327 hash = ((hash << 5) + hash) + (unsigned int)key[i]; 2328 2329 return (hash); 2330} 2331 2332 2333/* 2334 * Initializes the two witness hash tables. Called exactly once from 2335 * witness_initialize(). 2336 */ 2337static void 2338witness_init_hash_tables(void) 2339{ 2340 int i; 2341 2342 KASSERT(witness_cold); 2343 2344 /* Initialize the hash tables. */ 2345 for (i = 0; i < WITNESS_HASH_SIZE; i++) 2346 SLIST_INIT(&w_hash.wh_array[i]); 2347 2348 w_hash.wh_size = WITNESS_HASH_SIZE; 2349 w_hash.wh_count = 0; 2350 2351 /* Initialize the lock order data hash. */ 2352 w_lodata = (void *)uvm_pageboot_alloc( 2353 sizeof(struct witness_lock_order_data) * WITNESS_LO_DATA_COUNT); 2354 memset(w_lodata, 0, sizeof(struct witness_lock_order_data) * 2355 WITNESS_LO_DATA_COUNT); 2356 w_lofree = NULL; 2357 for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) { 2358 w_lodata[i].wlod_next = w_lofree; 2359 w_lofree = &w_lodata[i]; 2360 } 2361 w_lohash.wloh_size = WITNESS_LO_HASH_SIZE; 2362 w_lohash.wloh_count = 0; 2363 for (i = 0; i < WITNESS_LO_HASH_SIZE; i++) 2364 w_lohash.wloh_array[i] = NULL; 2365} 2366 2367static struct witness * 2368witness_hash_get(const struct lock_type *type, const char *subtype) 2369{ 2370 struct witness *w; 2371 uint32_t hash; 2372 2373 KASSERT(type != NULL); 2374 if (witness_cold == 0) 2375 MUTEX_ASSERT_LOCKED(&w_mtx); 2376 hash = (uint32_t)((uintptr_t)type ^ (uintptr_t)subtype) % 2377 w_hash.wh_size; 2378 SLIST_FOREACH(w, &w_hash.wh_array[hash], w_hash_next) { 2379 if (w->w_type == type && w->w_subtype == subtype) 2380 goto out; 2381 } 2382 2383out: 2384 return (w); 2385} 2386 2387static void 2388witness_hash_put(struct witness *w) 2389{ 2390 uint32_t hash; 2391 2392 KASSERT(w != NULL); 2393 KASSERT(w->w_type != NULL); 2394 if (witness_cold == 0) 2395 MUTEX_ASSERT_LOCKED(&w_mtx); 2396 KASSERTMSG(witness_hash_get(w->w_type, w->w_subtype) == NULL, 2397 "%s: trying to add a hash entry that already exists!", __func__); 2398 KASSERTMSG(SLIST_NEXT(w, w_hash_next) == NULL, 2399 "%s: w->w_hash_next != NULL", __func__); 2400 2401 hash = (uint32_t)((uintptr_t)w->w_type ^ (uintptr_t)w->w_subtype) % 2402 w_hash.wh_size; 2403 SLIST_INSERT_HEAD(&w_hash.wh_array[hash], w, w_hash_next); 2404 w_hash.wh_count++; 2405} 2406 2407 2408static struct witness_lock_order_data * 2409witness_lock_order_get(struct witness *parent, struct witness *child) 2410{ 2411 struct witness_lock_order_data *data = NULL; 2412 struct witness_lock_order_key key; 2413 unsigned int hash; 2414 2415 KASSERT(parent != NULL && child != NULL); 2416 key.from = parent->w_index; 2417 key.to = child->w_index; 2418 WITNESS_INDEX_ASSERT(key.from); 2419 WITNESS_INDEX_ASSERT(key.to); 2420 if ((w_rmatrix[parent->w_index][child->w_index] 2421 & WITNESS_LOCK_ORDER_KNOWN) == 0) 2422 goto out; 2423 2424 hash = witness_hash_djb2((const char*)&key, 2425 sizeof(key)) % w_lohash.wloh_size; 2426 data = w_lohash.wloh_array[hash]; 2427 while (data != NULL) { 2428 if (witness_lock_order_key_equal(&data->wlod_key, &key)) 2429 break; 2430 data = data->wlod_next; 2431 } 2432 2433out: 2434 return (data); 2435} 2436 2437/* 2438 * Verify that parent and child have a known relationship, are not the same, 2439 * and child is actually a child of parent. This is done without w_mtx 2440 * to avoid contention in the common case. 2441 */ 2442static int 2443witness_lock_order_check(struct witness *parent, struct witness *child) 2444{ 2445 2446 if (parent != child && 2447 w_rmatrix[parent->w_index][child->w_index] 2448 & WITNESS_LOCK_ORDER_KNOWN && 2449 isitmychild(parent, child)) 2450 return (1); 2451 2452 return (0); 2453} 2454 2455static int 2456witness_lock_order_add(struct witness *parent, struct witness *child) 2457{ 2458 static int lofree_empty_reported = 0; 2459 struct witness_lock_order_data *data = NULL; 2460 struct witness_lock_order_key key; 2461 unsigned int hash; 2462 2463 KASSERT(parent != NULL && child != NULL); 2464 key.from = parent->w_index; 2465 key.to = child->w_index; 2466 WITNESS_INDEX_ASSERT(key.from); 2467 WITNESS_INDEX_ASSERT(key.to); 2468 if (w_rmatrix[parent->w_index][child->w_index] 2469 & WITNESS_LOCK_ORDER_KNOWN) 2470 return (1); 2471 2472 hash = witness_hash_djb2((const char*)&key, 2473 sizeof(key)) % w_lohash.wloh_size; 2474 w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN; 2475 data = w_lofree; 2476 if (data == NULL) { 2477 if (!lofree_empty_reported) { 2478 lofree_empty_reported = 1; 2479 printf("witness: out of free lock order entries\n"); 2480 } 2481 return (0); 2482 } 2483 w_lofree = data->wlod_next; 2484 data->wlod_next = w_lohash.wloh_array[hash]; 2485 data->wlod_key = key; 2486 w_lohash.wloh_array[hash] = data; 2487 w_lohash.wloh_count++; 2488 stacktrace_save_at(&data->wlod_stack, 1); 2489 return (1); 2490} 2491 2492/* Call this whenever the structure of the witness graph changes. */ 2493static void 2494witness_increment_graph_generation(void) 2495{ 2496 2497 if (witness_cold == 0) 2498 MUTEX_ASSERT_LOCKED(&w_mtx); 2499 w_generation++; 2500} 2501 2502static void 2503witness_debugger(int dump) 2504{ 2505 switch (witness_watch) { 2506 case 1: 2507 break; 2508 case 2: 2509 if (dump) 2510 db_stack_dump(); 2511 break; 2512 case 3: 2513 if (dump) 2514 db_stack_dump(); 2515 db_enter(); 2516 break; 2517 default: 2518 panic("witness: locking error"); 2519 } 2520} 2521 2522static int 2523witness_alloc_stacks(void) 2524{ 2525 union lock_stack *stacks; 2526 unsigned int i, nstacks = LOCK_CHILDCOUNT * LOCK_NCHILDREN; 2527 2528 rw_assert_wrlock(&w_ctlock); 2529 2530 if (w_lock_stack_num >= nstacks) 2531 return (0); 2532 2533 nstacks -= w_lock_stack_num; 2534 stacks = mallocarray(nstacks, sizeof(*stacks), M_WITNESS, 2535 M_WAITOK | M_CANFAIL | M_ZERO); 2536 if (stacks == NULL) 2537 return (ENOMEM); 2538 2539 mtx_enter(&w_mtx); 2540 for (i = 0; i < nstacks; i++) { 2541 stacks[i].ls_next = w_lock_stack_free; 2542 w_lock_stack_free = &stacks[i]; 2543 } 2544 mtx_leave(&w_mtx); 2545 w_lock_stack_num += nstacks; 2546 2547 return (0); 2548} 2549 2550int 2551witness_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, 2552 void *newp, size_t newlen) 2553{ 2554 int error, value; 2555 2556 if (namelen != 1) 2557 return (ENOTDIR); 2558 2559 rw_enter_write(&w_ctlock); 2560 2561 switch (name[0]) { 2562 case KERN_WITNESS_WATCH: 2563 error = witness_sysctl_watch(oldp, oldlenp, newp, newlen); 2564 break; 2565 case KERN_WITNESS_LOCKTRACE: 2566 value = witness_locktrace; 2567 error = sysctl_int(oldp, oldlenp, newp, newlen, &value); 2568 if (error == 0 && newp != NULL) { 2569 switch (value) { 2570 case 1: 2571 error = witness_alloc_stacks(); 2572 /* FALLTHROUGH */ 2573 case 0: 2574 if (error == 0) 2575 witness_locktrace = value; 2576 break; 2577 default: 2578 error = EINVAL; 2579 break; 2580 } 2581 } 2582 break; 2583 default: 2584 error = EOPNOTSUPP; 2585 break; 2586 } 2587 2588 rw_exit_write(&w_ctlock); 2589 2590 return (error); 2591} 2592 2593int 2594witness_sysctl_watch(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 2595{ 2596 int error; 2597 int value; 2598 2599 value = witness_watch; 2600 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, 2601 &value, -1, 3); 2602 if (error == 0 && newp != NULL) { 2603 mtx_enter(&w_mtx); 2604 if (value < 0 || witness_watch >= 0) 2605 witness_watch = value; 2606 else 2607 error = EINVAL; 2608 mtx_leave(&w_mtx); 2609 } 2610 return (error); 2611} 2612