subr_witness.c revision 166061
139215Sgibbs/*- 2107178Snjl * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 339215Sgibbs * 4107178Snjl * Redistribution and use in source and binary forms, with or without 539215Sgibbs * modification, are permitted provided that the following conditions 639215Sgibbs * are met: 739215Sgibbs * 1. Redistributions of source code must retain the above copyright 839215Sgibbs * notice, this list of conditions and the following disclaimer. 939215Sgibbs * 2. Redistributions in binary form must reproduce the above copyright 1039215Sgibbs * notice, this list of conditions and the following disclaimer in the 1139215Sgibbs * documentation and/or other materials provided with the distribution. 1239215Sgibbs * 3. Berkeley Software Design Inc's name may not be used to endorse or 1339215Sgibbs * promote products derived from this software without specific prior 1439215Sgibbs * written permission. 1539215Sgibbs * 1639215Sgibbs * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 1739215Sgibbs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1839215Sgibbs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1939215Sgibbs * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 2039215Sgibbs * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2139215Sgibbs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2239215Sgibbs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2339215Sgibbs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2439215Sgibbs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2539215Sgibbs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2639215Sgibbs * SUCH DAMAGE. 
2739215Sgibbs * 2850476Speter * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 2939215Sgibbs * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 3039215Sgibbs */ 3139215Sgibbs 32121184Ssimokawa/* 3344498Sgibbs * Implementation of the `witness' lock verifier. Originally implemented for 34107178Snjl * mutexes in BSD/OS. Extended to handle generic lock objects and lock 3539215Sgibbs * classes in FreeBSD. 3644498Sgibbs */ 3739215Sgibbs 3839215Sgibbs/* 3939215Sgibbs * Main Entry: witness 40107178Snjl * Pronunciation: 'wit-n&s 4139215Sgibbs * Function: noun 4239215Sgibbs * Etymology: Middle English witnesse, from Old English witnes knowledge, 43107178Snjl * testimony, witness, from 2wit 44109161Snjl * Date: before 12th century 45107178Snjl * 1 : attestation of a fact or event : TESTIMONY 46107178Snjl * 2 : one that gives evidence; specifically : one who testifies in 47107178Snjl * a cause or before a judicial tribunal 48107178Snjl * 3 : one asked to be present at a transaction so as to be able to 49120428Ssimokawa * testify to its having taken place 50107178Snjl * 4 : one who has personal knowledge of something 5139215Sgibbs * 5 a : something serving as evidence or proof : SIGN 52107178Snjl * b : public affirmation by word or example of usually 5339215Sgibbs * religious faith or conviction <the heroic witness to divine 54107178Snjl * life -- Pilot> 5539215Sgibbs * 6 capitalized : a member of the Jehovah's Witnesses 56107178Snjl */ 57107178Snjl 58107178Snjl/* 59162704Smjacob * Special rules concerning Giant and lock orders: 60107178Snjl * 61107178Snjl * 1) Giant must be acquired before any other mutexes. Stated another way, 62107178Snjl * no other mutex may be held when Giant is acquired. 63107178Snjl * 64107178Snjl * 2) Giant must be released when blocking on a sleepable lock. 65162704Smjacob * 66121184Ssimokawa * This rule is less obvious, but is a result of Giant providing the same 67121184Ssimokawa * semantics as spl(). 
Basically, when a thread sleeps, it must release 68107178Snjl * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule 69107178Snjl * 2). 70107178Snjl * 71107178Snjl * 3) Giant may be acquired before or after sleepable locks. 72107178Snjl * 73107178Snjl * This rule is also not quite as obvious. Giant may be acquired after 74107178Snjl * a sleepable lock because it is a non-sleepable lock and non-sleepable 75107178Snjl * locks may always be acquired while holding a sleepable lock. The second 76107178Snjl * case, Giant before a sleepable lock, follows from rule 2) above. Suppose 77107178Snjl * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 7844498Sgibbs * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and 7944498Sgibbs * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to 8044498Sgibbs * execute. Thus, acquiring Giant both before and after a sleepable lock 8144498Sgibbs * will not result in a lock order reversal. 8239215Sgibbs */ 83107178Snjl 84107178Snjl#include <sys/cdefs.h> 85107178Snjl__FBSDID("$FreeBSD: head/sys/kern/subr_witness.c 166061 2007-01-16 22:56:28Z ssouhlal $"); 86107178Snjl 87107178Snjl#include "opt_ddb.h" 88107178Snjl#include "opt_witness.h" 89107178Snjl 90162704Smjacob#include <sys/param.h> 91237601Sken#include <sys/bus.h> 92107178Snjl#include <sys/kdb.h> 93107178Snjl#include <sys/kernel.h> 94107178Snjl#include <sys/ktr.h> 95107178Snjl#include <sys/lock.h> 96107178Snjl#include <sys/malloc.h> 97107178Snjl#include <sys/mutex.h> 98107178Snjl#include <sys/priv.h> 9939215Sgibbs#include <sys/proc.h> 10039215Sgibbs#include <sys/sysctl.h> 10139215Sgibbs#include <sys/systm.h> 10239215Sgibbs 103228481Sed#include <ddb/ddb.h> 104228481Sed 105107178Snjl#include <machine/stdarg.h> 106107178Snjl 10739215Sgibbs/* Note that these traces do not work with KTR_ALQ. 
*/ 108107178Snjl#if 0 109107178Snjl#define KTR_WITNESS KTR_SUBSYS 110107178Snjl#else 111107178Snjl#define KTR_WITNESS 0 112107178Snjl#endif 113107178Snjl 114107178Snjl/* Easier to stay with the old names. */ 115107178Snjl#define lo_list lo_witness_data.lod_list 116107178Snjl#define lo_witness lo_witness_data.lod_witness 117107178Snjl 118107178Snjl/* Define this to check for blessed mutexes */ 119107178Snjl#undef BLESSING 120107178Snjl 121162704Smjacob#define WITNESS_COUNT 1024 12239215Sgibbs#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) 123107178Snjl/* 124107178Snjl * XXX: This is somewhat bogus, as we assume here that at most 1024 threads 12539215Sgibbs * will hold LOCK_NCHILDREN * 2 locks. We handle failure ok, and we should 126107178Snjl * probably be safe for the most part, but it's still a SWAG. 127107178Snjl */ 12839215Sgibbs#define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2 129107178Snjl 130107178Snjl#define WITNESS_NCHILDREN 6 13144498Sgibbs 132107178Snjlstruct witness_child_list_entry; 133107178Snjl 13444498Sgibbsstruct witness { 135107178Snjl const char *w_name; 136107178Snjl struct lock_class *w_class; 137107178Snjl STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ 138107178Snjl STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 13944498Sgibbs struct witness_child_list_entry *w_children; /* Great evilness... 
*/ 140107178Snjl const char *w_file; 141107178Snjl int w_line; 142107178Snjl u_int w_level; 143107178Snjl u_int w_refcount; 14463185Smjacob u_char w_Giant_squawked:1; 145107178Snjl u_char w_other_squawked:1; 146121184Ssimokawa u_char w_same_squawked:1; 147121184Ssimokawa u_char w_displayed:1; 148121184Ssimokawa}; 149121184Ssimokawa 150121184Ssimokawastruct witness_child_list_entry { 151121184Ssimokawa struct witness_child_list_entry *wcl_next; 152121184Ssimokawa struct witness *wcl_children[WITNESS_NCHILDREN]; 153121184Ssimokawa u_int wcl_count; 154121184Ssimokawa}; 155121184Ssimokawa 156121184SsimokawaSTAILQ_HEAD(witness_list, witness); 157121184Ssimokawa 158121184Ssimokawa#ifdef BLESSING 159121184Ssimokawastruct witness_blessed { 160121184Ssimokawa const char *b_lock1; 161121184Ssimokawa const char *b_lock2; 162121184Ssimokawa}; 163121184Ssimokawa#endif 164121184Ssimokawa 165121184Ssimokawastruct witness_order_list_entry { 166121184Ssimokawa const char *w_name; 167121184Ssimokawa struct lock_class *w_class; 168121184Ssimokawa}; 169121184Ssimokawa 170121184Ssimokawa#ifdef BLESSING 171121184Ssimokawastatic int blessed(struct witness *, struct witness *); 172121184Ssimokawa#endif 173107178Snjlstatic int depart(struct witness *w); 174121184Ssimokawastatic struct witness *enroll(const char *description, 175107178Snjl struct lock_class *lock_class); 176107178Snjlstatic int insertchild(struct witness *parent, struct witness *child); 177107178Snjlstatic int isitmychild(struct witness *parent, struct witness *child); 178121184Ssimokawastatic int isitmydescendant(struct witness *parent, struct witness *child); 179107178Snjlstatic int itismychild(struct witness *parent, struct witness *child); 180107178Snjlstatic void removechild(struct witness *parent, struct witness *child); 181107178Snjlstatic int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); 182107178Snjlstatic const char *fixup_filename(const char *file); 183107178Snjlstatic struct witness *witness_get(void); 
static void	witness_free(struct witness *m);
static struct	witness_child_list_entry *witness_child_get(void);
static void	witness_child_free(struct witness_child_list_entry *wcl);
static struct	lock_list_entry *witness_lock_list_get(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct	lock_instance *find_instance(struct lock_list_entry *lock_list,
		    struct lock_object *lock);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef DDB
static void	witness_leveldescendents(struct witness *parent, int level);
static void	witness_levelall(void);
static void	witness_displaydescendants(void(*)(const char *fmt, ...),
		    struct witness *, int indent);
static void	witness_display_list(void(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_display(void(*)(const char *fmt, ...));
static void	witness_list(struct thread *td);
#endif

SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking");

/*
 * If set to 0, witness is disabled.  If set to a non-zero value, witness
 * performs full lock order checking for all locks.  At runtime, this
 * value may be set to 0 to turn off witness.  Witness is not allowed to be
 * turned back on once it has been turned off, however (the sysctl handler
 * rejects any non-zero write that differs from the current value).
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness.watch", &witness_watch);
SYSCTL_PROC(_debug_witness, OID_AUTO, watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0,
    sysctl_debug_witness_watch, "I", "witness is watching lock operations");

#ifdef KDB
/*
 * When KDB is enabled and witness_kdb is set to 1, it will cause the system
 * to drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 * The compile-time default is 1 only when WITNESS_KDB is defined.
 */
#ifdef WITNESS_KDB
int	witness_kdb = 1;
#else
int	witness_kdb = 0;
#endif
TUNABLE_INT("debug.witness.kdb", &witness_kdb);
SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RW, &witness_kdb, 0, "");

/*
 * When KDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_trace = 1;
TUNABLE_INT("debug.witness.trace", &witness_trace);
SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RW, &witness_trace, 0, "");
#endif /* KDB */

/*
 * Read-only tunable; the compile-time default is 1 only when
 * WITNESS_SKIPSPIN is defined.
 * NOTE(review): presumably makes witness ignore spin locks -- the consumer
 * of this variable is not in the visible part of the file; confirm.
 */
#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
TUNABLE_INT("debug.witness.skipspin", &witness_skipspin);
SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN,
    &witness_skipspin, 0, "");

/* Spin mutex ("witness lock") taken around updates to the witness graph. */
static struct mtx w_mtx;
/*
 * w_all is walked through w_list and holds every witness; w_sleep and
 * w_spin are walked through w_typelist and hold the witnesses of one lock
 * type.  w_free, w_child_free and w_lock_list_free are the free lists that
 * witness_initialize() fills from the static pools below.
 */
static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
static struct witness_child_list_entry *w_child_free = NULL;
static struct lock_list_entry *w_lock_list_free = NULL;

/* Counters exported read-only under debug.witness. */
static int w_free_cnt, w_spin_cnt, w_sleep_cnt, w_child_free_cnt, w_child_cnt;
SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, "");
SYSCTL_INT(_debug_witness, OID_AUTO, spin_cnt, CTLFLAG_RD, &w_spin_cnt, 0, "");
SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0,
    "");
SYSCTL_INT(_debug_witness, OID_AUTO, child_free_cnt, CTLFLAG_RD,
    &w_child_free_cnt, 0, "");
SYSCTL_INT(_debug_witness, OID_AUTO, child_cnt, CTLFLAG_RD, &w_child_cnt, 0,
    "");

/* Static backing store handed to the free lists by witness_initialize(). */
static struct witness w_data[WITNESS_COUNT];
static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];

/*
 * Statically defined lock orders.  The table is a sequence of chains, each
 * terminated by a { NULL, NULL } entry.  witness_initialize() enrolls the
 * entries of a chain in turn and records every entry as a child of the
 * entry before it, i.e. locks must be acquired in the order listed.  The
 * walk stops when a chain would start with a NULL name, which is why the
 * table ends with two consecutive terminators.
 */
static struct witness_order_list_entry order_lists[] = {
	/*
	 * sx locks
	 */
	{ "proctree", &lock_class_sx },
	{ "allproc", &lock_class_sx },
	{ NULL, NULL },
	/*
	 * Various mutexes
	 */
	{ "Giant", &lock_class_mtx_sleep },
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "pipe mutex", &lock_class_mtx_sleep },
	{ "sigio lock", &lock_class_mtx_sleep },
	{ "process group", &lock_class_mtx_sleep },
	{ "process lock", &lock_class_mtx_sleep },
	{ "session", &lock_class_mtx_sleep },
	{ "uidinfo hash", &lock_class_mtx_sleep },
	{ "uidinfo struct", &lock_class_mtx_sleep },
	{ "allprison", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Sockets
	 */
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "accept", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "sellck", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Routing
	 */
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "radix node head", &lock_class_mtx_sleep },
	{ "rtentry", &lock_class_mtx_sleep },
	{ "ifaddr", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Multicast - protocol locks before interface locks, after UDP locks.
	 */
	{ "udpinp", &lock_class_mtx_sleep },
	{ "in_multi_mtx", &lock_class_mtx_sleep },
	{ "igmp_mtx", &lock_class_mtx_sleep },
	{ "if_addr_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UNIX Domain Sockets
	 */
	{ "unp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UDP/IP
	 */
	{ "udp", &lock_class_mtx_sleep },
	{ "udpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * TCP/IP
	 */
	{ "tcp", &lock_class_mtx_sleep },
	{ "tcpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * SLIP
	 */
	{ "slip_mtx", &lock_class_mtx_sleep },
	{ "slip sc_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * netatalk
	 */
	{ "ddp_list_mtx", &lock_class_mtx_sleep },
	{ "ddp_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * BPF
	 */
	{ "bpf global lock", &lock_class_mtx_sleep },
	{ "bpf interface lock", &lock_class_mtx_sleep },
	{ "bpf cdev lock", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * NFS server
	 */
	{ "nfsd_mtx", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * CDEV
	 */
	{ "system map", &lock_class_mtx_sleep },
	{ "vm page queue mutex", &lock_class_mtx_sleep },
	{ "vnode interlock", &lock_class_mtx_sleep },
	{ "cdev", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * spin locks
	 */
#ifdef SMP
	{ "ap boot", &lock_class_mtx_spin },
#endif
	{ "rm.mutex_mtx", &lock_class_mtx_spin },
	{ "sio", &lock_class_mtx_spin },
#ifdef __i386__
	{ "cy", &lock_class_mtx_spin },
#endif
	{ "scc_hwmtx", &lock_class_mtx_spin },
	{ "uart_hwmtx", &lock_class_mtx_spin },
	{ "zstty", &lock_class_mtx_spin },
	{ "ng_node", &lock_class_mtx_spin },
	{ "ng_worklist", &lock_class_mtx_spin },
	{ "fast_taskqueue", &lock_class_mtx_spin },
	{ "intr table", &lock_class_mtx_spin },
	{ "sleepq chain", &lock_class_mtx_spin },
	{ "sched lock", &lock_class_mtx_spin },
	{ "turnstile chain", &lock_class_mtx_spin },
	{ "td_contested", &lock_class_mtx_spin },
	{ "callout", &lock_class_mtx_spin },
	{ "entropy harvest mutex", &lock_class_mtx_spin },
	{ "syscons video lock", &lock_class_mtx_spin },
	/*
	 * leaf locks
	 */
	{ "allpmaps", &lock_class_mtx_spin },
	{ "vm page queue free mutex", &lock_class_mtx_spin },
	{ "icu", &lock_class_mtx_spin },
#ifdef SMP
	{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) || defined(__amd64__)
	{ "tlb", &lock_class_mtx_spin },
#endif
#ifdef __sparc64__
	{ "ipi", &lock_class_mtx_spin },
	{ "rtc_mtx", &lock_class_mtx_spin },
#endif
#endif
	{ "clk", &lock_class_mtx_spin },
	{ "mutex profiling lock", &lock_class_mtx_spin },
	{ "kse zombie lock", &lock_class_mtx_spin },
	{ "ALD Queue", &lock_class_mtx_spin },
#ifdef __ia64__
	{ "MCA spin lock", &lock_class_mtx_spin },
#endif
#if defined(__i386__) || defined(__amd64__)
	{ "pcicfg", &lock_class_mtx_spin },
	{ "NDIS thread lock", &lock_class_mtx_spin },
#endif
	{ "tw_osl_io_lock", &lock_class_mtx_spin },
	{ "tw_osl_q_lock", &lock_class_mtx_spin },
	{ "tw_cl_io_lock", &lock_class_mtx_spin },
	{ "tw_cl_intr_lock", &lock_class_mtx_spin },
	{ "tw_cl_gen_lock", &lock_class_mtx_spin },
	{ NULL, NULL },
	{ NULL, NULL }
};

#ifdef BLESSING
/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count =
	sizeof(blessed_list) / sizeof(struct witness_blessed);
#endif

/*
 * List of locks initialized prior to witness being initialized whose
 * enrollment is currently deferred.  witness_init() appends to it while
 * witness_cold is set and witness_initialize() drains it.
 */
STAILQ_HEAD(, lock_object) pending_locks =
    STAILQ_HEAD_INITIALIZER(pending_locks);

/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * This global is set to 1 once the static lock orders have been enrolled
 * so that a warning can be issued for any spin locks enrolled later.
459107178Snjl */ 460107178Snjlstatic int witness_spin_warn = 0; 461107178Snjl 462107178Snjl/* 463107178Snjl * The WITNESS-enabled diagnostic code. Note that the witness code does 464107178Snjl * assume that the early boot is single-threaded at least until after this 465107178Snjl * routine is completed. 466162704Smjacob */ 467107178Snjlstatic void 468107178Snjlwitness_initialize(void *dummy __unused) 469107178Snjl{ 470107178Snjl struct lock_object *lock; 471107178Snjl struct witness_order_list_entry *order; 472107178Snjl struct witness *w, *w1; 473107178Snjl int i; 474107178Snjl 475162704Smjacob /* 476107178Snjl * We have to release Giant before initializing its witness 477107178Snjl * structure so that WITNESS doesn't get confused. 478107178Snjl */ 479107178Snjl mtx_unlock(&Giant); 480162704Smjacob mtx_assert(&Giant, MA_NOTOWNED); 48139215Sgibbs 48239215Sgibbs CTR1(KTR_WITNESS, "%s: initializing witness", __func__); 483162704Smjacob mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | 484162704Smjacob MTX_NOWITNESS | MTX_NOPROFILE); 485162704Smjacob for (i = 0; i < WITNESS_COUNT; i++) 486162704Smjacob witness_free(&w_data[i]); 487162704Smjacob for (i = 0; i < WITNESS_CHILDCOUNT; i++) 488107178Snjl witness_child_free(&w_childdata[i]); 489107178Snjl for (i = 0; i < LOCK_CHILDCOUNT; i++) 490107178Snjl witness_lock_list_free(&w_locklistdata[i]); 49139215Sgibbs 492107178Snjl /* First add in all the specified order lists. 
*/ 493107178Snjl for (order = order_lists; order->w_name != NULL; order++) { 494107178Snjl w = enroll(order->w_name, order->w_class); 495107178Snjl if (w == NULL) 496107178Snjl continue; 497107178Snjl w->w_file = "order list"; 498237601Sken for (order++; order->w_name != NULL; order++) { 499237601Sken w1 = enroll(order->w_name, order->w_class); 500107178Snjl if (w1 == NULL) 501107178Snjl continue; 502107178Snjl w1->w_file = "order list"; 503107178Snjl if (!itismychild(w, w1)) 504107178Snjl panic("Not enough memory for static orders!"); 505107178Snjl w = w1; 50639215Sgibbs } 50739215Sgibbs } 508107178Snjl witness_spin_warn = 1; 509107178Snjl 510109345Snjl /* Iterate through all locks and add them to witness. */ 511109345Snjl while (!STAILQ_EMPTY(&pending_locks)) { 512109345Snjl lock = STAILQ_FIRST(&pending_locks); 513109345Snjl STAILQ_REMOVE_HEAD(&pending_locks, lo_list); 514107178Snjl KASSERT(lock->lo_flags & LO_WITNESS, 51539215Sgibbs ("%s: lock %s is on pending list but not LO_WITNESS", 51639215Sgibbs __func__, lock->lo_name)); 517107178Snjl lock->lo_witness = enroll(lock->lo_type, LOCK_CLASS(lock)); 518107178Snjl } 519107178Snjl 520107178Snjl /* Mark the witness code as being ready for use. */ 52139215Sgibbs witness_cold = 0; 522107178Snjl 52339215Sgibbs mtx_lock(&Giant); 524107178Snjl} 525107178SnjlSYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) 526107178Snjl 527107178Snjlstatic int 528107178Snjlsysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) 529107178Snjl{ 530107178Snjl int error, value; 531107178Snjl 532107178Snjl value = witness_watch; 53339215Sgibbs error = sysctl_handle_int(oidp, &value, 0, req); 53439215Sgibbs if (error != 0 || req->newptr == NULL) 53539215Sgibbs return (error); 536107178Snjl /* 53739215Sgibbs * XXXRW: Why a priv check here? 
538107178Snjl */ 53939215Sgibbs error = priv_check(req->td, PRIV_WITNESS); 540107178Snjl if (error != 0) 541162704Smjacob return (error); 54239215Sgibbs if (value == witness_watch) 543107178Snjl return (0); 544107178Snjl if (value != 0) 545107178Snjl return (EINVAL); 546107178Snjl witness_watch = 0; 54739215Sgibbs return (0); 548107178Snjl} 549107178Snjl 550107178Snjlvoid 551107178Snjlwitness_init(struct lock_object *lock) 552107178Snjl{ 55339215Sgibbs struct lock_class *class; 554107178Snjl 555107178Snjl /* Various sanity checks. */ 556107178Snjl class = LOCK_CLASS(lock); 557107178Snjl if ((lock->lo_flags & LO_RECURSABLE) != 0 && 558107178Snjl (class->lc_flags & LC_RECURSABLE) == 0) 559107178Snjl panic("%s: lock (%s) %s can not be recursable", __func__, 560107178Snjl class->lc_name, lock->lo_name); 561107178Snjl if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 562107178Snjl (class->lc_flags & LC_SLEEPABLE) == 0) 563107178Snjl panic("%s: lock (%s) %s can not be sleepable", __func__, 564107178Snjl class->lc_name, lock->lo_name); 565107178Snjl if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 566107178Snjl (class->lc_flags & LC_UPGRADABLE) == 0) 567107178Snjl panic("%s: lock (%s) %s can not be upgradable", __func__, 568107178Snjl class->lc_name, lock->lo_name); 569107178Snjl 570107178Snjl /* 571107178Snjl * If we shouldn't watch this lock, then just clear lo_witness. 572107178Snjl * Otherwise, if witness_cold is set, then it is too early to 573107178Snjl * enroll this lock, so defer it to witness_initialize() by adding 574107178Snjl * it to the pending_locks list. If it is not too early, then enroll 575107178Snjl * the lock now. 
576107178Snjl */ 577107178Snjl if (witness_watch == 0 || panicstr != NULL || 578107178Snjl (lock->lo_flags & LO_WITNESS) == 0) 579107178Snjl lock->lo_witness = NULL; 580107178Snjl else if (witness_cold) { 581107178Snjl STAILQ_INSERT_TAIL(&pending_locks, lock, lo_list); 582107178Snjl lock->lo_flags |= LO_ENROLLPEND; 583107178Snjl } else 584107178Snjl lock->lo_witness = enroll(lock->lo_type, class); 585107178Snjl} 586107178Snjl 587107178Snjlvoid 588107178Snjlwitness_destroy(struct lock_object *lock) 589107178Snjl{ 590107178Snjl struct lock_class *class; 591107178Snjl struct witness *w; 592107178Snjl 593162704Smjacob class = LOCK_CLASS(lock); 594107178Snjl if (witness_cold) 595107178Snjl panic("lock (%s) %s destroyed while witness_cold", 596255120Smav class->lc_name, lock->lo_name); 597107178Snjl 598107178Snjl /* XXX: need to verify that no one holds the lock */ 599107178Snjl if ((lock->lo_flags & (LO_WITNESS | LO_ENROLLPEND)) == LO_WITNESS && 600107178Snjl lock->lo_witness != NULL) { 601107178Snjl w = lock->lo_witness; 602107178Snjl mtx_lock_spin(&w_mtx); 603107178Snjl MPASS(w->w_refcount > 0); 604107178Snjl w->w_refcount--; 605107178Snjl 60639215Sgibbs /* 607107178Snjl * Lock is already released if we have an allocation failure 60839215Sgibbs * and depart() fails. 609107178Snjl */ 610107178Snjl if (w->w_refcount != 0 || depart(w)) 611107178Snjl mtx_unlock_spin(&w_mtx); 612107178Snjl } 613107178Snjl 614107178Snjl /* 615107178Snjl * If this lock is destroyed before witness is up and running, 616107178Snjl * remove it from the pending list. 
617107178Snjl */ 618107178Snjl if (lock->lo_flags & LO_ENROLLPEND) { 619107178Snjl STAILQ_REMOVE(&pending_locks, lock, lock_object, lo_list); 620107178Snjl lock->lo_flags &= ~LO_ENROLLPEND; 621107178Snjl } 622107178Snjl} 623107178Snjl 624107178Snjl#ifdef DDB 625107178Snjlstatic void 626162704Smjacobwitness_levelall (void) 627107178Snjl{ 628162704Smjacob struct witness_list *list; 629107178Snjl struct witness *w, *w1; 630107178Snjl 631107178Snjl /* 632107178Snjl * First clear all levels. 633107178Snjl */ 634107178Snjl STAILQ_FOREACH(w, &w_all, w_list) { 635107178Snjl w->w_level = 0; 636107178Snjl } 637107178Snjl 638107178Snjl /* 639107178Snjl * Look for locks with no parent and level all their descendants. 640120428Ssimokawa */ 641120428Ssimokawa STAILQ_FOREACH(w, &w_all, w_list) { 642107178Snjl /* 643107178Snjl * This is just an optimization, technically we could get 644107178Snjl * away just walking the all list each time. 645107178Snjl */ 646107178Snjl if (w->w_class->lc_flags & LC_SLEEPLOCK) 647107178Snjl list = &w_sleep; 648225950Sken else 64939215Sgibbs list = &w_spin; 650107178Snjl STAILQ_FOREACH(w1, list, w_typelist) { 651107178Snjl if (isitmychild(w1, w)) 652107178Snjl goto skip; 65339215Sgibbs } 654237601Sken witness_leveldescendents(w, 0); 655107178Snjl skip: 656107178Snjl ; /* silence GCC 3.x */ 657107178Snjl } 658107178Snjl} 659107178Snjl 66039215Sgibbsstatic void 661107178Snjlwitness_leveldescendents(struct witness *parent, int level) 662107178Snjl{ 663107178Snjl struct witness_child_list_entry *wcl; 664107178Snjl int i; 665107178Snjl 666107178Snjl if (parent->w_level < level) 667162704Smjacob parent->w_level = level; 668107178Snjl level++; 669107178Snjl for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 670107178Snjl for (i = 0; i < wcl->wcl_count; i++) 671107178Snjl witness_leveldescendents(wcl->wcl_children[i], level); 672107178Snjl} 673107178Snjl 674107178Snjlstatic void 675107178Snjlwitness_displaydescendants(void(*prnt)(const char 
*fmt, ...), 676107178Snjl struct witness *parent, int indent) 677107178Snjl{ 678107178Snjl struct witness_child_list_entry *wcl; 67939215Sgibbs int i, level; 680107178Snjl 681107178Snjl level = parent->w_level; 68239215Sgibbs prnt("%-2d", level); 683107178Snjl for (i = 0; i < indent; i++) 684107178Snjl prnt(" "); 685107178Snjl if (parent->w_refcount > 0) 686107178Snjl prnt("%s", parent->w_name); 687107178Snjl else 688162704Smjacob prnt("(dead)"); 689107178Snjl if (parent->w_displayed) { 690107178Snjl prnt(" -- (already displayed)\n"); 691162704Smjacob return; 692107178Snjl } 693107178Snjl parent->w_displayed = 1; 694162704Smjacob if (parent->w_refcount > 0) { 695107178Snjl if (parent->w_file != NULL) 696162704Smjacob prnt(" -- last acquired @ %s:%d", parent->w_file, 697162704Smjacob parent->w_line); 698162704Smjacob } 699162704Smjacob prnt("\n"); 700162704Smjacob for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 701162704Smjacob for (i = 0; i < wcl->wcl_count; i++) 702162704Smjacob witness_displaydescendants(prnt, 703162704Smjacob wcl->wcl_children[i], indent + 1); 704162704Smjacob} 705162704Smjacob 70639215Sgibbsstatic void 707162704Smjacobwitness_display_list(void(*prnt)(const char *fmt, ...), 708162704Smjacob struct witness_list *list) 709162704Smjacob{ 710162704Smjacob struct witness *w; 711107178Snjl 712107178Snjl STAILQ_FOREACH(w, list, w_typelist) { 713107178Snjl if (w->w_file == NULL || w->w_level > 0) 714107178Snjl continue; 71539215Sgibbs /* 716107178Snjl * This lock has no anscestors, display its descendants. 717107178Snjl */ 718107178Snjl witness_displaydescendants(prnt, w, 0); 719107178Snjl } 720162704Smjacob} 721107178Snjl 722107178Snjlstatic void 723107178Snjlwitness_display(void(*prnt)(const char *fmt, ...)) 724107178Snjl{ 725107178Snjl struct witness *w; 726107178Snjl 727107178Snjl KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 728162704Smjacob witness_levelall(); 729107178Snjl 730107178Snjl /* Clear all the displayed flags. 
*/ 731107178Snjl STAILQ_FOREACH(w, &w_all, w_list) { 732107178Snjl w->w_displayed = 0; 733107178Snjl } 734107178Snjl 735107178Snjl /* 736107178Snjl * First, handle sleep locks which have been acquired at least 737107178Snjl * once. 738107178Snjl */ 739120428Ssimokawa prnt("Sleep locks:\n"); 740120428Ssimokawa witness_display_list(prnt, &w_sleep); 741120428Ssimokawa 742120428Ssimokawa /* 743120428Ssimokawa * Now do spin locks which have been acquired at least once. 744120428Ssimokawa */ 745120428Ssimokawa prnt("\nSpin locks:\n"); 746120428Ssimokawa witness_display_list(prnt, &w_spin); 747107178Snjl 748107178Snjl /* 749107178Snjl * Finally, any locks which have not been acquired yet. 750107178Snjl */ 751107178Snjl prnt("\nLocks which were never acquired:\n"); 752107178Snjl STAILQ_FOREACH(w, &w_all, w_list) { 753107178Snjl if (w->w_file != NULL || w->w_refcount == 0) 754107178Snjl continue; 755107178Snjl prnt("%s\n", w->w_name); 756107178Snjl } 757107178Snjl} 758107178Snjl#endif /* DDB */ 759107178Snjl 760107178Snjl/* Trim useless garbage from filenames. */ 761107178Snjlstatic const char * 762107178Snjlfixup_filename(const char *file) 763107178Snjl{ 764162704Smjacob 765162704Smjacob if (file == NULL) 766162704Smjacob return (NULL); 767162704Smjacob while (strncmp(file, "../", 3) == 0) 768162704Smjacob file += 3; 76963185Smjacob return (file); 770107178Snjl} 771162704Smjacob 772107178Snjlint 77363185Smjacobwitness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 774107178Snjl{ 775237601Sken 776107178Snjl if (witness_watch == 0 || panicstr != NULL) 777107178Snjl return (0); 77863185Smjacob 779107178Snjl /* Require locks that witness knows about. 
*/ 780107178Snjl if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 781107178Snjl lock2->lo_witness == NULL) 782107178Snjl return (EINVAL); 783107178Snjl 784107178Snjl MPASS(!mtx_owned(&w_mtx)); 785107178Snjl mtx_lock_spin(&w_mtx); 786107178Snjl 787107178Snjl /* 788107178Snjl * If we already have either an explicit or implied lock order that 789107178Snjl * is the other way around, then return an error. 790107178Snjl */ 791107178Snjl if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 792107178Snjl mtx_unlock_spin(&w_mtx); 793107178Snjl return (EDOOFUS); 794107178Snjl } 795237601Sken 796107178Snjl /* Try to add the new order. */ 797107178Snjl CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 798107178Snjl lock2->lo_type, lock1->lo_type); 799107178Snjl if (!itismychild(lock1->lo_witness, lock2->lo_witness)) 800107178Snjl return (ENOMEM); 801107178Snjl mtx_unlock_spin(&w_mtx); 802107178Snjl return (0); 803107178Snjl} 804107178Snjl 805107178Snjlvoid 806237601Skenwitness_checkorder(struct lock_object *lock, int flags, const char *file, 807107178Snjl int line) 80839215Sgibbs{ 809107178Snjl struct lock_list_entry **lock_list, *lle; 810107178Snjl struct lock_instance *lock1, *lock2; 811107178Snjl struct lock_class *class; 812107178Snjl struct witness *w, *w1; 813107178Snjl struct thread *td; 814107178Snjl int i, j; 815107178Snjl 81639215Sgibbs if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 81739215Sgibbs panicstr != NULL) 818107178Snjl return; 819107178Snjl 820107178Snjl /* 821107178Snjl * Try locks do not block if they fail to acquire the lock, thus 822107178Snjl * there is no danger of deadlocks or of switching while holding a 82339215Sgibbs * spin lock if we acquire a lock via a try operation. This 82439215Sgibbs * function shouldn't even be called for try locks, so panic if 825107178Snjl * that happens. 
826107178Snjl */ 827107178Snjl if (flags & LOP_TRYLOCK) 828107178Snjl panic("%s should not be called for try lock operations", 829107178Snjl __func__); 830107178Snjl 831107178Snjl w = lock->lo_witness; 832107178Snjl class = LOCK_CLASS(lock); 833107178Snjl td = curthread; 834107178Snjl file = fixup_filename(file); 835107178Snjl 836107178Snjl if (class->lc_flags & LC_SLEEPLOCK) { 837107178Snjl /* 838107178Snjl * Since spin locks include a critical section, this check 839107178Snjl * implicitly enforces a lock order of all sleep locks before 840107178Snjl * all spin locks. 841107178Snjl */ 842107178Snjl if (td->td_critnest != 0 && !kdb_active) 843107178Snjl panic("blockable sleep lock (%s) %s @ %s:%d", 844107178Snjl class->lc_name, lock->lo_name, file, line); 845107178Snjl 846107178Snjl /* 847107178Snjl * If this is the first lock acquired then just return as 848107178Snjl * no order checking is needed. 849162704Smjacob */ 850162704Smjacob if (td->td_sleeplocks == NULL) 851107178Snjl return; 852162704Smjacob lock_list = &td->td_sleeplocks; 853107178Snjl } else { 854107178Snjl /* 855107178Snjl * If this is the first lock, just return as no order 856107178Snjl * checking is needed. We check this in both if clauses 857107178Snjl * here as unifying the check would require us to use a 858107178Snjl * critical section to ensure we don't migrate while doing 859107178Snjl * the check. Note that if this is not the first lock, we 860107178Snjl * are already in a critical section and are safe for the 861107178Snjl * rest of the check. 862107178Snjl */ 863107178Snjl if (PCPU_GET(spinlocks) == NULL) 864107178Snjl return; 865107178Snjl lock_list = PCPU_PTR(spinlocks); 866107178Snjl } 867107178Snjl 868107178Snjl /* 869107178Snjl * Check to see if we are recursing on a lock we already own. If 870107178Snjl * so, make sure that we don't mismatch exclusive and shared lock 871107178Snjl * acquires. 
872107178Snjl */ 873107178Snjl lock1 = find_instance(*lock_list, lock); 874107178Snjl if (lock1 != NULL) { 875107178Snjl if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 876107178Snjl (flags & LOP_EXCLUSIVE) == 0) { 877109345Snjl printf("shared lock of (%s) %s @ %s:%d\n", 878107178Snjl class->lc_name, lock->lo_name, file, line); 879107178Snjl printf("while exclusively locked from %s:%d\n", 880107178Snjl lock1->li_file, lock1->li_line); 881107178Snjl panic("share->excl"); 882107178Snjl } 883107178Snjl if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 884107178Snjl (flags & LOP_EXCLUSIVE) != 0) { 885157009Smjacob printf("exclusive lock of (%s) %s @ %s:%d\n", 886107178Snjl class->lc_name, lock->lo_name, file, line); 887107178Snjl printf("while share locked from %s:%d\n", 888107178Snjl lock1->li_file, lock1->li_line); 889107178Snjl panic("excl->share"); 890107178Snjl } 891107178Snjl return; 892107178Snjl } 893107178Snjl 894107178Snjl /* 895107178Snjl * Try locks do not block if they fail to acquire the lock, thus 896107178Snjl * there is no danger of deadlocks or of switching while holding a 897107178Snjl * spin lock if we acquire a lock via a try operation. 898107178Snjl */ 899107178Snjl if (flags & LOP_TRYLOCK) 900107178Snjl return; 901107178Snjl 902107178Snjl /* 903107178Snjl * Check for duplicate locks of the same type. Note that we only 904107178Snjl * have to check for this on the last lock we just acquired. Any 905107178Snjl * other cases will be caught as lock order violations. 
906255120Smav */ 907107178Snjl lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 908107178Snjl w1 = lock1->li_lock->lo_witness; 909107178Snjl if (w1 == w) { 910107178Snjl if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK) || 911107178Snjl (flags & LOP_DUPOK)) 912107178Snjl return; 913107178Snjl w->w_same_squawked = 1; 914107178Snjl printf("acquiring duplicate lock of same type: \"%s\"\n", 915107178Snjl lock->lo_type); 916107178Snjl printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, 917107178Snjl lock1->li_file, lock1->li_line); 918107178Snjl printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); 919107178Snjl#ifdef KDB 920107178Snjl goto debugger; 921107178Snjl#else 922107178Snjl return; 923107178Snjl#endif 924107178Snjl } 925107178Snjl MPASS(!mtx_owned(&w_mtx)); 926107178Snjl mtx_lock_spin(&w_mtx); 927107178Snjl /* 928107178Snjl * If we know that the the lock we are acquiring comes after 929107178Snjl * the lock we most recently acquired in the lock order tree, 930107178Snjl * then there is no need for any further checks. 931107178Snjl */ 932107178Snjl if (isitmychild(w1, w)) { 933107178Snjl mtx_unlock_spin(&w_mtx); 934107178Snjl return; 935107178Snjl } 936107178Snjl for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { 937107178Snjl for (i = lle->ll_count - 1; i >= 0; i--, j++) { 938107178Snjl 939107178Snjl MPASS(j < WITNESS_COUNT); 94039215Sgibbs lock1 = &lle->ll_children[i]; 941107178Snjl w1 = lock1->li_lock->lo_witness; 94244498Sgibbs 943107178Snjl /* 944107178Snjl * If this lock doesn't undergo witness checking, 945107178Snjl * then skip it. 946107178Snjl */ 947107178Snjl if (w1 == NULL) { 948107178Snjl KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, 949107178Snjl ("lock missing witness structure")); 950107178Snjl continue; 951107178Snjl } 95244498Sgibbs /* 95344498Sgibbs * If we are locking Giant and this is a sleepable 954107178Snjl * lock, then skip it. 
95544498Sgibbs */ 956107178Snjl if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 95739215Sgibbs lock == &Giant.mtx_object) 958107178Snjl continue; 959107178Snjl /* 96039215Sgibbs * If we are locking a sleepable lock and this lock 961107178Snjl * is Giant, then skip it. 962107178Snjl */ 96339215Sgibbs if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 964107178Snjl lock1->li_lock == &Giant.mtx_object) 965107178Snjl continue; 966107178Snjl /* 967107178Snjl * If we are locking a sleepable lock and this lock 968107178Snjl * isn't sleepable, we want to treat it as a lock 969107178Snjl * order violation to enfore a general lock order of 970107178Snjl * sleepable locks before non-sleepable locks. 971107178Snjl */ 972107178Snjl if (((lock->lo_flags & LO_SLEEPABLE) != 0 && 973107178Snjl (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 974107178Snjl goto reversal; 975107178Snjl /* 97639215Sgibbs * If we are locking Giant and this is a non-sleepable 97739215Sgibbs * lock, then treat it as a reversal. 978107178Snjl */ 979107178Snjl if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && 980107178Snjl lock == &Giant.mtx_object) 981107178Snjl goto reversal; 982162704Smjacob /* 983107178Snjl * Check the lock order hierarchy for a reveresal. 984107178Snjl */ 985107178Snjl if (!isitmydescendant(w, w1)) 986107178Snjl continue; 987 reversal: 988 /* 989 * We have a lock order violation, check to see if it 990 * is allowed or has already been yelled about. 991 */ 992 mtx_unlock_spin(&w_mtx); 993#ifdef BLESSING 994 /* 995 * If the lock order is blessed, just bail. We don't 996 * look for other lock order violations though, which 997 * may be a bug. 998 */ 999 if (blessed(w, w1)) 1000 return; 1001#endif 1002 if (lock1->li_lock == &Giant.mtx_object) { 1003 if (w1->w_Giant_squawked) 1004 return; 1005 else 1006 w1->w_Giant_squawked = 1; 1007 } else { 1008 if (w1->w_other_squawked) 1009 return; 1010 else 1011 w1->w_other_squawked = 1; 1012 } 1013 /* 1014 * Ok, yell about it. 
1015 */ 1016 if (((lock->lo_flags & LO_SLEEPABLE) != 0 && 1017 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 1018 printf( 1019 "lock order reversal: (sleepable after non-sleepable)\n"); 1020 else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 1021 && lock == &Giant.mtx_object) 1022 printf( 1023 "lock order reversal: (Giant after non-sleepable)\n"); 1024 else 1025 printf("lock order reversal:\n"); 1026 /* 1027 * Try to locate an earlier lock with 1028 * witness w in our list. 1029 */ 1030 do { 1031 lock2 = &lle->ll_children[i]; 1032 MPASS(lock2->li_lock != NULL); 1033 if (lock2->li_lock->lo_witness == w) 1034 break; 1035 if (i == 0 && lle->ll_next != NULL) { 1036 lle = lle->ll_next; 1037 i = lle->ll_count - 1; 1038 MPASS(i >= 0 && i < LOCK_NCHILDREN); 1039 } else 1040 i--; 1041 } while (i >= 0); 1042 if (i < 0) { 1043 printf(" 1st %p %s (%s) @ %s:%d\n", 1044 lock1->li_lock, lock1->li_lock->lo_name, 1045 lock1->li_lock->lo_type, lock1->li_file, 1046 lock1->li_line); 1047 printf(" 2nd %p %s (%s) @ %s:%d\n", lock, 1048 lock->lo_name, lock->lo_type, file, line); 1049 } else { 1050 printf(" 1st %p %s (%s) @ %s:%d\n", 1051 lock2->li_lock, lock2->li_lock->lo_name, 1052 lock2->li_lock->lo_type, lock2->li_file, 1053 lock2->li_line); 1054 printf(" 2nd %p %s (%s) @ %s:%d\n", 1055 lock1->li_lock, lock1->li_lock->lo_name, 1056 lock1->li_lock->lo_type, lock1->li_file, 1057 lock1->li_line); 1058 printf(" 3rd %p %s (%s) @ %s:%d\n", lock, 1059 lock->lo_name, lock->lo_type, file, line); 1060 } 1061#ifdef KDB 1062 goto debugger; 1063#else 1064 return; 1065#endif 1066 } 1067 } 1068 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 1069 /* 1070 * If requested, build a new lock order. However, don't build a new 1071 * relationship between a sleepable lock and Giant if it is in the 1072 * wrong direction. The correct lock order is that sleepable locks 1073 * always come before Giant. 
1074 */ 1075 if (flags & LOP_NEWORDER && 1076 !(lock1->li_lock == &Giant.mtx_object && 1077 (lock->lo_flags & LO_SLEEPABLE) != 0)) { 1078 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 1079 lock->lo_type, lock1->li_lock->lo_type); 1080 if (!itismychild(lock1->li_lock->lo_witness, w)) 1081 /* Witness is dead. */ 1082 return; 1083 } 1084 mtx_unlock_spin(&w_mtx); 1085 return; 1086 1087#ifdef KDB 1088debugger: 1089 if (witness_trace) 1090 kdb_backtrace(); 1091 if (witness_kdb) 1092 kdb_enter(__func__); 1093#endif 1094} 1095 1096void 1097witness_lock(struct lock_object *lock, int flags, const char *file, int line) 1098{ 1099 struct lock_list_entry **lock_list, *lle; 1100 struct lock_instance *instance; 1101 struct witness *w; 1102 struct thread *td; 1103 1104 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1105 panicstr != NULL) 1106 return; 1107 w = lock->lo_witness; 1108 td = curthread; 1109 file = fixup_filename(file); 1110 1111 /* Determine lock list for this lock. */ 1112 if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) 1113 lock_list = &td->td_sleeplocks; 1114 else 1115 lock_list = PCPU_PTR(spinlocks); 1116 1117 /* Check to see if we are recursing on a lock we already own. */ 1118 instance = find_instance(*lock_list, lock); 1119 if (instance != NULL) { 1120 instance->li_flags++; 1121 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 1122 td->td_proc->p_pid, lock->lo_name, 1123 instance->li_flags & LI_RECURSEMASK); 1124 instance->li_file = file; 1125 instance->li_line = line; 1126 return; 1127 } 1128 1129 /* Update per-witness last file and line acquire. */ 1130 w->w_file = file; 1131 w->w_line = line; 1132 1133 /* Find the next open lock instance in the list and fill it. 
*/ 1134 lle = *lock_list; 1135 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 1136 lle = witness_lock_list_get(); 1137 if (lle == NULL) 1138 return; 1139 lle->ll_next = *lock_list; 1140 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, 1141 td->td_proc->p_pid, lle); 1142 *lock_list = lle; 1143 } 1144 instance = &lle->ll_children[lle->ll_count++]; 1145 instance->li_lock = lock; 1146 instance->li_line = line; 1147 instance->li_file = file; 1148 if ((flags & LOP_EXCLUSIVE) != 0) 1149 instance->li_flags = LI_EXCLUSIVE; 1150 else 1151 instance->li_flags = 0; 1152 CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, 1153 td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); 1154} 1155 1156void 1157witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) 1158{ 1159 struct lock_instance *instance; 1160 struct lock_class *class; 1161 1162 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1163 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1164 return; 1165 class = LOCK_CLASS(lock); 1166 file = fixup_filename(file); 1167 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1168 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", 1169 class->lc_name, lock->lo_name, file, line); 1170 if ((flags & LOP_TRYLOCK) == 0) 1171 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, 1172 lock->lo_name, file, line); 1173 if ((class->lc_flags & LC_SLEEPLOCK) == 0) 1174 panic("upgrade of non-sleep lock (%s) %s @ %s:%d", 1175 class->lc_name, lock->lo_name, file, line); 1176 instance = find_instance(curthread->td_sleeplocks, lock); 1177 if (instance == NULL) 1178 panic("upgrade of unlocked lock (%s) %s @ %s:%d", 1179 class->lc_name, lock->lo_name, file, line); 1180 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1181 panic("upgrade of exclusive lock (%s) %s @ %s:%d", 1182 class->lc_name, lock->lo_name, file, line); 1183 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1184 panic("upgrade of recursed lock (%s) %s 
r=%d @ %s:%d", 1185 class->lc_name, lock->lo_name, 1186 instance->li_flags & LI_RECURSEMASK, file, line); 1187 instance->li_flags |= LI_EXCLUSIVE; 1188} 1189 1190void 1191witness_downgrade(struct lock_object *lock, int flags, const char *file, 1192 int line) 1193{ 1194 struct lock_instance *instance; 1195 struct lock_class *class; 1196 1197 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1198 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1199 return; 1200 class = LOCK_CLASS(lock); 1201 file = fixup_filename(file); 1202 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1203 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", 1204 class->lc_name, lock->lo_name, file, line); 1205 if ((class->lc_flags & LC_SLEEPLOCK) == 0) 1206 panic("downgrade of non-sleep lock (%s) %s @ %s:%d", 1207 class->lc_name, lock->lo_name, file, line); 1208 instance = find_instance(curthread->td_sleeplocks, lock); 1209 if (instance == NULL) 1210 panic("downgrade of unlocked lock (%s) %s @ %s:%d", 1211 class->lc_name, lock->lo_name, file, line); 1212 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 1213 panic("downgrade of shared lock (%s) %s @ %s:%d", 1214 class->lc_name, lock->lo_name, file, line); 1215 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1216 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", 1217 class->lc_name, lock->lo_name, 1218 instance->li_flags & LI_RECURSEMASK, file, line); 1219 instance->li_flags &= ~LI_EXCLUSIVE; 1220} 1221 1222void 1223witness_unlock(struct lock_object *lock, int flags, const char *file, int line) 1224{ 1225 struct lock_list_entry **lock_list, *lle; 1226 struct lock_instance *instance; 1227 struct lock_class *class; 1228 struct thread *td; 1229 register_t s; 1230 int i, j; 1231 1232 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 1233 panicstr != NULL) 1234 return; 1235 td = curthread; 1236 class = LOCK_CLASS(lock); 1237 file = fixup_filename(file); 1238 1239 /* Find lock instance associated 
with this lock. */ 1240 if (class->lc_flags & LC_SLEEPLOCK) 1241 lock_list = &td->td_sleeplocks; 1242 else 1243 lock_list = PCPU_PTR(spinlocks); 1244 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1245 for (i = 0; i < (*lock_list)->ll_count; i++) { 1246 instance = &(*lock_list)->ll_children[i]; 1247 if (instance->li_lock == lock) 1248 goto found; 1249 } 1250 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1251 file, line); 1252found: 1253 1254 /* First, check for shared/exclusive mismatches. */ 1255 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1256 (flags & LOP_EXCLUSIVE) == 0) { 1257 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1258 lock->lo_name, file, line); 1259 printf("while exclusively locked from %s:%d\n", 1260 instance->li_file, instance->li_line); 1261 panic("excl->ushare"); 1262 } 1263 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1264 (flags & LOP_EXCLUSIVE) != 0) { 1265 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1266 lock->lo_name, file, line); 1267 printf("while share locked from %s:%d\n", instance->li_file, 1268 instance->li_line); 1269 panic("share->uexcl"); 1270 } 1271 1272 /* If we are recursed, unrecurse. */ 1273 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1274 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1275 td->td_proc->p_pid, instance->li_lock->lo_name, 1276 instance->li_flags); 1277 instance->li_flags--; 1278 return; 1279 } 1280 1281 /* Otherwise, remove this item from the list. */ 1282 s = intr_disable(); 1283 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1284 td->td_proc->p_pid, instance->li_lock->lo_name, 1285 (*lock_list)->ll_count - 1); 1286 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1287 (*lock_list)->ll_children[j] = 1288 (*lock_list)->ll_children[j + 1]; 1289 (*lock_list)->ll_count--; 1290 intr_restore(s); 1291 1292 /* If this lock list entry is now empty, free it. 
*/ 1293 if ((*lock_list)->ll_count == 0) { 1294 lle = *lock_list; 1295 *lock_list = lle->ll_next; 1296 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1297 td->td_proc->p_pid, lle); 1298 witness_lock_list_free(lle); 1299 } 1300} 1301 1302/* 1303 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1304 * exempt Giant and sleepable locks from the checks as well. If any 1305 * non-exempt locks are held, then a supplied message is printed to the 1306 * console along with a list of the offending locks. If indicated in the 1307 * flags then a failure results in a panic as well. 1308 */ 1309int 1310witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1311{ 1312 struct lock_list_entry *lle; 1313 struct lock_instance *lock1; 1314 struct thread *td; 1315 va_list ap; 1316 int i, n; 1317 1318 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1319 return (0); 1320 n = 0; 1321 td = curthread; 1322 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1323 for (i = lle->ll_count - 1; i >= 0; i--) { 1324 lock1 = &lle->ll_children[i]; 1325 if (lock1->li_lock == lock) 1326 continue; 1327 if (flags & WARN_GIANTOK && 1328 lock1->li_lock == &Giant.mtx_object) 1329 continue; 1330 if (flags & WARN_SLEEPOK && 1331 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1332 continue; 1333 if (n == 0) { 1334 va_start(ap, fmt); 1335 vprintf(fmt, ap); 1336 va_end(ap); 1337 printf(" with the following"); 1338 if (flags & WARN_SLEEPOK) 1339 printf(" non-sleepable"); 1340 printf(" locks held:\n"); 1341 } 1342 n++; 1343 witness_list_lock(lock1); 1344 } 1345 if (PCPU_GET(spinlocks) != NULL) { 1346 /* 1347 * Since we already hold a spinlock preemption is 1348 * already blocked. 
1349 */ 1350 if (n == 0) { 1351 va_start(ap, fmt); 1352 vprintf(fmt, ap); 1353 va_end(ap); 1354 printf(" with the following"); 1355 if (flags & WARN_SLEEPOK) 1356 printf(" non-sleepable"); 1357 printf(" locks held:\n"); 1358 } 1359 n += witness_list_locks(PCPU_PTR(spinlocks)); 1360 } 1361 if (flags & WARN_PANIC && n) 1362 panic("witness_warn"); 1363#ifdef KDB 1364 else if (witness_kdb && n) 1365 kdb_enter(__func__); 1366 else if (witness_trace && n) 1367 kdb_backtrace(); 1368#endif 1369 return (n); 1370} 1371 1372const char * 1373witness_file(struct lock_object *lock) 1374{ 1375 struct witness *w; 1376 1377 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1378 return ("?"); 1379 w = lock->lo_witness; 1380 return (w->w_file); 1381} 1382 1383int 1384witness_line(struct lock_object *lock) 1385{ 1386 struct witness *w; 1387 1388 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1389 return (0); 1390 w = lock->lo_witness; 1391 return (w->w_line); 1392} 1393 1394static struct witness * 1395enroll(const char *description, struct lock_class *lock_class) 1396{ 1397 struct witness *w; 1398 1399 if (witness_watch == 0 || panicstr != NULL) 1400 return (NULL); 1401 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1402 return (NULL); 1403 mtx_lock_spin(&w_mtx); 1404 STAILQ_FOREACH(w, &w_all, w_list) { 1405 if (w->w_name == description || (w->w_refcount > 0 && 1406 strcmp(description, w->w_name) == 0)) { 1407 w->w_refcount++; 1408 mtx_unlock_spin(&w_mtx); 1409 if (lock_class != w->w_class) 1410 panic( 1411 "lock (%s) %s does not match earlier (%s) lock", 1412 description, lock_class->lc_name, 1413 w->w_class->lc_name); 1414 return (w); 1415 } 1416 } 1417 if ((w = witness_get()) == NULL) 1418 goto out; 1419 w->w_name = description; 1420 w->w_class = lock_class; 1421 w->w_refcount = 1; 1422 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1423 if (lock_class->lc_flags & LC_SPINLOCK) { 1424 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1425 
w_spin_cnt++; 1426 } else if (lock_class->lc_flags & LC_SLEEPLOCK) { 1427 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1428 w_sleep_cnt++; 1429 } else { 1430 mtx_unlock_spin(&w_mtx); 1431 panic("lock class %s is not sleep or spin", 1432 lock_class->lc_name); 1433 } 1434 mtx_unlock_spin(&w_mtx); 1435out: 1436 /* 1437 * We issue a warning for any spin locks not defined in the static 1438 * order list as a way to discourage their use (folks should really 1439 * be using non-spin mutexes most of the time). However, several 1440 * 3rd part device drivers use spin locks because that is all they 1441 * have available on Windows and Linux and they think that normal 1442 * mutexes are insufficient. 1443 */ 1444 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_spin_warn) 1445 printf("WITNESS: spin lock %s not in order list\n", 1446 description); 1447 return (w); 1448} 1449 1450/* Don't let the door bang you on the way out... */ 1451static int 1452depart(struct witness *w) 1453{ 1454 struct witness_child_list_entry *wcl, *nwcl; 1455 struct witness_list *list; 1456 struct witness *parent; 1457 1458 MPASS(w->w_refcount == 0); 1459 if (w->w_class->lc_flags & LC_SLEEPLOCK) { 1460 list = &w_sleep; 1461 w_sleep_cnt--; 1462 } else { 1463 list = &w_spin; 1464 w_spin_cnt--; 1465 } 1466 /* 1467 * First, we run through the entire tree looking for any 1468 * witnesses that the outgoing witness is a child of. For 1469 * each parent that we find, we reparent all the direct 1470 * children of the outgoing witness to its parent. 1471 */ 1472 STAILQ_FOREACH(parent, list, w_typelist) { 1473 if (!isitmychild(parent, w)) 1474 continue; 1475 removechild(parent, w); 1476 } 1477 1478 /* 1479 * Now we go through and free up the child list of the 1480 * outgoing witness. 1481 */ 1482 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1483 nwcl = wcl->wcl_next; 1484 w_child_cnt--; 1485 witness_child_free(wcl); 1486 } 1487 1488 /* 1489 * Detach from various lists and free. 
1490 */ 1491 STAILQ_REMOVE(list, w, witness, w_typelist); 1492 STAILQ_REMOVE(&w_all, w, witness, w_list); 1493 witness_free(w); 1494 1495 return (1); 1496} 1497 1498/* 1499 * Add "child" as a direct child of "parent". Returns false if 1500 * we fail due to out of memory. 1501 */ 1502static int 1503insertchild(struct witness *parent, struct witness *child) 1504{ 1505 struct witness_child_list_entry **wcl; 1506 1507 MPASS(child != NULL && parent != NULL); 1508 1509 /* 1510 * Insert "child" after "parent" 1511 */ 1512 wcl = &parent->w_children; 1513 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1514 wcl = &(*wcl)->wcl_next; 1515 if (*wcl == NULL) { 1516 *wcl = witness_child_get(); 1517 if (*wcl == NULL) 1518 return (0); 1519 w_child_cnt++; 1520 } 1521 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1522 1523 return (1); 1524} 1525 1526 1527static int 1528itismychild(struct witness *parent, struct witness *child) 1529{ 1530 struct witness_list *list; 1531 1532 MPASS(child != NULL && parent != NULL); 1533 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1534 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1535 panic( 1536 "%s: parent (%s) and child (%s) are not the same lock type", 1537 __func__, parent->w_class->lc_name, 1538 child->w_class->lc_name); 1539 1540 if (!insertchild(parent, child)) 1541 return (0); 1542 1543 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1544 list = &w_sleep; 1545 else 1546 list = &w_spin; 1547 return (1); 1548} 1549 1550static void 1551removechild(struct witness *parent, struct witness *child) 1552{ 1553 struct witness_child_list_entry **wcl, *wcl1; 1554 int i; 1555 1556 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1557 for (i = 0; i < (*wcl)->wcl_count; i++) 1558 if ((*wcl)->wcl_children[i] == child) 1559 goto found; 1560 return; 1561found: 1562 (*wcl)->wcl_count--; 1563 if ((*wcl)->wcl_count > i) 1564 (*wcl)->wcl_children[i] = 1565 
(*wcl)->wcl_children[(*wcl)->wcl_count]; 1566 MPASS((*wcl)->wcl_children[i] != NULL); 1567 if ((*wcl)->wcl_count != 0) 1568 return; 1569 wcl1 = *wcl; 1570 *wcl = wcl1->wcl_next; 1571 w_child_cnt--; 1572 witness_child_free(wcl1); 1573} 1574 1575static int 1576isitmychild(struct witness *parent, struct witness *child) 1577{ 1578 struct witness_child_list_entry *wcl; 1579 int i; 1580 1581 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1582 for (i = 0; i < wcl->wcl_count; i++) { 1583 if (wcl->wcl_children[i] == child) 1584 return (1); 1585 } 1586 } 1587 return (0); 1588} 1589 1590static int 1591isitmydescendant(struct witness *parent, struct witness *child) 1592{ 1593 struct witness_child_list_entry *wcl; 1594 int i, j; 1595 1596 if (isitmychild(parent, child)) 1597 return (1); 1598 j = 0; 1599 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1600 MPASS(j < 1000); 1601 for (i = 0; i < wcl->wcl_count; i++) { 1602 if (isitmydescendant(wcl->wcl_children[i], child)) 1603 return (1); 1604 } 1605 j++; 1606 } 1607 return (0); 1608} 1609 1610#ifdef BLESSING 1611static int 1612blessed(struct witness *w1, struct witness *w2) 1613{ 1614 int i; 1615 struct witness_blessed *b; 1616 1617 for (i = 0; i < blessed_count; i++) { 1618 b = &blessed_list[i]; 1619 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1620 if (strcmp(w2->w_name, b->b_lock2) == 0) 1621 return (1); 1622 continue; 1623 } 1624 if (strcmp(w1->w_name, b->b_lock2) == 0) 1625 if (strcmp(w2->w_name, b->b_lock1) == 0) 1626 return (1); 1627 } 1628 return (0); 1629} 1630#endif 1631 1632static struct witness * 1633witness_get(void) 1634{ 1635 struct witness *w; 1636 1637 if (witness_watch == 0) { 1638 mtx_unlock_spin(&w_mtx); 1639 return (NULL); 1640 } 1641 if (STAILQ_EMPTY(&w_free)) { 1642 witness_watch = 0; 1643 mtx_unlock_spin(&w_mtx); 1644 printf("%s: witness exhausted\n", __func__); 1645 return (NULL); 1646 } 1647 w = STAILQ_FIRST(&w_free); 1648 STAILQ_REMOVE_HEAD(&w_free, w_list); 1649 
w_free_cnt--; 1650 bzero(w, sizeof(*w)); 1651 return (w); 1652} 1653 1654static void 1655witness_free(struct witness *w) 1656{ 1657 1658 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1659 w_free_cnt++; 1660} 1661 1662static struct witness_child_list_entry * 1663witness_child_get(void) 1664{ 1665 struct witness_child_list_entry *wcl; 1666 1667 if (witness_watch == 0) { 1668 mtx_unlock_spin(&w_mtx); 1669 return (NULL); 1670 } 1671 wcl = w_child_free; 1672 if (wcl == NULL) { 1673 witness_watch = 0; 1674 mtx_unlock_spin(&w_mtx); 1675 printf("%s: witness exhausted\n", __func__); 1676 return (NULL); 1677 } 1678 w_child_free = wcl->wcl_next; 1679 w_child_free_cnt--; 1680 bzero(wcl, sizeof(*wcl)); 1681 return (wcl); 1682} 1683 1684static void 1685witness_child_free(struct witness_child_list_entry *wcl) 1686{ 1687 1688 wcl->wcl_next = w_child_free; 1689 w_child_free = wcl; 1690 w_child_free_cnt++; 1691} 1692 1693static struct lock_list_entry * 1694witness_lock_list_get(void) 1695{ 1696 struct lock_list_entry *lle; 1697 1698 if (witness_watch == 0) 1699 return (NULL); 1700 mtx_lock_spin(&w_mtx); 1701 lle = w_lock_list_free; 1702 if (lle == NULL) { 1703 witness_watch = 0; 1704 mtx_unlock_spin(&w_mtx); 1705 printf("%s: witness exhausted\n", __func__); 1706 return (NULL); 1707 } 1708 w_lock_list_free = lle->ll_next; 1709 mtx_unlock_spin(&w_mtx); 1710 bzero(lle, sizeof(*lle)); 1711 return (lle); 1712} 1713 1714static void 1715witness_lock_list_free(struct lock_list_entry *lle) 1716{ 1717 1718 mtx_lock_spin(&w_mtx); 1719 lle->ll_next = w_lock_list_free; 1720 w_lock_list_free = lle; 1721 mtx_unlock_spin(&w_mtx); 1722} 1723 1724static struct lock_instance * 1725find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1726{ 1727 struct lock_list_entry *lle; 1728 struct lock_instance *instance; 1729 int i; 1730 1731 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1732 for (i = lle->ll_count - 1; i >= 0; i--) { 1733 instance = &lle->ll_children[i]; 1734 if 
(instance->li_lock == lock) 1735 return (instance); 1736 } 1737 return (NULL); 1738} 1739 1740static void 1741witness_list_lock(struct lock_instance *instance) 1742{ 1743 struct lock_object *lock; 1744 1745 lock = instance->li_lock; 1746 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 1747 "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name); 1748 if (lock->lo_type != lock->lo_name) 1749 printf(" (%s)", lock->lo_type); 1750 printf(" r = %d (%p) locked @ %s:%d\n", 1751 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1752 instance->li_line); 1753} 1754 1755#ifdef DDB 1756static int 1757witness_thread_has_locks(struct thread *td) 1758{ 1759 1760 return (td->td_sleeplocks != NULL); 1761} 1762 1763static int 1764witness_proc_has_locks(struct proc *p) 1765{ 1766 struct thread *td; 1767 1768 FOREACH_THREAD_IN_PROC(p, td) { 1769 if (witness_thread_has_locks(td)) 1770 return (1); 1771 } 1772 return (0); 1773} 1774#endif 1775 1776int 1777witness_list_locks(struct lock_list_entry **lock_list) 1778{ 1779 struct lock_list_entry *lle; 1780 int i, nheld; 1781 1782 nheld = 0; 1783 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1784 for (i = lle->ll_count - 1; i >= 0; i--) { 1785 witness_list_lock(&lle->ll_children[i]); 1786 nheld++; 1787 } 1788 return (nheld); 1789} 1790 1791/* 1792 * This is a bit risky at best. We call this function when we have timed 1793 * out acquiring a spin lock, and we assume that the other CPU is stuck 1794 * with this lock held. So, we go groveling around in the other CPU's 1795 * per-cpu data to try to find the lock instance for this spin lock to 1796 * see when it was last acquired. 
1797 */ 1798void 1799witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1800{ 1801 struct lock_instance *instance; 1802 struct pcpu *pc; 1803 1804 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1805 return; 1806 pc = pcpu_find(owner->td_oncpu); 1807 instance = find_instance(pc->pc_spinlocks, lock); 1808 if (instance != NULL) 1809 witness_list_lock(instance); 1810} 1811 1812void 1813witness_save(struct lock_object *lock, const char **filep, int *linep) 1814{ 1815 struct lock_list_entry *lock_list; 1816 struct lock_instance *instance; 1817 struct lock_class *class; 1818 1819 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1820 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1821 return; 1822 class = LOCK_CLASS(lock); 1823 if (class->lc_flags & LC_SLEEPLOCK) 1824 lock_list = curthread->td_sleeplocks; 1825 else { 1826 if (witness_skipspin) 1827 return; 1828 lock_list = PCPU_GET(spinlocks); 1829 } 1830 instance = find_instance(lock_list, lock); 1831 if (instance == NULL) 1832 panic("%s: lock (%s) %s not locked", __func__, 1833 class->lc_name, lock->lo_name); 1834 *filep = instance->li_file; 1835 *linep = instance->li_line; 1836} 1837 1838void 1839witness_restore(struct lock_object *lock, const char *file, int line) 1840{ 1841 struct lock_list_entry *lock_list; 1842 struct lock_instance *instance; 1843 struct lock_class *class; 1844 1845 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1846 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1847 return; 1848 class = LOCK_CLASS(lock); 1849 if (class->lc_flags & LC_SLEEPLOCK) 1850 lock_list = curthread->td_sleeplocks; 1851 else { 1852 if (witness_skipspin) 1853 return; 1854 lock_list = PCPU_GET(spinlocks); 1855 } 1856 instance = find_instance(lock_list, lock); 1857 if (instance == NULL) 1858 panic("%s: lock (%s) %s not locked", __func__, 1859 class->lc_name, lock->lo_name); 1860 lock->lo_witness->w_file = file; 1861 
lock->lo_witness->w_line = line; 1862 instance->li_file = file; 1863 instance->li_line = line; 1864} 1865 1866void 1867witness_assert(struct lock_object *lock, int flags, const char *file, int line) 1868{ 1869#ifdef INVARIANT_SUPPORT 1870 struct lock_instance *instance; 1871 struct lock_class *class; 1872 1873 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1874 return; 1875 class = LOCK_CLASS(lock); 1876 if ((class->lc_flags & LC_SLEEPLOCK) != 0) 1877 instance = find_instance(curthread->td_sleeplocks, lock); 1878 else if ((class->lc_flags & LC_SPINLOCK) != 0) 1879 instance = find_instance(PCPU_GET(spinlocks), lock); 1880 else { 1881 panic("Lock (%s) %s is not sleep or spin!", 1882 class->lc_name, lock->lo_name); 1883 } 1884 file = fixup_filename(file); 1885 switch (flags) { 1886 case LA_UNLOCKED: 1887 if (instance != NULL) 1888 panic("Lock (%s) %s locked @ %s:%d.", 1889 class->lc_name, lock->lo_name, file, line); 1890 break; 1891 case LA_LOCKED: 1892 case LA_LOCKED | LA_RECURSED: 1893 case LA_LOCKED | LA_NOTRECURSED: 1894 case LA_SLOCKED: 1895 case LA_SLOCKED | LA_RECURSED: 1896 case LA_SLOCKED | LA_NOTRECURSED: 1897 case LA_XLOCKED: 1898 case LA_XLOCKED | LA_RECURSED: 1899 case LA_XLOCKED | LA_NOTRECURSED: 1900 if (instance == NULL) { 1901 panic("Lock (%s) %s not locked @ %s:%d.", 1902 class->lc_name, lock->lo_name, file, line); 1903 break; 1904 } 1905 if ((flags & LA_XLOCKED) != 0 && 1906 (instance->li_flags & LI_EXCLUSIVE) == 0) 1907 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1908 class->lc_name, lock->lo_name, file, line); 1909 if ((flags & LA_SLOCKED) != 0 && 1910 (instance->li_flags & LI_EXCLUSIVE) != 0) 1911 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1912 class->lc_name, lock->lo_name, file, line); 1913 if ((flags & LA_RECURSED) != 0 && 1914 (instance->li_flags & LI_RECURSEMASK) == 0) 1915 panic("Lock (%s) %s not recursed @ %s:%d.", 1916 class->lc_name, lock->lo_name, file, line); 1917 if ((flags & LA_NOTRECURSED) 
!= 0 && 1918 (instance->li_flags & LI_RECURSEMASK) != 0) 1919 panic("Lock (%s) %s recursed @ %s:%d.", 1920 class->lc_name, lock->lo_name, file, line); 1921 break; 1922 default: 1923 panic("Invalid lock assertion at %s:%d.", file, line); 1924 1925 } 1926#endif /* INVARIANT_SUPPORT */ 1927} 1928 1929#ifdef DDB 1930static void 1931witness_list(struct thread *td) 1932{ 1933 1934 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1935 KASSERT(kdb_active, ("%s: not in the debugger", __func__)); 1936 1937 if (witness_watch == 0) 1938 return; 1939 1940 witness_list_locks(&td->td_sleeplocks); 1941 1942 /* 1943 * We only handle spinlocks if td == curthread. This is somewhat broken 1944 * if td is currently executing on some other CPU and holds spin locks 1945 * as we won't display those locks. If we had a MI way of getting 1946 * the per-cpu data for a given cpu then we could use 1947 * td->td_oncpu to get the list of spinlocks for this thread 1948 * and "fix" this. 1949 * 1950 * That still wouldn't really fix this unless we locked sched_lock 1951 * or stopped the other CPU to make sure it wasn't changing the list 1952 * out from under us. It is probably best to just not try to handle 1953 * threads on other CPU's for now. 1954 */ 1955 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1956 witness_list_locks(PCPU_PTR(spinlocks)); 1957} 1958 1959DB_SHOW_COMMAND(locks, db_witness_list) 1960{ 1961 struct thread *td; 1962 1963 if (have_addr) 1964 td = db_lookup_thread(addr, TRUE); 1965 else 1966 td = kdb_thread; 1967 witness_list(td); 1968} 1969 1970DB_SHOW_COMMAND(alllocks, db_witness_list_all) 1971{ 1972 struct thread *td; 1973 struct proc *p; 1974 1975 /* 1976 * It would be nice to list only threads and processes that actually 1977 * held sleep locks, but that information is currently not exported 1978 * by WITNESS. 
1979 */ 1980 FOREACH_PROC_IN_SYSTEM(p) { 1981 if (!witness_proc_has_locks(p)) 1982 continue; 1983 FOREACH_THREAD_IN_PROC(p, td) { 1984 if (!witness_thread_has_locks(td)) 1985 continue; 1986 db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid, 1987 p->p_comm, td, td->td_tid); 1988 witness_list(td); 1989 } 1990 } 1991} 1992 1993DB_SHOW_COMMAND(witness, db_witness_display) 1994{ 1995 1996 witness_display(db_printf); 1997} 1998#endif 1999