/* subr_witness.c -- FreeBSD revision 112116 */
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_witness.c 112116 2003-03-11 21:53:12Z jhb $
31 */
32
33/*
34 * Implementation of the `witness' lock verifier.  Originally implemented for
35 * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
36 * classes in FreeBSD.
37 */
38
39/*
40 *	Main Entry: witness
41 *	Pronunciation: 'wit-n&s
42 *	Function: noun
43 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
44 *	    testimony, witness, from 2wit
45 *	Date: before 12th century
46 *	1 : attestation of a fact or event : TESTIMONY
47 *	2 : one that gives evidence; specifically : one who testifies in
48 *	    a cause or before a judicial tribunal
49 *	3 : one asked to be present at a transaction so as to be able to
50 *	    testify to its having taken place
51 *	4 : one who has personal knowledge of something
52 *	5 a : something serving as evidence or proof : SIGN
53 *	  b : public affirmation by word or example of usually
54 *	      religious faith or conviction <the heroic witness to divine
55 *	      life -- Pilot>
56 *	6 capitalized : a member of the Jehovah's Witnesses
57 */
58
59/*
60 * Special rules concerning Giant and lock orders:
61 *
62 * 1) Giant must be acquired before any other mutexes.  Stated another way,
63 *    no other mutex may be held when Giant is acquired.
64 *
65 * 2) Giant must be released when blocking on a sleepable lock.
66 *
67 * This rule is less obvious, but is a result of Giant providing the same
68 * semantics as spl().  Basically, when a thread sleeps, it must release
69 * Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence rule
70 * 2).
71 *
72 * 3) Giant may be acquired before or after sleepable locks.
73 *
74 * This rule is also not quite as obvious.  Giant may be acquired after
75 * a sleepable lock because it is a non-sleepable lock and non-sleepable
76 * locks may always be acquired while holding a sleepable lock.  The second
77 * case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
78 * you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
79 * acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
80 * blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
81 * execute.  Thus, acquiring Giant both before and after a sleepable lock
82 * will not result in a lock order reversal.
83 */
84
85#include "opt_ddb.h"
86#include "opt_witness.h"
87
88#include <sys/param.h>
89#include <sys/bus.h>
90#include <sys/kernel.h>
91#include <sys/ktr.h>
92#include <sys/lock.h>
93#include <sys/malloc.h>
94#include <sys/mutex.h>
95#include <sys/proc.h>
96#include <sys/sysctl.h>
97#include <sys/systm.h>
98
99#include <ddb/ddb.h>
100
101#include <machine/stdarg.h>
102
/* Define this to check for blessed mutexes */
#undef BLESSING

/* Size of the static witness pool; enroll() fails once it is exhausted. */
#define WITNESS_COUNT 200
#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4)
/*
 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads
 * will hold LOCK_NCHILDREN * 2 locks.  We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 *
 * Parenthesize the whole expansion so the macro is safe to use inside
 * larger expressions (e.g. with %, /, or <<).
 */
#define LOCK_CHILDCOUNT ((MAXCPU + 1024) * 2)

/* Direct children stored per witness_child_list_entry chunk. */
#define	WITNESS_NCHILDREN 6
116
struct witness_child_list_entry;

/*
 * One witness describes a lock "type" (keyed by name via enroll()), not a
 * single lock instance; w_refcount counts the lock instances sharing it.
 */
struct witness {
	const	char *w_name;		/* Lock type name. */
	struct	lock_class *w_class;	/* Lock class of this type. */
	STAILQ_ENTRY(witness) w_list;		/* List of all witnesses. */
	STAILQ_ENTRY(witness) w_typelist;	/* Witnesses of a type. */
	struct	witness_child_list_entry *w_children;	/* Great evilness... */
	const	char *w_file;		/* File of last acquisition. */
	int	w_line;			/* Line of last acquisition. */
	u_int	w_level;		/* Level in the order hierarchy. */
	u_int	w_refcount;		/* Lock instances using this witness. */
	u_char	w_Giant_squawked:1;	/* Warned once re: Giant ordering. */
	u_char	w_other_squawked:1;	/* Warned once re: other ordering. */
	u_char	w_same_squawked:1;	/* Warned once re: duplicate types. */
};

/*
 * A chunk in a singly linked chain holding up to WITNESS_NCHILDREN direct
 * children of a witness in the lock-order graph.
 */
struct witness_child_list_entry {
	struct	witness_child_list_entry *wcl_next;	/* Next chunk. */
	struct	witness *wcl_children[WITNESS_NCHILDREN]; /* Child pointers. */
	u_int	wcl_count;		/* Valid entries in wcl_children. */
};

STAILQ_HEAD(witness_list, witness);
141
#ifdef BLESSING
/*
 * A pair of lock type names whose order reversals are deliberately
 * tolerated; consulted by blessed() before yelling about a reversal.
 */
struct witness_blessed {
	const	char *b_lock1;
	const	char *b_lock2;
};
#endif

/* One entry in the static lock-order tables (see order_lists). */
struct witness_order_list_entry {
	const	char *w_name;		/* Lock type name to enroll. */
	struct	lock_class *w_class;	/* Class the type belongs to. */
};
153
/*
 * Forward declarations for the witness graph maintenance, query, and
 * bookkeeping routines defined later in this file.
 */
static struct	witness *enroll(const char *description,
				struct lock_class *lock_class);
static int	itismychild(struct witness *parent, struct witness *child);
static void	removechild(struct witness *parent, struct witness *child);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent, struct witness *child);
#ifdef BLESSING
static int	blessed(struct witness *, struct witness *);
#endif
static void	witness_displaydescendants(void(*)(const char *fmt, ...),
					   struct witness *);
static const char *fixup_filename(const char *file);
static void	witness_leveldescendents(struct witness *parent, int level);
static void	witness_levelall(void);
static struct	witness *witness_get(void);
static void	witness_free(struct witness *m);
static struct	witness_child_list_entry *witness_child_get(void);
static void	witness_child_free(struct witness_child_list_entry *wcl);
static struct	lock_list_entry *witness_lock_list_get(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct	lock_instance *find_instance(struct lock_list_entry *lock_list,
					     struct lock_object *lock);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef DDB
static void	witness_list(struct thread *td);
static void	witness_display_list(void(*prnt)(const char *fmt, ...),
				     struct witness_list *list);
static void	witness_display(void(*)(const char *fmt, ...));
#endif
183
MALLOC_DEFINE(M_WITNESS, "witness", "witness structure");

/*
 * Loader tunable / read-only sysctl; values > 1 relax order checking by
 * trusting the precomputed w_level numbers (see witness_lock()).
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness_watch", &witness_watch);
SYSCTL_INT(_debug, OID_AUTO, witness_watch, CTLFLAG_RD, &witness_watch, 0, "");

#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_DDB
int	witness_ddb = 1;
#else
int	witness_ddb = 0;
#endif
TUNABLE_INT("debug.witness_ddb", &witness_ddb);
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");

/*
 * When DDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_trace = 1;
TUNABLE_INT("debug.witness_trace", &witness_trace);
SYSCTL_INT(_debug, OID_AUTO, witness_trace, CTLFLAG_RW, &witness_trace, 0, "");
#endif /* DDB */

#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
TUNABLE_INT("debug.witness_skipspin", &witness_skipspin);
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
    "");

/* Spin mutex protecting the witness free lists and order graph below. */
static struct mtx w_mtx;
static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
static struct witness_child_list_entry *w_child_free = NULL;
static struct lock_list_entry *w_lock_list_free = NULL;
static int witness_dead;	/* fatal error, probably no memory */

/* Statically allocated storage backing the free pools above. */
static struct witness w_data[WITNESS_COUNT];
static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
237
/*
 * Hard-coded lock order seeded into the graph at boot by
 * witness_initialize().  Within each run, consecutive entries are linked
 * as parent -> child (earlier locks must be acquired before later ones).
 * A { NULL, NULL } entry terminates a run; two consecutive NULL entries
 * terminate the table.
 */
static struct witness_order_list_entry order_lists[] = {
	{ "proctree", &lock_class_sx },
	{ "allproc", &lock_class_sx },
	{ "Giant", &lock_class_mtx_sleep },
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "pipe mutex", &lock_class_mtx_sleep },
	{ "sigio lock", &lock_class_mtx_sleep },
	{ "process group", &lock_class_mtx_sleep },
	{ "process lock", &lock_class_mtx_sleep },
	{ "session", &lock_class_mtx_sleep },
	{ "uidinfo hash", &lock_class_mtx_sleep },
	{ "uidinfo struct", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * spin locks
	 */
#ifdef SMP
	{ "ap boot", &lock_class_mtx_spin },
#ifdef __i386__
	{ "com", &lock_class_mtx_spin },
#endif
#endif
	{ "sio", &lock_class_mtx_spin },
#ifdef __i386__
	{ "cy", &lock_class_mtx_spin },
#endif
	{ "sabtty", &lock_class_mtx_spin },
	{ "zstty", &lock_class_mtx_spin },
	{ "ng_node", &lock_class_mtx_spin },
	{ "ng_worklist", &lock_class_mtx_spin },
	{ "ithread table lock", &lock_class_mtx_spin },
	{ "sched lock", &lock_class_mtx_spin },
	{ "callout", &lock_class_mtx_spin },
	/*
	 * leaf locks
	 */
	{ "allpmaps", &lock_class_mtx_spin },
	{ "vm page queue free mutex", &lock_class_mtx_spin },
	{ "icu", &lock_class_mtx_spin },
#ifdef SMP
	{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) && defined(APIC_IO)
	{ "tlb", &lock_class_mtx_spin },
#endif
#ifdef __sparc64__
	{ "ipi", &lock_class_mtx_spin },
#endif
#endif
	{ "clk", &lock_class_mtx_spin },
	{ "mutex profiling lock", &lock_class_mtx_spin },
	{ "kse zombie lock", &lock_class_mtx_spin },
	{ "ALD Queue", &lock_class_mtx_spin },
#ifdef __ia64__
	{ "MCA spin lock", &lock_class_mtx_spin },
#endif
#ifdef __i386__
	{ "pcicfg", &lock_class_mtx_spin },
#endif
	{ NULL, NULL },
	{ NULL, NULL }
};
299
#ifdef BLESSING
/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
/* Number of entries in blessed_list (currently empty). */
static int blessed_count =
	sizeof(blessed_list) / sizeof(struct witness_blessed);
#endif
310
/*
 * List of all locks in the system.
 */
TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks);

/*
 * Mutex protecting all_locks, statically initialized so it is usable
 * before witness_initialize() runs; witness_initialize() inserts it at
 * the head of all_locks itself.
 */
static struct mtx all_mtx = {
	{ &lock_class_mtx_sleep,	/* mtx_object.lo_class */
	  "All locks list",		/* mtx_object.lo_name */
	  "All locks list",		/* mtx_object.lo_type */
	  LO_INITIALIZED,		/* mtx_object.lo_flags */
	  { NULL, NULL },		/* mtx_object.lo_list */
	  NULL },			/* mtx_object.lo_witness */
	MTX_UNOWNED, 0,			/* mtx_lock, mtx_recurse */
	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
	{ NULL, NULL }			/* mtx_contested */
};

/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * Global variables for book keeping.
 */
static int lock_cur_cnt;	/* Locks currently on all_locks. */
static int lock_max_cnt;	/* High-water mark of lock_cur_cnt. */
338
339/*
340 * The WITNESS-enabled diagnostic code.
341 */
/*
 * One-time witness setup, run from SYSINIT at SI_SUB_WITNESS: populate
 * the free pools from the static arrays, seed the order graph from
 * order_lists, enroll every lock already registered on all_locks, and
 * finally clear witness_cold to enable checking.
 */
static void
witness_initialize(void *dummy __unused)
{
	struct lock_object *lock;
	struct witness_order_list_entry *order;
	struct witness *w, *w1;
	int i;

	/*
	 * We have to release Giant before initializing its witness
	 * structure so that WITNESS doesn't get confused.
	 */
	mtx_unlock(&Giant);
	mtx_assert(&Giant, MA_NOTOWNED);

	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
	TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list);
	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
	    MTX_NOWITNESS);
	/* Hand every statically allocated structure to its free pool. */
	for (i = 0; i < WITNESS_COUNT; i++)
		witness_free(&w_data[i]);
	for (i = 0; i < WITNESS_CHILDCOUNT; i++)
		witness_child_free(&w_childdata[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);

	/* First add in all the specified order lists. */
	for (order = order_lists; order->w_name != NULL; order++) {
		w = enroll(order->w_name, order->w_class);
		if (w == NULL)
			continue;
		w->w_file = "order list";
		/* Link each consecutive pair in the run as parent/child. */
		for (order++; order->w_name != NULL; order++) {
			w1 = enroll(order->w_name, order->w_class);
			if (w1 == NULL)
				continue;
			w1->w_file = "order list";
			itismychild(w, w1);
			w = w1;
		}
	}

	/* Iterate through all locks and add them to witness. */
	mtx_lock(&all_mtx);
	TAILQ_FOREACH(lock, &all_locks, lo_list) {
		if (lock->lo_flags & LO_WITNESS)
			lock->lo_witness = enroll(lock->lo_type,
			    lock->lo_class);
		else
			lock->lo_witness = NULL;
	}
	mtx_unlock(&all_mtx);

	/* Mark the witness code as being ready for use. */
	atomic_store_rel_int(&witness_cold, 0);

	mtx_lock(&Giant);
}
SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL)
401
/*
 * Register a newly initialized lock with witness: verify the lock's
 * flags are permitted by its class, insert it on all_locks, and enroll
 * it in the order graph if LO_WITNESS is set and witness is usable.
 */
void
witness_init(struct lock_object *lock)
{
	struct lock_class *class;

	class = lock->lo_class;
	if (lock->lo_flags & LO_INITIALIZED)
		panic("%s: lock (%s) %s is already initialized", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
	    (class->lc_flags & LC_RECURSABLE) == 0)
		panic("%s: lock (%s) %s can not be recursable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
	    (class->lc_flags & LC_SLEEPABLE) == 0)
		panic("%s: lock (%s) %s can not be sleepable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
	    (class->lc_flags & LC_UPGRADABLE) == 0)
		panic("%s: lock (%s) %s can not be upgradable", __func__,
		    class->lc_name, lock->lo_name);

	mtx_lock(&all_mtx);
	TAILQ_INSERT_TAIL(&all_locks, lock, lo_list);
	lock->lo_flags |= LO_INITIALIZED;
	lock_cur_cnt++;
	if (lock_cur_cnt > lock_max_cnt)
		lock_max_cnt = lock_cur_cnt;
	mtx_unlock(&all_mtx);
	/* Enroll outside all_mtx; skip if witness is cold, dead or panicking. */
	if (!witness_cold && !witness_dead && panicstr == NULL &&
	    (lock->lo_flags & LO_WITNESS) != 0)
		lock->lo_witness = enroll(lock->lo_type, class);
	else
		lock->lo_witness = NULL;
}
437
/*
 * Unregister a lock that is being destroyed: drop the reference on its
 * witness (if any) and remove it from all_locks.
 */
void
witness_destroy(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold)
		panic("lock (%s) %s destroyed while witness_cold",
		    lock->lo_class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_INITIALIZED) == 0)
		panic("%s: lock (%s) %s is not initialized", __func__,
		    lock->lo_class->lc_name, lock->lo_name);

	/* XXX: need to verify that no one holds the lock */
	w = lock->lo_witness;
	if (w != NULL) {
		mtx_lock_spin(&w_mtx);
		MPASS(w->w_refcount > 0);
		w->w_refcount--;
		mtx_unlock_spin(&w_mtx);
	}

	mtx_lock(&all_mtx);
	lock_cur_cnt--;
	TAILQ_REMOVE(&all_locks, lock, lo_list);
	lock->lo_flags &= ~LO_INITIALIZED;
	mtx_unlock(&all_mtx);
}
465
#ifdef DDB
/*
 * Display the order graph for "list": for each witness that has been
 * acquired at least once (w_file != NULL) and has no ancestor on the
 * same list (i.e. is a root of the order forest), print its descendants.
 */
static void
witness_display_list(void(*prnt)(const char *fmt, ...),
		     struct witness_list *list)
{
	struct witness *w, *w1;
	int found;

	STAILQ_FOREACH(w, list, w_typelist) {
		if (w->w_file == NULL)
			continue;
		found = 0;
		STAILQ_FOREACH(w1, list, w_typelist) {
			if (isitmychild(w1, w)) {
				found++;
				break;
			}
		}
		if (found)
			continue;
		/*
		 * This lock has no ancestors, display its descendants.
		 */
		witness_displaydescendants(prnt, w);
	}
}
492
/*
 * DDB helper: dump the entire witness database through "prnt" -- the
 * sleep-lock order forest, then the spin-lock order forest, then the
 * names of enrolled witnesses that were never acquired.
 */
static void
witness_display(void(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	witness_levelall();

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	STAILQ_FOREACH(w, &w_all, w_list) {
		if (w->w_file != NULL || w->w_refcount == 0)
			continue;
		prnt("%s\n", w->w_name);
	}
}
#endif /* DDB */
525
/*
 * Trim useless garbage from filenames: skip over any leading "../"
 * components so diagnostics show a path relative to the source tree.
 * A NULL filename is passed through unchanged.
 */
static const char *
fixup_filename(const char *file)
{
	const char *p;

	if (file == NULL)
		return (NULL);
	for (p = file; strncmp(p, "../", 3) == 0; p += 3)
		continue;
	return (p);
}
537
/*
 * Record an acquisition of "lock" by the current thread and check it
 * against the witness order graph.  Detects and complains about:
 * shared/exclusive mode mismatches on recursion, recursion on
 * non-recursable locks, blockable sleep locks taken inside a critical
 * section, duplicate acquisitions of the same lock type, and lock order
 * reversals.  A newly observed ordering is added to the graph via
 * itismychild().  On the way out the lock is appended to the thread's
 * (sleep) or CPU's (spin) held-lock list.
 */
void
witness_lock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *lock1, *lock2;
	struct lock_class *class;
	struct witness *w, *w1;
	struct thread *td;
	int i, j;
#ifdef DDB
	int go_into_ddb = 0;
#endif

	if (witness_cold || witness_dead || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;
	w = lock->lo_witness;
	class = lock->lo_class;
	td = curthread;
	file = fixup_filename(file);

	if (class->lc_flags & LC_SLEEPLOCK) {
		/*
		 * Since spin locks include a critical section, this check
		 * implicitly enforces a lock order of all sleep locks before
		 * all spin locks.
		 */
		if (td->td_critnest != 0 && (flags & LOP_TRYLOCK) == 0)
			panic("blockable sleep lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		lock_list = &td->td_sleeplocks;
	} else
		lock_list = PCPU_PTR(spinlocks);

	/*
	 * Is this the first lock acquired?  If so, then no order checking
	 * is needed.
	 */
	if (*lock_list == NULL)
		goto out;

	/*
	 * Check to see if we are recursing on a lock we already own.
	 */
	lock1 = find_instance(*lock_list, lock);
	if (lock1 != NULL) {
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("shared lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while exclusively locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("share->excl");
		}
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("exclusive lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while share locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("excl->share");
		}
		/* The recursion count lives in the low bits of li_flags. */
		lock1->li_flags++;
		if ((lock->lo_flags & LO_RECURSABLE) == 0) {
			printf(
			"recursed on non-recursive lock (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("first acquired @ %s:%d\n", lock1->li_file,
			    lock1->li_line);
			panic("recurse");
		}
		CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__,
		    td->td_proc->p_pid, lock->lo_name,
		    lock1->li_flags & LI_RECURSEMASK);
		lock1->li_file = file;
		lock1->li_line = line;
		return;
	}

	/*
	 * Try locks do not block if they fail to acquire the lock, thus
	 * there is no danger of deadlocks or of switching while holding a
	 * spin lock if we acquire a lock via a try operation.
	 */
	if (flags & LOP_TRYLOCK)
		goto out;

	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	w1 = lock1->li_lock->lo_witness;
	if (w1 == w) {
		if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK))
			goto out;
		w->w_same_squawked = 1;
		printf("acquiring duplicate lock of same type: \"%s\"\n",
			lock->lo_type);
		printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
		    lock1->li_file, lock1->li_line);
		printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
#ifdef DDB
		go_into_ddb = 1;
#endif
		goto out;
	}
	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);
	/*
	 * If we have a known higher number just say ok
	 */
	if (witness_watch > 1 && w->w_level > w1->w_level) {
		mtx_unlock_spin(&w_mtx);
		goto out;
	}
	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmydescendant(w1, w)) {
		mtx_unlock_spin(&w_mtx);
		goto out;
	}
	/* Check the new lock against every lock currently held, newest first. */
	for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			MPASS(j < WITNESS_COUNT);
			lock1 = &lle->ll_children[i];
			w1 = lock1->li_lock->lo_witness;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			if (w1 == NULL) {
				KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
				    ("lock missing witness structure"));
				continue;
			}
			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock1->li_lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (!((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
			    /*
			     * Check the lock order hierarchy for a reversal.
			     */
			    if (!isitmydescendant(w, w1))
				continue;
			/*
			 * We have a lock order violation, check to see if it
			 * is allowed or has already been yelled about.
			 */
			mtx_unlock_spin(&w_mtx);
#ifdef BLESSING
			if (blessed(w, w1))
				goto out;
#endif
			if (lock1->li_lock == &Giant.mtx_object) {
				if (w1->w_Giant_squawked)
					goto out;
				else
					w1->w_Giant_squawked = 1;
			} else {
				if (w1->w_other_squawked)
					goto out;
				else
					w1->w_other_squawked = 1;
			}
			/*
			 * Ok, yell about it.
			 */
			printf("lock order reversal\n");
			/*
			 * Try to locate an earlier lock with
			 * witness w in our list.
			 */
			do {
				lock2 = &lle->ll_children[i];
				MPASS(lock2->li_lock != NULL);
				if (lock2->li_lock->lo_witness == w)
					break;
				i--;
				if (i == 0 && lle->ll_next != NULL) {
					lle = lle->ll_next;
					i = lle->ll_count - 1;
					MPASS(i >= 0 && i < LOCK_NCHILDREN);
				}
			} while (i >= 0);
			if (i < 0) {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    lock1->li_lock->lo_type, lock1->li_file,
				    lock1->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, lock->lo_type, file, line);
			} else {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock2->li_lock, lock2->li_lock->lo_name,
				    lock2->li_lock->lo_type, lock2->li_file,
				    lock2->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    lock1->li_lock->lo_type, lock1->li_file,
				    lock1->li_line);
				printf(" 3rd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, lock->lo_type, file, line);
			}
#ifdef DDB
			go_into_ddb = 1;
#endif
			goto out;
		}
	}
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	/*
	 * Don't build a new relationship between a sleepable lock and
	 * Giant if it is the wrong direction.  The real lock order is that
	 * sleepable locks come before Giant.
	 */
	if (lock1->li_lock == &Giant.mtx_object &&
	    (lock->lo_flags & LO_SLEEPABLE) != 0)
		mtx_unlock_spin(&w_mtx);
	else {
		CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
		    lock->lo_type, lock1->li_lock->lo_type);
		/*
		 * NOTE(review): w_mtx is only released here when
		 * itismychild() fails; the success path presumably releases
		 * it inside itismychild() -- confirm against its definition.
		 */
		if (!itismychild(lock1->li_lock->lo_witness, w))
			mtx_unlock_spin(&w_mtx);
	}

out:
#ifdef DDB
	if (go_into_ddb) {
		if (witness_trace)
			backtrace();
		if (witness_ddb)
			Debugger(__func__);
	}
#endif
	w->w_file = file;
	w->w_line = line;

	/* Record the new instance, allocating a new list entry if full. */
	lle = *lock_list;
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		lle = witness_lock_list_get();
		if (lle == NULL)
			return;
		lle->ll_next = *lock_list;
		CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__,
		    td->td_proc->p_pid, lle);
		*lock_list = lle;
	}
	lock1 = &lle->ll_children[lle->ll_count++];
	lock1->li_lock = lock;
	lock1->li_line = line;
	lock1->li_file = file;
	if ((flags & LOP_EXCLUSIVE) != 0)
		lock1->li_flags = LI_EXCLUSIVE;
	else
		lock1->li_flags = 0;
	CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__,
	    td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1);
}
821
/*
 * Record a shared-to-exclusive upgrade of "lock".  The lock must be
 * upgradable, a sleep lock, held shared (and not recursed) by the
 * current thread, and the upgrade must be a try operation.
 */
void
witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
		return;
	class = lock->lo_class;
	file = fixup_filename(file);
	if ((lock->lo_flags & LO_UPGRADABLE) == 0)
		panic("upgrade of non-upgradable lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((flags & LOP_TRYLOCK) == 0)
		panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name,
		    lock->lo_name, file, line);
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("upgrade of non-sleep lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("upgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_EXCLUSIVE) != 0)
		panic("upgrade of exclusive lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_RECURSEMASK) != 0)
		panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d",
		    class->lc_name, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK, file, line);
	instance->li_flags |= LI_EXCLUSIVE;
}
855
/*
 * Record an exclusive-to-shared downgrade of "lock".  The lock must be
 * upgradable, a sleep lock, and held exclusively (and not recursed) by
 * the current thread.
 */
void
witness_downgrade(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
		return;
	class = lock->lo_class;
	file = fixup_filename(file);
	if ((lock->lo_flags & LO_UPGRADABLE) == 0)
		panic("downgrade of non-upgradable lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("downgrade of non-sleep lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("downgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_EXCLUSIVE) == 0)
		panic("downgrade of shared lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_RECURSEMASK) != 0)
		panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d",
		    class->lc_name, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK, file, line);
	instance->li_flags &= ~LI_EXCLUSIVE;
}
887
/*
 * Record the release of "lock" by the current thread: verify the
 * shared/exclusive mode of the release, unwind one level of recursion
 * if the lock is recursed, and otherwise remove the instance from the
 * thread's (sleep) or CPU's (spin) held-lock list, freeing the list
 * entry when it empties.  Panics if the lock is not on the list.
 */
void
witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	struct thread *td;
	register_t s;
	int i, j;

	if (witness_cold || witness_dead || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;
	td = curthread;
	class = lock->lo_class;
	file = fixup_filename(file);
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = &td->td_sleeplocks;
	else
		lock_list = PCPU_PTR(spinlocks);
	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
		for (i = 0; i < (*lock_list)->ll_count; i++) {
			instance = &(*lock_list)->ll_children[i];
			if (instance->li_lock == lock) {
				if ((instance->li_flags & LI_EXCLUSIVE) != 0 &&
				    (flags & LOP_EXCLUSIVE) == 0) {
					printf(
					"shared unlock of (%s) %s @ %s:%d\n",
					    class->lc_name, lock->lo_name,
					    file, line);
					printf(
					"while exclusively locked from %s:%d\n",
					    instance->li_file,
					    instance->li_line);
					panic("excl->ushare");
				}
				if ((instance->li_flags & LI_EXCLUSIVE) == 0 &&
				    (flags & LOP_EXCLUSIVE) != 0) {
					printf(
					"exclusive unlock of (%s) %s @ %s:%d\n",
					    class->lc_name, lock->lo_name,
					    file, line);
					printf(
					"while share locked from %s:%d\n",
					    instance->li_file,
					    instance->li_line);
					panic("share->uexcl");
				}
				/* If we are recursed, unrecurse. */
				if ((instance->li_flags & LI_RECURSEMASK) > 0) {
					CTR4(KTR_WITNESS,
				    "%s: pid %d unrecursed on %s r=%d", __func__,
					    td->td_proc->p_pid,
					    instance->li_lock->lo_name,
					    instance->li_flags);
					instance->li_flags--;
					return;
				}
				/* Compact the list with interrupts disabled. */
				s = intr_disable();
				CTR4(KTR_WITNESS,
				    "%s: pid %d removed %s from lle[%d]", __func__,
				    td->td_proc->p_pid,
				    instance->li_lock->lo_name,
				    (*lock_list)->ll_count - 1);
				for (j = i; j < (*lock_list)->ll_count - 1; j++)
					(*lock_list)->ll_children[j] =
					    (*lock_list)->ll_children[j + 1];
				(*lock_list)->ll_count--;
				intr_restore(s);
				/* Free the list entry if it is now empty. */
				if ((*lock_list)->ll_count == 0) {
					lle = *lock_list;
					*lock_list = lle->ll_next;
					CTR3(KTR_WITNESS,
					    "%s: pid %d removed lle %p", __func__,
					    td->td_proc->p_pid, lle);
					witness_lock_list_free(lle);
				}
				return;
			}
		}
	panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name,
	    file, line);
}
971
972/*
973 * Warn if any locks other than 'lock' are held.  Flags can be passed in to
974 * exempt Giant and sleepable locks from the checks as well.  If any
975 * non-exempt locks are held, then a supplied message is printed to the
976 * console along with a list of the offending locks.  If indicated in the
977 * flags then a failure results in a panic as well.
978 */
979int
980witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
981{
982	struct lock_list_entry *lle;
983	struct lock_instance *lock1;
984	struct thread *td;
985	va_list ap;
986	int i, n;
987
988	if (witness_cold || witness_dead || panicstr != NULL)
989		return (0);
990	n = 0;
991	td = curthread;
992	for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next)
993		for (i = lle->ll_count - 1; i >= 0; i--) {
994			lock1 = &lle->ll_children[i];
995			if (lock1->li_lock == lock)
996				continue;
997			if (flags & WARN_GIANTOK &&
998			    lock1->li_lock == &Giant.mtx_object)
999				continue;
1000			if (flags & WARN_SLEEPOK &&
1001			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0)
1002				continue;
1003			if (n == 0) {
1004				va_start(ap, fmt);
1005				vprintf(fmt, ap);
1006				va_end(ap);
1007				printf(" with the following");
1008				if (flags & WARN_SLEEPOK)
1009					printf(" non-sleepable");
1010				printf("locks held:\n");
1011			}
1012			n++;
1013			witness_list_lock(lock1);
1014		}
1015	if (PCPU_GET(spinlocks) != NULL) {
1016		/*
1017		 * Since we already hold a spinlock preemption is
1018		 * already blocked.
1019		 */
1020		if (n == 0) {
1021			va_start(ap, fmt);
1022			vprintf(fmt, ap);
1023			va_end(ap);
1024			printf(" with the following");
1025			if (flags & WARN_SLEEPOK)
1026				printf(" non-sleepable");
1027			printf("locks held:\n");
1028		}
1029		n += witness_list_locks(PCPU_PTR(spinlocks));
1030	}
1031	if (flags & WARN_PANIC && n)
1032		panic("witness_warn");
1033#ifdef DDB
1034	else if (witness_ddb && n)
1035		Debugger(__func__);
1036#endif
1037	return (n);
1038}
1039
1040const char *
1041witness_file(struct lock_object *lock)
1042{
1043	struct witness *w;
1044
1045	if (witness_cold || witness_dead || lock->lo_witness == NULL)
1046		return ("?");
1047	w = lock->lo_witness;
1048	return (w->w_file);
1049}
1050
1051int
1052witness_line(struct lock_object *lock)
1053{
1054	struct witness *w;
1055
1056	if (witness_cold || witness_dead || lock->lo_witness == NULL)
1057		return (0);
1058	w = lock->lo_witness;
1059	return (w->w_line);
1060}
1061
/*
 * Find or create the witness that tracks all locks named "description".
 * Returns NULL when witness is disabled or dead, or when the lock is a
 * spin lock and witness_skipspin is set.
 */
static struct witness *
enroll(const char *description, struct lock_class *lock_class)
{
	struct witness *w;

	if (!witness_watch || witness_dead || panicstr != NULL)
		return (NULL);
	if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin)
		return (NULL);
	mtx_lock_spin(&w_mtx);
	/*
	 * Locks are aggregated by name: a pointer compare is a cheap
	 * shortcut; otherwise fall back to strcmp() for live (refcount > 0)
	 * witnesses.
	 */
	STAILQ_FOREACH(w, &w_all, w_list) {
		if (w->w_name == description || (w->w_refcount > 0 &&
		    strcmp(description, w->w_name) == 0)) {
			w->w_refcount++;
			mtx_unlock_spin(&w_mtx);
			/* All locks with the same name must share a class. */
			if (lock_class != w->w_class)
				panic(
				"lock (%s) %s does not match earlier (%s) lock",
				    description, lock_class->lc_name,
				    w->w_class->lc_name);
			return (w);
		}
	}
	/*
	 * This isn't quite right, as witness_cold is still 0 while we
	 * enroll all the locks initialized before witness_initialize().
	 */
	if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) {
		mtx_unlock_spin(&w_mtx);
		panic("spin lock %s not in order list", description);
	}
	/* NB: witness_get() drops w_mtx itself on failure. */
	if ((w = witness_get()) == NULL)
		return (NULL);
	w->w_name = description;
	w->w_class = lock_class;
	w->w_refcount = 1;
	STAILQ_INSERT_HEAD(&w_all, w, w_list);
	if (lock_class->lc_flags & LC_SPINLOCK)
		STAILQ_INSERT_HEAD(&w_spin, w, w_typelist);
	else if (lock_class->lc_flags & LC_SLEEPLOCK)
		STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist);
	else {
		mtx_unlock_spin(&w_mtx);
		panic("lock class %s is not sleep or spin",
		    lock_class->lc_name);
	}
	mtx_unlock_spin(&w_mtx);
	return (w);
}
1111
/*
 * Record "parent" -> "child" as a direct lock-order edge, then prune any
 * direct edges in the whole graph that are also implied transitively.
 * Returns 0 on success, 1 if a child list entry could not be allocated.
 */
static int
itismychild(struct witness *parent, struct witness *child)
{
	/* Prevents the pruning pass from re-running inside itself. */
	static int recursed;
	struct witness_child_list_entry **wcl;
	struct witness_list *list;

	MPASS(child != NULL && parent != NULL);
	/* Sleep locks and spin locks live in disjoint order graphs. */
	if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) !=
	    (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)))
		panic(
		"%s: parent (%s) and child (%s) are not the same lock type",
		    __func__, parent->w_class->lc_name,
		    child->w_class->lc_name);

	/*
	 * Insert "child" after "parent": find the first child list entry
	 * with a free slot, allocating a new entry if all are full.
	 */
	wcl = &parent->w_children;
	while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN)
		wcl = &(*wcl)->wcl_next;
	if (*wcl == NULL) {
		*wcl = witness_child_get();
		if (*wcl == NULL)
			return (1);
	}
	(*wcl)->wcl_children[(*wcl)->wcl_count++] = child;

	/*
	 * Now prune whole tree.  We look for cases where a lock is now
	 * both a descendant and a direct child of a given lock.  In that
	 * case, we want to remove the direct child link from the tree.
	 */
	if (recursed)
		return (0);
	recursed = 1;
	if (parent->w_class->lc_flags & LC_SLEEPLOCK)
		list = &w_sleep;
	else
		list = &w_spin;
	/* NB: the parameters are reused below as plain loop iterators. */
	STAILQ_FOREACH(child, list, w_typelist) {
		STAILQ_FOREACH(parent, list, w_typelist) {
			if (!isitmychild(parent, child))
				continue;
			removechild(parent, child);
			if (isitmydescendant(parent, child))
				continue;
			/* Not redundant after all; restore the direct edge. */
			itismychild(parent, child);
		}
	}
	recursed = 0;
	witness_levelall();
	return (0);
}
1166
1167static void
1168removechild(struct witness *parent, struct witness *child)
1169{
1170	struct witness_child_list_entry **wcl, *wcl1;
1171	int i;
1172
1173	for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next)
1174		for (i = 0; i < (*wcl)->wcl_count; i++)
1175			if ((*wcl)->wcl_children[i] == child)
1176				goto found;
1177	return;
1178found:
1179	(*wcl)->wcl_count--;
1180	if ((*wcl)->wcl_count > i)
1181		(*wcl)->wcl_children[i] =
1182		    (*wcl)->wcl_children[(*wcl)->wcl_count];
1183	MPASS((*wcl)->wcl_children[i] != NULL);
1184	if ((*wcl)->wcl_count != 0)
1185		return;
1186	wcl1 = *wcl;
1187	*wcl = wcl1->wcl_next;
1188	witness_child_free(wcl1);
1189}
1190
1191static int
1192isitmychild(struct witness *parent, struct witness *child)
1193{
1194	struct witness_child_list_entry *wcl;
1195	int i;
1196
1197	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) {
1198		for (i = 0; i < wcl->wcl_count; i++) {
1199			if (wcl->wcl_children[i] == child)
1200				return (1);
1201		}
1202	}
1203	return (0);
1204}
1205
1206static int
1207isitmydescendant(struct witness *parent, struct witness *child)
1208{
1209	struct witness_child_list_entry *wcl;
1210	int i, j;
1211
1212	if (isitmychild(parent, child))
1213		return (1);
1214	j = 0;
1215	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) {
1216		MPASS(j < 1000);
1217		for (i = 0; i < wcl->wcl_count; i++) {
1218			if (isitmydescendant(wcl->wcl_children[i], child))
1219				return (1);
1220		}
1221		j++;
1222	}
1223	return (0);
1224}
1225
1226static void
1227witness_levelall (void)
1228{
1229	struct witness_list *list;
1230	struct witness *w, *w1;
1231
1232	/*
1233	 * First clear all levels.
1234	 */
1235	STAILQ_FOREACH(w, &w_all, w_list) {
1236		w->w_level = 0;
1237	}
1238
1239	/*
1240	 * Look for locks with no parent and level all their descendants.
1241	 */
1242	STAILQ_FOREACH(w, &w_all, w_list) {
1243		/*
1244		 * This is just an optimization, technically we could get
1245		 * away just walking the all list each time.
1246		 */
1247		if (w->w_class->lc_flags & LC_SLEEPLOCK)
1248			list = &w_sleep;
1249		else
1250			list = &w_spin;
1251		STAILQ_FOREACH(w1, list, w_typelist) {
1252			if (isitmychild(w1, w))
1253				goto skip;
1254		}
1255		witness_leveldescendents(w, 0);
1256	skip:
1257		;	/* silence GCC 3.x */
1258	}
1259}
1260
1261static void
1262witness_leveldescendents(struct witness *parent, int level)
1263{
1264	struct witness_child_list_entry *wcl;
1265	int i;
1266
1267	if (parent->w_level < level)
1268		parent->w_level = level;
1269	level++;
1270	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
1271		for (i = 0; i < wcl->wcl_count; i++)
1272			witness_leveldescendents(wcl->wcl_children[i], level);
1273}
1274
1275static void
1276witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1277			   struct witness *parent)
1278{
1279	struct witness_child_list_entry *wcl;
1280	int i, level;
1281
1282	level = parent->w_level;
1283	prnt("%-2d", level);
1284	for (i = 0; i < level; i++)
1285		prnt(" ");
1286	if (parent->w_refcount > 0) {
1287		prnt("%s", parent->w_name);
1288		if (parent->w_file != NULL)
1289			prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1290			    parent->w_line);
1291	} else
1292		prnt("(dead)\n");
1293	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
1294		for (i = 0; i < wcl->wcl_count; i++)
1295			    witness_displaydescendants(prnt,
1296				wcl->wcl_children[i]);
1297}
1298
1299#ifdef BLESSING
1300static int
1301blessed(struct witness *w1, struct witness *w2)
1302{
1303	int i;
1304	struct witness_blessed *b;
1305
1306	for (i = 0; i < blessed_count; i++) {
1307		b = &blessed_list[i];
1308		if (strcmp(w1->w_name, b->b_lock1) == 0) {
1309			if (strcmp(w2->w_name, b->b_lock2) == 0)
1310				return (1);
1311			continue;
1312		}
1313		if (strcmp(w1->w_name, b->b_lock2) == 0)
1314			if (strcmp(w2->w_name, b->b_lock1) == 0)
1315				return (1);
1316	}
1317	return (0);
1318}
1319#endif
1320
1321static struct witness *
1322witness_get(void)
1323{
1324	struct witness *w;
1325
1326	if (witness_dead) {
1327		mtx_unlock_spin(&w_mtx);
1328		return (NULL);
1329	}
1330	if (STAILQ_EMPTY(&w_free)) {
1331		witness_dead = 1;
1332		mtx_unlock_spin(&w_mtx);
1333		printf("%s: witness exhausted\n", __func__);
1334		return (NULL);
1335	}
1336	w = STAILQ_FIRST(&w_free);
1337	STAILQ_REMOVE_HEAD(&w_free, w_list);
1338	bzero(w, sizeof(*w));
1339	return (w);
1340}
1341
/*
 * Return a witness structure to the free list.
 * NOTE(review): no locking here; presumably callers hold w_mtx as
 * witness_get() expects -- confirm at call sites.
 */
static void
witness_free(struct witness *w)
{

	STAILQ_INSERT_HEAD(&w_free, w, w_list);
}
1348
1349static struct witness_child_list_entry *
1350witness_child_get(void)
1351{
1352	struct witness_child_list_entry *wcl;
1353
1354	if (witness_dead) {
1355		mtx_unlock_spin(&w_mtx);
1356		return (NULL);
1357	}
1358	wcl = w_child_free;
1359	if (wcl == NULL) {
1360		witness_dead = 1;
1361		mtx_unlock_spin(&w_mtx);
1362		printf("%s: witness exhausted\n", __func__);
1363		return (NULL);
1364	}
1365	w_child_free = wcl->wcl_next;
1366	bzero(wcl, sizeof(*wcl));
1367	return (wcl);
1368}
1369
/*
 * Return a child list entry by pushing it back onto the w_child_free
 * singly-linked free list.
 * NOTE(review): no locking here; presumably callers hold w_mtx as
 * witness_child_get() expects -- confirm at call sites.
 */
static void
witness_child_free(struct witness_child_list_entry *wcl)
{

	wcl->wcl_next = w_child_free;
	w_child_free = wcl;
}
1377
1378static struct lock_list_entry *
1379witness_lock_list_get(void)
1380{
1381	struct lock_list_entry *lle;
1382
1383	if (witness_dead)
1384		return (NULL);
1385	mtx_lock_spin(&w_mtx);
1386	lle = w_lock_list_free;
1387	if (lle == NULL) {
1388		witness_dead = 1;
1389		mtx_unlock_spin(&w_mtx);
1390		printf("%s: witness exhausted\n", __func__);
1391		return (NULL);
1392	}
1393	w_lock_list_free = lle->ll_next;
1394	mtx_unlock_spin(&w_mtx);
1395	bzero(lle, sizeof(*lle));
1396	return (lle);
1397}
1398
/*
 * Return a lock list entry to the w_lock_list_free list, taking and
 * releasing w_mtx internally.
 */
static void
witness_lock_list_free(struct lock_list_entry *lle)
{

	mtx_lock_spin(&w_mtx);
	lle->ll_next = w_lock_list_free;
	w_lock_list_free = lle;
	mtx_unlock_spin(&w_mtx);
}
1408
1409static struct lock_instance *
1410find_instance(struct lock_list_entry *lock_list, struct lock_object *lock)
1411{
1412	struct lock_list_entry *lle;
1413	struct lock_instance *instance;
1414	int i;
1415
1416	for (lle = lock_list; lle != NULL; lle = lle->ll_next)
1417		for (i = lle->ll_count - 1; i >= 0; i--) {
1418			instance = &lle->ll_children[i];
1419			if (instance->li_lock == lock)
1420				return (instance);
1421		}
1422	return (NULL);
1423}
1424
1425static void
1426witness_list_lock(struct lock_instance *instance)
1427{
1428	struct lock_object *lock;
1429
1430	lock = instance->li_lock;
1431	printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
1432	    "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name);
1433	if (lock->lo_type != lock->lo_name)
1434		printf(" (%s)", lock->lo_type);
1435	printf(" r = %d (%p) locked @ %s:%d\n",
1436	    instance->li_flags & LI_RECURSEMASK, lock, instance->li_file,
1437	    instance->li_line);
1438}
1439
1440int
1441witness_list_locks(struct lock_list_entry **lock_list)
1442{
1443	struct lock_list_entry *lle;
1444	int i, nheld;
1445
1446	nheld = 0;
1447	for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
1448		for (i = lle->ll_count - 1; i >= 0; i--) {
1449			witness_list_lock(&lle->ll_children[i]);
1450			nheld++;
1451		}
1452	return (nheld);
1453}
1454
1455void
1456witness_save(struct lock_object *lock, const char **filep, int *linep)
1457{
1458	struct lock_instance *instance;
1459
1460	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
1461	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
1462		return;
1463	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
1464		panic("%s: lock (%s) %s is not a sleep lock", __func__,
1465		    lock->lo_class->lc_name, lock->lo_name);
1466	instance = find_instance(curthread->td_sleeplocks, lock);
1467	if (instance == NULL)
1468		panic("%s: lock (%s) %s not locked", __func__,
1469		    lock->lo_class->lc_name, lock->lo_name);
1470	*filep = instance->li_file;
1471	*linep = instance->li_line;
1472}
1473
1474void
1475witness_restore(struct lock_object *lock, const char *file, int line)
1476{
1477	struct lock_instance *instance;
1478
1479	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
1480	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
1481		return;
1482	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
1483		panic("%s: lock (%s) %s is not a sleep lock", __func__,
1484		    lock->lo_class->lc_name, lock->lo_name);
1485	instance = find_instance(curthread->td_sleeplocks, lock);
1486	if (instance == NULL)
1487		panic("%s: lock (%s) %s not locked", __func__,
1488		    lock->lo_class->lc_name, lock->lo_name);
1489	lock->lo_witness->w_file = file;
1490	lock->lo_witness->w_line = line;
1491	instance->li_file = file;
1492	instance->li_line = line;
1493}
1494
/*
 * Assert that curthread's hold state on "lock" matches the LA_* flags
 * (e.g. LA_UNLOCKED, LA_LOCKED, LA_XLOCKED | LA_NOTRECURSED); panics
 * with a descriptive message on any mismatch.  Compiles to an empty body
 * unless INVARIANT_SUPPORT is defined.
 */
void
witness_assert(struct lock_object *lock, int flags, const char *file, int line)
{
#ifdef INVARIANT_SUPPORT
	struct lock_instance *instance;

	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
		return;
	/* Look the lock up in the appropriate held-lock list. */
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0)
		instance = find_instance(curthread->td_sleeplocks, lock);
	else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0)
		instance = find_instance(PCPU_GET(spinlocks), lock);
	else {
		panic("Lock (%s) %s is not sleep or spin!",
		    lock->lo_class->lc_name, lock->lo_name);
		return;
	}
	file = fixup_filename(file);
	switch (flags) {
	case LA_UNLOCKED:
		if (instance != NULL)
			panic("Lock (%s) %s locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		break;
	case LA_LOCKED:
	case LA_LOCKED | LA_RECURSED:
	case LA_LOCKED | LA_NOTRECURSED:
	case LA_SLOCKED:
	case LA_SLOCKED | LA_RECURSED:
	case LA_SLOCKED | LA_NOTRECURSED:
	case LA_XLOCKED:
	case LA_XLOCKED | LA_RECURSED:
	case LA_XLOCKED | LA_NOTRECURSED:
		/* All of these require the lock to be held at all. */
		if (instance == NULL) {
			panic("Lock (%s) %s not locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
			break;
		}
		/* Check mode and recursion qualifiers individually. */
		if ((flags & LA_XLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("Lock (%s) %s not exclusively locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		if ((flags & LA_SLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("Lock (%s) %s exclusively locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		if ((flags & LA_RECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) == 0)
			panic("Lock (%s) %s not recursed @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		if ((flags & LA_NOTRECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) != 0)
			panic("Lock (%s) %s recursed @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		break;
	default:
		panic("Invalid lock assertion at %s:%d.", file, line);

	}
#endif	/* INVARIANT_SUPPORT */
}
1556
1557#ifdef DDB
/*
 * List the locks held by "td" from within DDB: its sleep locks always,
 * and this CPU's spin locks only when td is curthread.
 */
static void
witness_list(struct thread *td)
{

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	KASSERT(db_active, ("%s: not in the debugger", __func__));

	if (witness_dead)
		return;

	witness_list_locks(&td->td_sleeplocks);

	/*
	 * We only handle spinlocks if td == curthread.  This is somewhat broken
	 * if td is currently executing on some other CPU and holds spin locks
	 * as we won't display those locks.  If we had a MI way of getting
	 * the per-cpu data for a given cpu then we could use
	 * td->td_kse->ke_oncpu to get the list of spinlocks for this thread
	 * and "fix" this.
	 *
	 * That still wouldn't really fix this unless we locked sched_lock
	 * or stopped the other CPU to make sure it wasn't changing the list
	 * out from under us.  It is probably best to just not try to handle
	 * threads on other CPU's for now.
	 */
	if (td == curthread && PCPU_GET(spinlocks) != NULL)
		witness_list_locks(PCPU_PTR(spinlocks));
}
1586
/*
 * DDB "show locks [pid]" command: list the locks held by every thread of
 * the given process, or by curthread when no argument is supplied.
 */
DB_SHOW_COMMAND(locks, db_witness_list)
{
	struct thread *td;
	pid_t pid;
	struct proc *p;

	if (have_addr) {
		/*
		 * Reinterpret each base-16 digit of "addr" as a base-10
		 * digit -- presumably because DDB parsed a pid the user
		 * typed in decimal as a hex address.
		 */
		pid = (addr % 16) + ((addr >> 4) % 16) * 10 +
		    ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 +
		    ((addr >> 16) % 16) * 10000;
		/* sx_slock(&allproc_lock); */
		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_pid == pid)
				break;
		}
		/* sx_sunlock(&allproc_lock); */
		if (p == NULL) {
			db_printf("pid %d not found\n", pid);
			return;
		}
		FOREACH_THREAD_IN_PROC(p, td) {
			witness_list(td);
		}
	} else {
		td = curthread;
		witness_list(td);
	}
}
1615
/*
 * DDB "show witness" command: dump the whole lock-order graph via
 * db_printf.
 */
DB_SHOW_COMMAND(witness, db_witness_display)
{

	witness_display(db_printf);
}
1621#endif
1622