subr_turnstile.c revision 72994
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_turnstile.c 72994 2001-02-24 19:36:13Z jhb $
31 */
32
33/*
34 * Machine independent bits of mutex implementation and implementation of
35 * `witness' structure & related debugging routines.
36 */
37
38/*
39 *	Main Entry: witness
40 *	Pronunciation: 'wit-n&s
41 *	Function: noun
42 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
43 *	    testimony, witness, from 2wit
44 *	Date: before 12th century
45 *	1 : attestation of a fact or event : TESTIMONY
46 *	2 : one that gives evidence; specifically : one who testifies in
47 *	    a cause or before a judicial tribunal
48 *	3 : one asked to be present at a transaction so as to be able to
49 *	    testify to its having taken place
50 *	4 : one who has personal knowledge of something
51 *	5 a : something serving as evidence or proof : SIGN
52 *	  b : public affirmation by word or example of usually
53 *	      religious faith or conviction <the heroic witness to divine
54 *	      life -- Pilot>
55 *	6 capitalized : a member of the Jehovah's Witnesses
56 */
57
58#include "opt_ddb.h"
59#include "opt_witness.h"
60
61#include <sys/param.h>
62#include <sys/bus.h>
63#include <sys/kernel.h>
64#include <sys/malloc.h>
65#include <sys/proc.h>
66#include <sys/sysctl.h>
67#include <sys/systm.h>
68#include <sys/vmmeter.h>
69#include <sys/ktr.h>
70
71#include <machine/atomic.h>
72#include <machine/bus.h>
73#include <machine/clock.h>
74#include <machine/cpu.h>
75
76#include <ddb/ddb.h>
77
78#include <vm/vm.h>
79#include <vm/vm_extern.h>
80
81#include <sys/mutex.h>
82
83/*
84 * The WITNESS-enabled mutex debug structure.
85 */
86#ifdef WITNESS
87struct mtx_debug {
88	struct witness	*mtxd_witness;
89	LIST_ENTRY(mtx)	mtxd_held;
90	const char	*mtxd_file;
91	int		mtxd_line;
92};
93
94#define mtx_held	mtx_debug->mtxd_held
95#define	mtx_file	mtx_debug->mtxd_file
96#define	mtx_line	mtx_debug->mtxd_line
97#define	mtx_witness	mtx_debug->mtxd_witness
98#endif	/* WITNESS */
99
100/*
101 * Internal utility macros.
102 */
103#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104
105#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107
108#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
109
110/*
111 * Early WITNESS-enabled declarations.
112 */
113#ifdef WITNESS
114
115/*
116 * Internal WITNESS routines which must be prototyped early.
117 *
118 * XXX: When/if witness code is cleaned up, it would be wise to place all
119 *	witness prototyping early in this file.
120 */
121static void witness_init(struct mtx *, int flag);
122static void witness_destroy(struct mtx *);
123static void witness_display(void(*)(const char *fmt, ...));
124
125MALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
126
127/* All mutexes in system (used for debug/panic) */
128static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
129
130/*
131 * This global is set to 0 once it becomes safe to use the witness code.
132 */
133static int witness_cold = 1;
134
135#else	/* WITNESS */
136
137/* XXX XXX XXX
138 * flag++ is sleazoid way of shuting up warning
139 */
140#define witness_init(m, flag) flag++
141#define witness_destroy(m)
142#define witness_try_enter(m, t, f, l)
143#endif	/* WITNESS */
144
145/*
146 * All mutex locks in system are kept on the all_mtx list.
147 */
148static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
149	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
150	{ NULL, NULL }, &all_mtx, &all_mtx,
151#ifdef WITNESS
152	&all_mtx_debug
153#else
154	NULL
155#endif
156	 };
157
158/*
159 * Global variables for book keeping.
160 */
161static int	mtx_cur_cnt;
162static int	mtx_max_cnt;
163
164/*
165 * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
166 */
167char	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
168char	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
169char	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
170char	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
171
172/*
173 * Prototypes for non-exported routines.
174 *
175 * NOTE: Prototypes for witness routines are placed at the bottom of the file.
176 */
177static void	propagate_priority(struct proc *);
178
179static void
180propagate_priority(struct proc *p)
181{
182	int pri = p->p_pri.pri_level;
183	struct mtx *m = p->p_blocked;
184
185	mtx_assert(&sched_lock, MA_OWNED);
186	for (;;) {
187		struct proc *p1;
188
189		p = mtx_owner(m);
190
191		if (p == NULL) {
192			/*
193			 * This really isn't quite right. Really
194			 * ought to bump priority of process that
195			 * next acquires the mutex.
196			 */
197			MPASS(m->mtx_lock == MTX_CONTESTED);
198			return;
199		}
200
201		MPASS(p->p_magic == P_MAGIC);
202		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
203		if (p->p_pri.pri_level <= pri)
204			return;
205
206		/*
207		 * Bump this process' priority.
208		 */
209		SET_PRIO(p, pri);
210
211		/*
212		 * If lock holder is actually running, just bump priority.
213		 */
214		if (p->p_oncpu != NOCPU) {
215			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
216			return;
217		}
218
219		/*
220		 * If on run queue move to new run queue, and
221		 * quit.
222		 */
223		if (p->p_stat == SRUN) {
224			MPASS(p->p_blocked == NULL);
225			remrunqueue(p);
226			setrunqueue(p);
227			return;
228		}
229
230		/*
231		 * If we aren't blocked on a mutex, we should be.
232		 */
233		KASSERT(p->p_stat == SMTX, (
234		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
235		    p->p_pid, p->p_comm, p->p_stat,
236		    m->mtx_description));
237
238		/*
239		 * Pick up the mutex that p is blocked on.
240		 */
241		m = p->p_blocked;
242		MPASS(m != NULL);
243
244		/*
245		 * Check if the proc needs to be moved up on
246		 * the blocked chain
247		 */
248		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
249			continue;
250		}
251
252		p1 = TAILQ_PREV(p, procqueue, p_procq);
253		if (p1->p_pri.pri_level <= pri) {
254			continue;
255		}
256
257		/*
258		 * Remove proc from blocked chain and determine where
259		 * it should be moved up to.  Since we know that p1 has
260		 * a lower priority than p, we know that at least one
261		 * process in the chain has a lower priority and that
262		 * p1 will thus not be NULL after the loop.
263		 */
264		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
265		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
266			MPASS(p1->p_magic == P_MAGIC);
267			if (p1->p_pri.pri_level > pri)
268				break;
269		}
270
271		MPASS(p1 != NULL);
272		TAILQ_INSERT_BEFORE(p1, p, p_procq);
273		CTR4(KTR_LOCK,
274		    "propagate_priority: p %p moved before %p on [%p] %s",
275		    p, p1, m, m->mtx_description);
276	}
277}
278
279/*
280 * The important part of mtx_trylock{,_flags}()
281 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
282 * if we're called, it's because we know we don't already own this lock.
283 */
284int
285_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
286{
287	int rval;
288
289	MPASS(curproc != NULL);
290
291	/*
292	 * _mtx_trylock does not accept MTX_NOSWITCH option.
293	 */
294	KASSERT((opts & MTX_NOSWITCH) == 0,
295	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
296
297	rval = _obtain_lock(m, curproc);
298
299#ifdef WITNESS
300	if (rval && m->mtx_witness != NULL) {
301		/*
302		 * We do not handle recursion in _mtx_trylock; see the
303		 * note at the top of the routine.
304		 */
305		KASSERT(!mtx_recursed(m),
306		    ("mtx_trylock() called on a recursed mutex"));
307		witness_try_enter(m, (opts | m->mtx_flags), file, line);
308	}
309#endif	/* WITNESS */
310
311	if ((opts & MTX_QUIET) == 0)
312		CTR5(KTR_LOCK, "TRY_LOCK %s [%p] result=%d at %s:%d",
313		    m->mtx_description, m, rval, file, line);
314
315	return rval;
316}
317
318/*
319 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
320 *
321 * We call this if the lock is either contested (i.e. we need to go to
322 * sleep waiting for it), or if we need to recurse on it.
323 */
324void
325_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
326{
327	struct proc *p = curproc;
328
329	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
330		m->mtx_recurse++;
331		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
332		if ((opts & MTX_QUIET) == 0)
333			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
334		return;
335	}
336
337	if ((opts & MTX_QUIET) == 0)
338		CTR4(KTR_LOCK,
339		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
340		    m->mtx_description, (void *)m->mtx_lock, file, line);
341
342	/*
343	 * Save our priority. Even though p_nativepri is protected by
344	 * sched_lock, we don't obtain it here as it can be expensive.
345	 * Since this is the only place p_nativepri is set, and since two
346	 * CPUs will not be executing the same process concurrently, we know
347	 * that no other CPU is going to be messing with this. Also,
348	 * p_nativepri is only read when we are blocked on a mutex, so that
349	 * can't be happening right now either.
350	 */
351	p->p_pri.pri_native = p->p_pri.pri_level;
352
353	while (!_obtain_lock(m, p)) {
354		uintptr_t v;
355		struct proc *p1;
356
357		mtx_lock_spin(&sched_lock);
358		/*
359		 * Check if the lock has been released while spinning for
360		 * the sched_lock.
361		 */
362		if ((v = m->mtx_lock) == MTX_UNOWNED) {
363			mtx_unlock_spin(&sched_lock);
364			continue;
365		}
366
367		/*
368		 * The mutex was marked contested on release. This means that
369		 * there are processes blocked on it.
370		 */
371		if (v == MTX_CONTESTED) {
372			p1 = TAILQ_FIRST(&m->mtx_blocked);
373			MPASS(p1 != NULL);
374			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
375
376			if (p1->p_pri.pri_level < p->p_pri.pri_level)
377				SET_PRIO(p, p1->p_pri.pri_level);
378			mtx_unlock_spin(&sched_lock);
379			return;
380		}
381
382		/*
383		 * If the mutex isn't already contested and a failure occurs
384		 * setting the contested bit, the mutex was either released
385		 * or the state of the MTX_RECURSED bit changed.
386		 */
387		if ((v & MTX_CONTESTED) == 0 &&
388		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
389			(void *)(v | MTX_CONTESTED))) {
390			mtx_unlock_spin(&sched_lock);
391			continue;
392		}
393
394		/*
395		 * We deffinately must sleep for this lock.
396		 */
397		mtx_assert(m, MA_NOTOWNED);
398
399#ifdef notyet
400		/*
401		 * If we're borrowing an interrupted thread's VM context, we
402		 * must clean up before going to sleep.
403		 */
404		if (p->p_ithd != NULL) {
405			struct ithd *it = p->p_ithd;
406
407			if (it->it_interrupted) {
408				if ((opts & MTX_QUIET) == 0)
409					CTR2(KTR_LOCK,
410				    "_mtx_lock_sleep: %p interrupted %p",
411					    it, it->it_interrupted);
412				intr_thd_fixup(it);
413			}
414		}
415#endif
416
417		/*
418		 * Put us on the list of threads blocked on this mutex.
419		 */
420		if (TAILQ_EMPTY(&m->mtx_blocked)) {
421			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
422			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
423			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
424		} else {
425			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
426				if (p1->p_pri.pri_level > p->p_pri.pri_level)
427					break;
428			if (p1)
429				TAILQ_INSERT_BEFORE(p1, p, p_procq);
430			else
431				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
432		}
433
434		/*
435		 * Save who we're blocked on.
436		 */
437		p->p_blocked = m;
438		p->p_mtxname = m->mtx_description;
439		p->p_stat = SMTX;
440		propagate_priority(p);
441
442		if ((opts & MTX_QUIET) == 0)
443			CTR3(KTR_LOCK,
444			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
445			    m->mtx_description);
446
447		mi_switch();
448
449		if ((opts & MTX_QUIET) == 0)
450			CTR3(KTR_LOCK,
451			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
452			  p, m, m->mtx_description);
453
454		mtx_unlock_spin(&sched_lock);
455	}
456
457	return;
458}
459
460/*
461 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
462 *
463 * This is only called if we need to actually spin for the lock. Recursion
464 * is handled inline.
465 */
466void
467_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
468	       int line)
469{
470	int i = 0;
471
472	if ((opts & MTX_QUIET) == 0)
473		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
474
475	for (;;) {
476		if (_obtain_lock(m, curproc))
477			break;
478
479		while (m->mtx_lock != MTX_UNOWNED) {
480			if (i++ < 1000000)
481				continue;
482			if (i++ < 6000000)
483				DELAY(1);
484#ifdef DDB
485			else if (!db_active)
486#else
487			else
488#endif
489			panic("spin lock %s held by %p for > 5 seconds",
490			    m->mtx_description, (void *)m->mtx_lock);
491		}
492	}
493
494	m->mtx_saveintr = mtx_intr;
495	if ((opts & MTX_QUIET) == 0)
496		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
497
498	return;
499}
500
501/*
502 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
503 *
504 * We are only called here if the lock is recursed or contested (i.e. we
505 * need to wake up a blocked thread).
506 */
507void
508_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
509{
510	struct proc *p, *p1;
511	struct mtx *m1;
512	int pri;
513
514	p = curproc;
515	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
516
517	if (mtx_recursed(m)) {
518		if (--(m->mtx_recurse) == 0)
519			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
520		if ((opts & MTX_QUIET) == 0)
521			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
522		return;
523	}
524
525	mtx_lock_spin(&sched_lock);
526	if ((opts & MTX_QUIET) == 0)
527		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
528
529	p1 = TAILQ_FIRST(&m->mtx_blocked);
530	MPASS(p->p_magic == P_MAGIC);
531	MPASS(p1->p_magic == P_MAGIC);
532
533	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
534
535	if (TAILQ_EMPTY(&m->mtx_blocked)) {
536		LIST_REMOVE(m, mtx_contested);
537		_release_lock_quick(m);
538		if ((opts & MTX_QUIET) == 0)
539			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
540	} else
541		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
542
543	pri = PRI_MAX;
544	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
545		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
546		if (cp < pri)
547			pri = cp;
548	}
549
550	if (pri > p->p_pri.pri_native)
551		pri = p->p_pri.pri_native;
552	SET_PRIO(p, pri);
553
554	if ((opts & MTX_QUIET) == 0)
555		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
556		    m, p1);
557
558	p1->p_blocked = NULL;
559	p1->p_stat = SRUN;
560	setrunqueue(p1);
561
562	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
563#ifdef notyet
564		if (p->p_ithd != NULL) {
565			struct ithd *it = p->p_ithd;
566
567			if (it->it_interrupted) {
568				if ((opts & MTX_QUIET) == 0)
569					CTR2(KTR_LOCK,
570				    "_mtx_unlock_sleep: %p interrupted %p",
571					    it, it->it_interrupted);
572				intr_thd_fixup(it);
573			}
574		}
575#endif
576		setrunqueue(p);
577		if ((opts & MTX_QUIET) == 0)
578			CTR2(KTR_LOCK,
579			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
580			    (void *)m->mtx_lock);
581
582		mi_switch();
583		if ((opts & MTX_QUIET) == 0)
584			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
585			    m, (void *)m->mtx_lock);
586	}
587
588	mtx_unlock_spin(&sched_lock);
589
590	return;
591}
592
593/*
594 * All the unlocking of MTX_SPIN locks is done inline.
595 * See the _rel_spin_lock() macro for the details.
596 */
597
598/*
599 * The backing function for the INVARIANTS-enabled mtx_assert()
600 */
601#ifdef INVARIANTS_SUPPORT
602void
603_mtx_assert(struct mtx *m, int what, const char *file, int line)
604{
605	switch ((what)) {
606	case MA_OWNED:
607	case MA_OWNED | MA_RECURSED:
608	case MA_OWNED | MA_NOTRECURSED:
609		if (!mtx_owned((m)))
610			panic("mutex %s not owned at %s:%d",
611			    (m)->mtx_description, file, line);
612		if (mtx_recursed((m))) {
613			if (((what) & MA_NOTRECURSED) != 0)
614				panic("mutex %s recursed at %s:%d",
615				    (m)->mtx_description, file, line);
616		} else if (((what) & MA_RECURSED) != 0) {
617			panic("mutex %s unrecursed at %s:%d",
618			    (m)->mtx_description, file, line);
619		}
620		break;
621	case MA_NOTOWNED:
622		if (mtx_owned((m)))
623			panic("mutex %s owned at %s:%d",
624			    (m)->mtx_description, file, line);
625		break;
626	default:
627		panic("unknown mtx_assert at %s:%d", file, line);
628	}
629}
630#endif
631
632/*
633 * The MUTEX_DEBUG-enabled mtx_validate()
634 */
635#define MV_DESTROY	0	/* validate before destory */
636#define MV_INIT		1	/* validate before init */
637
638#ifdef MUTEX_DEBUG
639
640int mtx_validate __P((struct mtx *, int));
641
642int
643mtx_validate(struct mtx *m, int when)
644{
645	struct mtx *mp;
646	int i;
647	int retval = 0;
648
649#ifdef WITNESS
650	if (witness_cold)
651		return 0;
652#endif
653	if (m == &all_mtx || cold)
654		return 0;
655
656	mtx_lock(&all_mtx);
657/*
658 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
659 * we can re-enable the kernacc() checks.
660 */
661#ifndef __alpha__
662	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
663	    VM_PROT_READ) == 1);
664#endif
665	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
666	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
667#ifndef __alpha__
668		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
669		    VM_PROT_READ) != 1) {
670			panic("mtx_validate: mp=%p mp->mtx_next=%p",
671			    mp, mp->mtx_next);
672		}
673#endif
674		i++;
675		if (i > mtx_cur_cnt) {
676			panic("mtx_validate: too many in chain, known=%d\n",
677			    mtx_cur_cnt);
678		}
679	}
680	MPASS(i == mtx_cur_cnt);
681	switch (when) {
682	case MV_DESTROY:
683		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
684			if (mp == m)
685				break;
686		MPASS(mp == m);
687		break;
688	case MV_INIT:
689		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
690		if (mp == m) {
691			/*
692			 * Not good. This mutex already exists.
693			 */
694			printf("re-initing existing mutex %s\n",
695			    m->mtx_description);
696			MPASS(m->mtx_lock == MTX_UNOWNED);
697			retval = 1;
698		}
699	}
700	mtx_unlock(&all_mtx);
701	return (retval);
702}
703#endif
704
705/*
706 * Mutex initialization routine; initialize lock `m' of type contained in
707 * `opts' with options contained in `opts' and description `description.'
708 * Place on "all_mtx" queue.
709 */
710void
711mtx_init(struct mtx *m, const char *description, int opts)
712{
713
714	if ((opts & MTX_QUIET) == 0)
715		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
716
717#ifdef MUTEX_DEBUG
718	/* Diagnostic and error correction */
719	if (mtx_validate(m, MV_INIT))
720		return;
721#endif
722
723	bzero((void *)m, sizeof *m);
724	TAILQ_INIT(&m->mtx_blocked);
725
726#ifdef WITNESS
727	if (!witness_cold) {
728		m->mtx_debug = malloc(sizeof(struct mtx_debug),
729		    M_WITNESS, M_NOWAIT | M_ZERO);
730		MPASS(m->mtx_debug != NULL);
731	}
732#endif
733
734	m->mtx_description = description;
735	m->mtx_flags = opts;
736	m->mtx_lock = MTX_UNOWNED;
737
738	/* Put on all mutex queue */
739	mtx_lock(&all_mtx);
740	m->mtx_next = &all_mtx;
741	m->mtx_prev = all_mtx.mtx_prev;
742	m->mtx_prev->mtx_next = m;
743	all_mtx.mtx_prev = m;
744	if (++mtx_cur_cnt > mtx_max_cnt)
745		mtx_max_cnt = mtx_cur_cnt;
746	mtx_unlock(&all_mtx);
747
748#ifdef WITNESS
749	if (!witness_cold)
750		witness_init(m, opts);
751#endif
752}
753
754/*
755 * Remove lock `m' from all_mtx queue.
756 */
757void
758mtx_destroy(struct mtx *m)
759{
760
761#ifdef WITNESS
762	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
763	    __FUNCTION__));
764#endif
765
766	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
767
768#ifdef MUTEX_DEBUG
769	if (m->mtx_next == NULL)
770		panic("mtx_destroy: %p (%s) already destroyed",
771		    m, m->mtx_description);
772
773	if (!mtx_owned(m)) {
774		MPASS(m->mtx_lock == MTX_UNOWNED);
775	} else {
776		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
777	}
778
779	/* diagnostic */
780	mtx_validate(m, MV_DESTROY);
781#endif
782
783#ifdef WITNESS
784	if (m->mtx_witness)
785		witness_destroy(m);
786#endif /* WITNESS */
787
788	/* Remove from the all mutex queue */
789	mtx_lock(&all_mtx);
790	m->mtx_next->mtx_prev = m->mtx_prev;
791	m->mtx_prev->mtx_next = m->mtx_next;
792
793#ifdef MUTEX_DEBUG
794	m->mtx_next = m->mtx_prev = NULL;
795#endif
796
797#ifdef WITNESS
798	free(m->mtx_debug, M_WITNESS);
799	m->mtx_debug = NULL;
800#endif
801
802	mtx_cur_cnt--;
803	mtx_unlock(&all_mtx);
804}
805
806
807/*
808 * The WITNESS-enabled diagnostic code.
809 */
810#ifdef WITNESS
811static void
812witness_fixup(void *dummy __unused)
813{
814	struct mtx *mp;
815
816	/*
817	 * We have to release Giant before initializing its witness
818	 * structure so that WITNESS doesn't get confused.
819	 */
820	mtx_unlock(&Giant);
821	mtx_assert(&Giant, MA_NOTOWNED);
822
823	mtx_lock(&all_mtx);
824
825	/* Iterate through all mutexes and finish up mutex initialization. */
826	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
827
828		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
829		    M_WITNESS, M_NOWAIT | M_ZERO);
830		MPASS(mp->mtx_debug != NULL);
831
832		witness_init(mp, mp->mtx_flags);
833	}
834	mtx_unlock(&all_mtx);
835
836	/* Mark the witness code as being ready for use. */
837	atomic_store_rel_int(&witness_cold, 0);
838
839	mtx_lock(&Giant);
840}
841SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
842
843#define WITNESS_COUNT 200
844#define	WITNESS_NCHILDREN 2
845
846int witness_watch = 1;
847
848struct witness {
849	struct witness	*w_next;
850	const char	*w_description;
851	const char	*w_file;
852	int		 w_line;
853	struct witness	*w_morechildren;
854	u_char		 w_childcnt;
855	u_char		 w_Giant_squawked:1;
856	u_char		 w_other_squawked:1;
857	u_char		 w_same_squawked:1;
858	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
859	u_int		 w_level;
860	struct witness	*w_children[WITNESS_NCHILDREN];
861};
862
863struct witness_blessed {
864	char 	*b_lock1;
865	char	*b_lock2;
866};
867
868#ifdef DDB
869/*
870 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
871 * drop into kdebug() when:
872 *	- a lock heirarchy violation occurs
873 *	- locks are held when going to sleep.
874 */
875int	witness_ddb;
876#ifdef WITNESS_DDB
877TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
878#else
879TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
880#endif
881SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
882#endif /* DDB */
883
884int	witness_skipspin;
885#ifdef WITNESS_SKIPSPIN
886TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
887#else
888TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
889#endif
890SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
891    "");
892
893/*
894 * Witness-enabled globals
895 */
896static struct mtx	w_mtx;
897static struct witness	*w_free;
898static struct witness	*w_all;
899static int		 w_inited;
900static int		 witness_dead;	/* fatal error, probably no memory */
901
902static struct witness	 w_data[WITNESS_COUNT];
903
904/*
905 * Internal witness routine prototypes
906 */
907static struct witness *enroll(const char *description, int flag);
908static int itismychild(struct witness *parent, struct witness *child);
909static void removechild(struct witness *parent, struct witness *child);
910static int isitmychild(struct witness *parent, struct witness *child);
911static int isitmydescendant(struct witness *parent, struct witness *child);
912static int dup_ok(struct witness *);
913static int blessed(struct witness *, struct witness *);
914static void
915    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
916static void witness_leveldescendents(struct witness *parent, int level);
917static void witness_levelall(void);
918static struct witness * witness_get(void);
919static void witness_free(struct witness *m);
920
921static char *ignore_list[] = {
922	"witness lock",
923	NULL
924};
925
926static char *spin_order_list[] = {
927#if defined(__i386__) && defined (SMP)
928	"com",
929#endif
930	"sio",
931#ifdef __i386__
932	"cy",
933#endif
934	"ithread table lock",
935	"ithread list lock",
936	"sched lock",
937#ifdef __i386__
938	"clk",
939#endif
940	"callout",
941	/*
942	 * leaf locks
943	 */
944#ifdef SMP
945#ifdef __i386__
946	"ap boot",
947	"imen",
948#endif
949	"ng_node",
950	"ng_worklist",
951	"smp rendezvous",
952#endif
953	NULL
954};
955
956static char *order_list[] = {
957	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
958	    "uidinfo struct", NULL,
959	NULL
960};
961
962static char *dup_list[] = {
963	NULL
964};
965
966static char *sleep_list[] = {
967	"Giant",
968	NULL
969};
970
971/*
972 * Pairs of locks which have been blessed
973 * Don't complain about order problems with blessed locks
974 */
975static struct witness_blessed blessed_list[] = {
976};
977static int blessed_count =
978	sizeof(blessed_list) / sizeof(struct witness_blessed);
979
980static void
981witness_init(struct mtx *m, int flag)
982{
983	m->mtx_witness = enroll(m->mtx_description, flag);
984}
985
986static void
987witness_destroy(struct mtx *m)
988{
989	struct mtx *m1;
990	struct proc *p;
991	p = curproc;
992	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
993		if (m1 == m) {
994			LIST_REMOVE(m, mtx_held);
995			break;
996		}
997	}
998	return;
999
1000}
1001
1002static void
1003witness_display(void(*prnt)(const char *fmt, ...))
1004{
1005	struct witness *w, *w1;
1006	int level, found;
1007
1008	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1009	witness_levelall();
1010
1011	/*
1012	 * First, handle sleep mutexes which have been acquired at least
1013	 * once.
1014	 */
1015	prnt("Sleep mutexes:\n");
1016	for (w = w_all; w; w = w->w_next) {
1017		if (w->w_file == NULL || w->w_spin)
1018			continue;
1019		for (w1 = w_all; w1; w1 = w1->w_next) {
1020			if (isitmychild(w1, w))
1021				break;
1022		}
1023		if (w1 != NULL)
1024			continue;
1025		/*
1026		 * This lock has no anscestors, display its descendants.
1027		 */
1028		witness_displaydescendants(prnt, w);
1029	}
1030
1031	/*
1032	 * Now do spin mutexes which have been acquired at least once.
1033	 */
1034	prnt("\nSpin mutexes:\n");
1035	level = 0;
1036	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1037		found = 0;
1038		for (w = w_all; w; w = w->w_next) {
1039			if (w->w_file == NULL || !w->w_spin)
1040				continue;
1041			if (w->w_level == 1 << level) {
1042				witness_displaydescendants(prnt, w);
1043				level++;
1044				found = 1;
1045			}
1046		}
1047		if (found == 0)
1048			level++;
1049	}
1050
1051	/*
1052	 * Finally, any mutexes which have not been acquired yet.
1053	 */
1054	prnt("\nMutexes which were never acquired:\n");
1055	for (w = w_all; w; w = w->w_next) {
1056		if (w->w_file != NULL)
1057			continue;
1058		prnt("%s\n", w->w_description);
1059	}
1060}
1061
1062void
1063witness_enter(struct mtx *m, int flags, const char *file, int line)
1064{
1065	struct witness *w, *w1;
1066	struct mtx *m1;
1067	struct proc *p;
1068	int i;
1069#ifdef DDB
1070	int go_into_ddb = 0;
1071#endif /* DDB */
1072
1073	if (witness_cold || m->mtx_witness == NULL || panicstr)
1074		return;
1075	w = m->mtx_witness;
1076	p = curproc;
1077
1078	if (flags & MTX_SPIN) {
1079		if ((m->mtx_flags & MTX_SPIN) == 0)
1080			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1081			    " %s:%d", m->mtx_description, file, line);
1082		if (mtx_recursed(m)) {
1083			if ((m->mtx_flags & MTX_RECURSE) == 0)
1084				panic("mutex_enter: recursion on non-recursive"
1085				    " mutex %s @ %s:%d", m->mtx_description,
1086				    file, line);
1087			return;
1088		}
1089		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1090		i = PCPU_GET(witness_spin_check);
1091		if (i != 0 && w->w_level < i) {
1092			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1093			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1094			    " %s:%d already holding %s:%x",
1095			    m->mtx_description, w->w_level, file, line,
1096			    spin_order_list[ffs(i)-1], i);
1097		}
1098		PCPU_SET(witness_spin_check, i | w->w_level);
1099		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1100		w->w_file = file;
1101		w->w_line = line;
1102		m->mtx_line = line;
1103		m->mtx_file = file;
1104		return;
1105	}
1106	if ((m->mtx_flags & MTX_SPIN) != 0)
1107		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1108		    m->mtx_description, file, line);
1109
1110	if (mtx_recursed(m)) {
1111		if ((m->mtx_flags & MTX_RECURSE) == 0)
1112			panic("mutex_enter: recursion on non-recursive"
1113			    " mutex %s @ %s:%d", m->mtx_description,
1114			    file, line);
1115		return;
1116	}
1117	if (witness_dead)
1118		goto out;
1119	if (cold)
1120		goto out;
1121
1122	if (!mtx_legal2block())
1123		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1124			    m->mtx_description, file, line);
1125	/*
1126	 * Is this the first mutex acquired
1127	 */
1128	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1129		goto out;
1130
1131	if ((w1 = m1->mtx_witness) == w) {
1132		if (w->w_same_squawked || dup_ok(w))
1133			goto out;
1134		w->w_same_squawked = 1;
1135		printf("acquring duplicate lock of same type: \"%s\"\n",
1136			m->mtx_description);
1137		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1138		printf(" 2nd @ %s:%d\n", file, line);
1139#ifdef DDB
1140		go_into_ddb = 1;
1141#endif /* DDB */
1142		goto out;
1143	}
1144	MPASS(!mtx_owned(&w_mtx));
1145	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1146	/*
1147	 * If we have a known higher number just say ok
1148	 */
1149	if (witness_watch > 1 && w->w_level > w1->w_level) {
1150		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1151		goto out;
1152	}
1153	if (isitmydescendant(m1->mtx_witness, w)) {
1154		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1155		goto out;
1156	}
1157	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1158
1159		MPASS(i < 200);
1160		w1 = m1->mtx_witness;
1161		if (isitmydescendant(w, w1)) {
1162			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1163			if (blessed(w, w1))
1164				goto out;
1165			if (m1 == &Giant) {
1166				if (w1->w_Giant_squawked)
1167					goto out;
1168				else
1169					w1->w_Giant_squawked = 1;
1170			} else {
1171				if (w1->w_other_squawked)
1172					goto out;
1173				else
1174					w1->w_other_squawked = 1;
1175			}
1176			printf("lock order reversal\n");
1177			printf(" 1st %s last acquired @ %s:%d\n",
1178			    w->w_description, w->w_file, w->w_line);
1179			printf(" 2nd %p %s @ %s:%d\n",
1180			    m1, w1->w_description, w1->w_file, w1->w_line);
1181			printf(" 3rd %p %s @ %s:%d\n",
1182			    m, w->w_description, file, line);
1183#ifdef DDB
1184			go_into_ddb = 1;
1185#endif /* DDB */
1186			goto out;
1187		}
1188	}
1189	m1 = LIST_FIRST(&p->p_heldmtx);
1190	if (!itismychild(m1->mtx_witness, w))
1191		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1192
1193out:
1194#ifdef DDB
1195	if (witness_ddb && go_into_ddb)
1196		Debugger("witness_enter");
1197#endif /* DDB */
1198	w->w_file = file;
1199	w->w_line = line;
1200	m->mtx_line = line;
1201	m->mtx_file = file;
1202
1203	/*
1204	 * If this pays off it likely means that a mutex being witnessed
1205	 * is acquired in hardclock. Put it in the ignore list. It is
1206	 * likely not the mutex this assert fails on.
1207	 */
1208	MPASS(m->mtx_held.le_prev == NULL);
1209	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1210}
1211
1212void
1213witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1214{
1215	struct proc *p;
1216	struct witness *w = m->mtx_witness;
1217
1218	if (witness_cold)
1219		return;
1220	if (panicstr)
1221		return;
1222	if (flags & MTX_SPIN) {
1223		if ((m->mtx_flags & MTX_SPIN) == 0)
1224			panic("mutex_try_enter: "
1225			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1226			    m->mtx_description, file, line);
1227		if (mtx_recursed(m)) {
1228			if ((m->mtx_flags & MTX_RECURSE) == 0)
1229				panic("mutex_try_enter: recursion on"
1230				    " non-recursive mutex %s @ %s:%d",
1231				    m->mtx_description, file, line);
1232			return;
1233		}
1234		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1235		PCPU_SET(witness_spin_check,
1236		    PCPU_GET(witness_spin_check) | w->w_level);
1237		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1238		w->w_file = file;
1239		w->w_line = line;
1240		m->mtx_line = line;
1241		m->mtx_file = file;
1242		return;
1243	}
1244
1245	if ((m->mtx_flags & MTX_SPIN) != 0)
1246		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1247		    m->mtx_description, file, line);
1248
1249	if (mtx_recursed(m)) {
1250		if ((m->mtx_flags & MTX_RECURSE) == 0)
1251			panic("mutex_try_enter: recursion on non-recursive"
1252			    " mutex %s @ %s:%d", m->mtx_description, file,
1253			    line);
1254		return;
1255	}
1256	w->w_file = file;
1257	w->w_line = line;
1258	m->mtx_line = line;
1259	m->mtx_file = file;
1260	p = curproc;
1261	MPASS(m->mtx_held.le_prev == NULL);
1262	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1263}
1264
1265void
1266witness_exit(struct mtx *m, int flags, const char *file, int line)
1267{
1268	struct witness *w;
1269
1270	if (witness_cold || m->mtx_witness == NULL || panicstr)
1271		return;
1272	w = m->mtx_witness;
1273
1274	if (flags & MTX_SPIN) {
1275		if ((m->mtx_flags & MTX_SPIN) == 0)
1276			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1277			    " %s:%d", m->mtx_description, file, line);
1278		if (mtx_recursed(m)) {
1279			if ((m->mtx_flags & MTX_RECURSE) == 0)
1280				panic("mutex_exit: recursion on non-recursive"
1281				    " mutex %s @ %s:%d", m->mtx_description,
1282				    file, line);
1283			return;
1284		}
1285		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1286		PCPU_SET(witness_spin_check,
1287		    PCPU_GET(witness_spin_check) & ~w->w_level);
1288		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1289		return;
1290	}
1291	if ((m->mtx_flags & MTX_SPIN) != 0)
1292		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1293		    m->mtx_description, file, line);
1294
1295	if (mtx_recursed(m)) {
1296		if ((m->mtx_flags & MTX_RECURSE) == 0)
1297			panic("mutex_exit: recursion on non-recursive"
1298			    " mutex %s @ %s:%d", m->mtx_description,
1299			    file, line);
1300		return;
1301	}
1302
1303	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1304		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1305			    m->mtx_description, file, line);
1306	LIST_REMOVE(m, mtx_held);
1307	m->mtx_held.le_prev = NULL;
1308}
1309
1310int
1311witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1312{
1313	struct mtx *m;
1314	struct proc *p;
1315	char **sleep;
1316	int n = 0;
1317
1318	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1319	p = curproc;
1320	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1321		if (m == mtx)
1322			continue;
1323		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1324			if (strcmp(m->mtx_description, *sleep) == 0)
1325				goto next;
1326		if (n == 0)
1327			printf("Whee!\n");
1328		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1329			file, line, check_only ? "could sleep" : "sleeping",
1330			m->mtx_description,
1331			m->mtx_witness->w_file, m->mtx_witness->w_line);
1332		n++;
1333	next:
1334	}
1335#ifdef DDB
1336	if (witness_ddb && n)
1337		Debugger("witness_sleep");
1338#endif /* DDB */
1339	return (n);
1340}
1341
1342static struct witness *
1343enroll(const char *description, int flag)
1344{
1345	int i;
1346	struct witness *w, *w1;
1347	char **ignore;
1348	char **order;
1349
1350	if (!witness_watch)
1351		return (NULL);
1352	for (ignore = ignore_list; *ignore != NULL; ignore++)
1353		if (strcmp(description, *ignore) == 0)
1354			return (NULL);
1355
1356	if (w_inited == 0) {
1357		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1358		for (i = 0; i < WITNESS_COUNT; i++) {
1359			w = &w_data[i];
1360			witness_free(w);
1361		}
1362		w_inited = 1;
1363		for (order = order_list; *order != NULL; order++) {
1364			w = enroll(*order, MTX_DEF);
1365			w->w_file = "order list";
1366			for (order++; *order != NULL; order++) {
1367				w1 = enroll(*order, MTX_DEF);
1368				w1->w_file = "order list";
1369				itismychild(w, w1);
1370				w = w1;
1371    	    	    	}
1372		}
1373	}
1374	if ((flag & MTX_SPIN) && witness_skipspin)
1375		return (NULL);
1376	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1377	for (w = w_all; w; w = w->w_next) {
1378		if (strcmp(description, w->w_description) == 0) {
1379			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1380			return (w);
1381		}
1382	}
1383	if ((w = witness_get()) == NULL)
1384		return (NULL);
1385	w->w_next = w_all;
1386	w_all = w;
1387	w->w_description = description;
1388	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1389	if (flag & MTX_SPIN) {
1390		w->w_spin = 1;
1391
1392		i = 1;
1393		for (order = spin_order_list; *order != NULL; order++) {
1394			if (strcmp(description, *order) == 0)
1395				break;
1396			i <<= 1;
1397		}
1398		if (*order == NULL)
1399			panic("spin lock %s not in order list", description);
1400		w->w_level = i;
1401	}
1402
1403	return (w);
1404}
1405
1406static int
1407itismychild(struct witness *parent, struct witness *child)
1408{
1409	static int recursed;
1410
1411	/*
1412	 * Insert "child" after "parent"
1413	 */
1414	while (parent->w_morechildren)
1415		parent = parent->w_morechildren;
1416
1417	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1418		if ((parent->w_morechildren = witness_get()) == NULL)
1419			return (1);
1420		parent = parent->w_morechildren;
1421	}
1422	MPASS(child != NULL);
1423	parent->w_children[parent->w_childcnt++] = child;
1424	/*
1425	 * now prune whole tree
1426	 */
1427	if (recursed)
1428		return (0);
1429	recursed = 1;
1430	for (child = w_all; child != NULL; child = child->w_next) {
1431		for (parent = w_all; parent != NULL;
1432		    parent = parent->w_next) {
1433			if (!isitmychild(parent, child))
1434				continue;
1435			removechild(parent, child);
1436			if (isitmydescendant(parent, child))
1437				continue;
1438			itismychild(parent, child);
1439		}
1440	}
1441	recursed = 0;
1442	witness_levelall();
1443	return (0);
1444}
1445
1446static void
1447removechild(struct witness *parent, struct witness *child)
1448{
1449	struct witness *w, *w1;
1450	int i;
1451
1452	for (w = parent; w != NULL; w = w->w_morechildren)
1453		for (i = 0; i < w->w_childcnt; i++)
1454			if (w->w_children[i] == child)
1455				goto found;
1456	return;
1457found:
1458	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1459		continue;
1460	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1461	MPASS(w->w_children[i] != NULL);
1462
1463	if (w1->w_childcnt != 0)
1464		return;
1465
1466	if (w1 == parent)
1467		return;
1468	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1469		continue;
1470	w->w_morechildren = 0;
1471	witness_free(w1);
1472}
1473
1474static int
1475isitmychild(struct witness *parent, struct witness *child)
1476{
1477	struct witness *w;
1478	int i;
1479
1480	for (w = parent; w != NULL; w = w->w_morechildren) {
1481		for (i = 0; i < w->w_childcnt; i++) {
1482			if (w->w_children[i] == child)
1483				return (1);
1484		}
1485	}
1486	return (0);
1487}
1488
1489static int
1490isitmydescendant(struct witness *parent, struct witness *child)
1491{
1492	struct witness *w;
1493	int i;
1494	int j;
1495
1496	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1497		MPASS(j < 1000);
1498		for (i = 0; i < w->w_childcnt; i++) {
1499			if (w->w_children[i] == child)
1500				return (1);
1501		}
1502		for (i = 0; i < w->w_childcnt; i++) {
1503			if (isitmydescendant(w->w_children[i], child))
1504				return (1);
1505		}
1506	}
1507	return (0);
1508}
1509
1510void
1511witness_levelall (void)
1512{
1513	struct witness *w, *w1;
1514
1515	for (w = w_all; w; w = w->w_next)
1516		if (!(w->w_spin))
1517			w->w_level = 0;
1518	for (w = w_all; w; w = w->w_next) {
1519		if (w->w_spin)
1520			continue;
1521		for (w1 = w_all; w1; w1 = w1->w_next) {
1522			if (isitmychild(w1, w))
1523				break;
1524		}
1525		if (w1 != NULL)
1526			continue;
1527		witness_leveldescendents(w, 0);
1528	}
1529}
1530
1531static void
1532witness_leveldescendents(struct witness *parent, int level)
1533{
1534	int i;
1535	struct witness *w;
1536
1537	if (parent->w_level < level)
1538		parent->w_level = level;
1539	level++;
1540	for (w = parent; w != NULL; w = w->w_morechildren)
1541		for (i = 0; i < w->w_childcnt; i++)
1542			witness_leveldescendents(w->w_children[i], level);
1543}
1544
1545static void
1546witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1547			   struct witness *parent)
1548{
1549	struct witness *w;
1550	int i;
1551	int level;
1552
1553	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1554
1555	prnt("%d", level);
1556	if (level < 10)
1557		prnt(" ");
1558	for (i = 0; i < level; i++)
1559		prnt(" ");
1560	prnt("%s", parent->w_description);
1561	if (parent->w_file != NULL)
1562		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1563		    parent->w_line);
1564
1565	for (w = parent; w != NULL; w = w->w_morechildren)
1566		for (i = 0; i < w->w_childcnt; i++)
1567			    witness_displaydescendants(prnt, w->w_children[i]);
1568    }
1569
1570static int
1571dup_ok(struct witness *w)
1572{
1573	char **dup;
1574
1575	for (dup = dup_list; *dup!= NULL; dup++)
1576		if (strcmp(w->w_description, *dup) == 0)
1577			return (1);
1578	return (0);
1579}
1580
1581static int
1582blessed(struct witness *w1, struct witness *w2)
1583{
1584	int i;
1585	struct witness_blessed *b;
1586
1587	for (i = 0; i < blessed_count; i++) {
1588		b = &blessed_list[i];
1589		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1590			if (strcmp(w2->w_description, b->b_lock2) == 0)
1591				return (1);
1592			continue;
1593		}
1594		if (strcmp(w1->w_description, b->b_lock2) == 0)
1595			if (strcmp(w2->w_description, b->b_lock1) == 0)
1596				return (1);
1597	}
1598	return (0);
1599}
1600
1601static struct witness *
1602witness_get()
1603{
1604	struct witness *w;
1605
1606	if ((w = w_free) == NULL) {
1607		witness_dead = 1;
1608		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1609		printf("witness exhausted\n");
1610		return (NULL);
1611	}
1612	w_free = w->w_next;
1613	bzero(w, sizeof(*w));
1614	return (w);
1615}
1616
1617static void
1618witness_free(struct witness *w)
1619{
1620	w->w_next = w_free;
1621	w_free = w;
1622}
1623
1624int
1625witness_list(struct proc *p)
1626{
1627	struct mtx *m;
1628	int nheld;
1629
1630	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1631	nheld = 0;
1632	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1633		printf("\t\"%s\" (%p) locked at %s:%d\n",
1634		    m->mtx_description, m,
1635		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1636		nheld++;
1637	}
1638
1639	return (nheld);
1640}
1641
1642#ifdef DDB
1643
1644DB_SHOW_COMMAND(mutexes, db_witness_list)
1645{
1646
1647	witness_list(curproc);
1648}
1649
1650DB_SHOW_COMMAND(witness, db_witness_display)
1651{
1652
1653	witness_display(db_printf);
1654}
1655#endif
1656
1657void
1658witness_save(struct mtx *m, const char **filep, int *linep)
1659{
1660
1661	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1662	if (m->mtx_witness == NULL)
1663		return;
1664
1665	*filep = m->mtx_witness->w_file;
1666	*linep = m->mtx_witness->w_line;
1667}
1668
1669void
1670witness_restore(struct mtx *m, const char *file, int line)
1671{
1672
1673	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1674	if (m->mtx_witness == NULL)
1675		return;
1676
1677	m->mtx_witness->w_file = file;
1678	m->mtx_witness->w_line = line;
1679}
1680
1681#endif	/* WITNESS */
1682