subr_turnstile.c revision 74900
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_turnstile.c 74900 2001-03-28 02:40:47Z jhb $
31 */
32
33/*
34 * Machine independent bits of mutex implementation and implementation of
35 * `witness' structure & related debugging routines.
36 */
37
38/*
39 *	Main Entry: witness
40 *	Pronunciation: 'wit-n&s
41 *	Function: noun
42 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
43 *	    testimony, witness, from 2wit
44 *	Date: before 12th century
45 *	1 : attestation of a fact or event : TESTIMONY
46 *	2 : one that gives evidence; specifically : one who testifies in
47 *	    a cause or before a judicial tribunal
48 *	3 : one asked to be present at a transaction so as to be able to
49 *	    testify to its having taken place
50 *	4 : one who has personal knowledge of something
51 *	5 a : something serving as evidence or proof : SIGN
52 *	  b : public affirmation by word or example of usually
53 *	      religious faith or conviction <the heroic witness to divine
54 *	      life -- Pilot>
55 *	6 capitalized : a member of the Jehovah's Witnesses
56 */
57
58#include "opt_ddb.h"
59#include "opt_witness.h"
60
61#include <sys/param.h>
62#include <sys/bus.h>
63#include <sys/kernel.h>
64#include <sys/malloc.h>
65#include <sys/proc.h>
66#include <sys/sysctl.h>
67#include <sys/systm.h>
68#include <sys/vmmeter.h>
69#include <sys/ktr.h>
70
71#include <machine/atomic.h>
72#include <machine/bus.h>
73#include <machine/clock.h>
74#include <machine/cpu.h>
75
76#include <ddb/ddb.h>
77
78#include <vm/vm.h>
79#include <vm/vm_extern.h>
80
81#include <sys/mutex.h>
82
83/*
84 * The WITNESS-enabled mutex debug structure.
85 */
86#ifdef WITNESS
87struct mtx_debug {
88	struct witness	*mtxd_witness;
89	LIST_ENTRY(mtx)	mtxd_held;
90	const char	*mtxd_file;
91	int		mtxd_line;
92};
93
94#define mtx_held	mtx_debug->mtxd_held
95#define	mtx_file	mtx_debug->mtxd_file
96#define	mtx_line	mtx_debug->mtxd_line
97#define	mtx_witness	mtx_debug->mtxd_witness
98#endif	/* WITNESS */
99
100/*
101 * Internal utility macros.
102 */
103#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104
105#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107
108#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
109
110/*
111 * Early WITNESS-enabled declarations.
112 */
113#ifdef WITNESS
114
115/*
116 * Internal WITNESS routines which must be prototyped early.
117 *
118 * XXX: When/if witness code is cleaned up, it would be wise to place all
119 *	witness prototyping early in this file.
120 */
121static void witness_init(struct mtx *, int flag);
122static void witness_destroy(struct mtx *);
123static void witness_display(void(*)(const char *fmt, ...));
124
125MALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
126
127/* All mutexes in system (used for debug/panic) */
128static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
129
130/*
131 * This global is set to 0 once it becomes safe to use the witness code.
132 */
133static int witness_cold = 1;
134
135#else	/* WITNESS */
136
137/* XXX XXX XXX
138 * flag++ is sleazoid way of shuting up warning
139 */
140#define witness_init(m, flag) flag++
141#define witness_destroy(m)
142#define witness_try_enter(m, t, f, l)
143#endif	/* WITNESS */
144
145/*
146 * All mutex locks in system are kept on the all_mtx list.
147 */
148static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
149	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
150	{ NULL, NULL }, &all_mtx, &all_mtx,
151#ifdef WITNESS
152	&all_mtx_debug
153#else
154	NULL
155#endif
156	 };
157
158/*
159 * Global variables for book keeping.
160 */
161static int	mtx_cur_cnt;
162static int	mtx_max_cnt;
163
164/*
165 * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
166 */
167char	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
168char	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
169char	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
170char	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
171
172/*
173 * Prototypes for non-exported routines.
174 *
175 * NOTE: Prototypes for witness routines are placed at the bottom of the file.
176 */
177static void	propagate_priority(struct proc *);
178
179static void
180propagate_priority(struct proc *p)
181{
182	int pri = p->p_pri.pri_level;
183	struct mtx *m = p->p_blocked;
184
185	mtx_assert(&sched_lock, MA_OWNED);
186	for (;;) {
187		struct proc *p1;
188
189		p = mtx_owner(m);
190
191		if (p == NULL) {
192			/*
193			 * This really isn't quite right. Really
194			 * ought to bump priority of process that
195			 * next acquires the mutex.
196			 */
197			MPASS(m->mtx_lock == MTX_CONTESTED);
198			return;
199		}
200
201		MPASS(p->p_magic == P_MAGIC);
202		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
203		if (p->p_pri.pri_level <= pri)
204			return;
205
206		/*
207		 * Bump this process' priority.
208		 */
209		SET_PRIO(p, pri);
210
211		/*
212		 * If lock holder is actually running, just bump priority.
213		 */
214		if (p->p_oncpu != NOCPU) {
215			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
216			return;
217		}
218
219#ifndef SMP
220		/*
221		 * For UP, we check to see if p is curproc (this shouldn't
222		 * ever happen however as it would mean we are in a deadlock.)
223		 */
224		KASSERT(p != curproc, ("Deadlock detected"));
225#endif
226
227		/*
228		 * If on run queue move to new run queue, and
229		 * quit.
230		 */
231		if (p->p_stat == SRUN) {
232			MPASS(p->p_blocked == NULL);
233			remrunqueue(p);
234			setrunqueue(p);
235			return;
236		}
237
238		/*
239		 * If we aren't blocked on a mutex, we should be.
240		 */
241		KASSERT(p->p_stat == SMTX, (
242		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
243		    p->p_pid, p->p_comm, p->p_stat,
244		    m->mtx_description));
245
246		/*
247		 * Pick up the mutex that p is blocked on.
248		 */
249		m = p->p_blocked;
250		MPASS(m != NULL);
251
252		/*
253		 * Check if the proc needs to be moved up on
254		 * the blocked chain
255		 */
256		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
257			continue;
258		}
259
260		p1 = TAILQ_PREV(p, procqueue, p_procq);
261		if (p1->p_pri.pri_level <= pri) {
262			continue;
263		}
264
265		/*
266		 * Remove proc from blocked chain and determine where
267		 * it should be moved up to.  Since we know that p1 has
268		 * a lower priority than p, we know that at least one
269		 * process in the chain has a lower priority and that
270		 * p1 will thus not be NULL after the loop.
271		 */
272		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
273		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
274			MPASS(p1->p_magic == P_MAGIC);
275			if (p1->p_pri.pri_level > pri)
276				break;
277		}
278
279		MPASS(p1 != NULL);
280		TAILQ_INSERT_BEFORE(p1, p, p_procq);
281		CTR4(KTR_LOCK,
282		    "propagate_priority: p %p moved before %p on [%p] %s",
283		    p, p1, m, m->mtx_description);
284	}
285}
286
287/*
288 * Function versions of the inlined __mtx_* macros.  These are used by
289 * modules and can also be called from assembly language if needed.
290 */
291void
292_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
293{
294
295	__mtx_lock_flags(m, opts, file, line);
296}
297
298void
299_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
300{
301
302	__mtx_unlock_flags(m, opts, file, line);
303}
304
305void
306_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
307{
308
309	__mtx_lock_spin_flags(m, opts, file, line);
310}
311
312void
313_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
314{
315
316	__mtx_unlock_spin_flags(m, opts, file, line);
317}
318
319/*
320 * The important part of mtx_trylock{,_flags}()
321 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
322 * if we're called, it's because we know we don't already own this lock.
323 */
324int
325_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
326{
327	int rval;
328
329	MPASS(curproc != NULL);
330
331	/*
332	 * _mtx_trylock does not accept MTX_NOSWITCH option.
333	 */
334	KASSERT((opts & MTX_NOSWITCH) == 0,
335	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
336
337	rval = _obtain_lock(m, curproc);
338
339#ifdef WITNESS
340	if (rval && m->mtx_witness != NULL) {
341		/*
342		 * We do not handle recursion in _mtx_trylock; see the
343		 * note at the top of the routine.
344		 */
345		KASSERT(!mtx_recursed(m),
346		    ("mtx_trylock() called on a recursed mutex"));
347		witness_try_enter(m, (opts | m->mtx_flags), file, line);
348	}
349#endif	/* WITNESS */
350
351	if ((opts & MTX_QUIET) == 0)
352		CTR5(KTR_LOCK, "TRY_LOCK %s [%p] result=%d at %s:%d",
353		    m->mtx_description, m, rval, file, line);
354
355	return rval;
356}
357
358/*
359 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
360 *
361 * We call this if the lock is either contested (i.e. we need to go to
362 * sleep waiting for it), or if we need to recurse on it.
363 */
364void
365_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
366{
367	struct proc *p = curproc;
368
369	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
370		m->mtx_recurse++;
371		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
372		if ((opts & MTX_QUIET) == 0)
373			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
374		return;
375	}
376
377	if ((opts & MTX_QUIET) == 0)
378		CTR4(KTR_LOCK,
379		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
380		    m->mtx_description, (void *)m->mtx_lock, file, line);
381
382	while (!_obtain_lock(m, p)) {
383		uintptr_t v;
384		struct proc *p1;
385
386		mtx_lock_spin(&sched_lock);
387		/*
388		 * Check if the lock has been released while spinning for
389		 * the sched_lock.
390		 */
391		if ((v = m->mtx_lock) == MTX_UNOWNED) {
392			mtx_unlock_spin(&sched_lock);
393			continue;
394		}
395
396		/*
397		 * The mutex was marked contested on release. This means that
398		 * there are processes blocked on it.
399		 */
400		if (v == MTX_CONTESTED) {
401			p1 = TAILQ_FIRST(&m->mtx_blocked);
402			MPASS(p1 != NULL);
403			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
404
405			if (p1->p_pri.pri_level < p->p_pri.pri_level)
406				SET_PRIO(p, p1->p_pri.pri_level);
407			mtx_unlock_spin(&sched_lock);
408			return;
409		}
410
411		/*
412		 * If the mutex isn't already contested and a failure occurs
413		 * setting the contested bit, the mutex was either released
414		 * or the state of the MTX_RECURSED bit changed.
415		 */
416		if ((v & MTX_CONTESTED) == 0 &&
417		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
418			(void *)(v | MTX_CONTESTED))) {
419			mtx_unlock_spin(&sched_lock);
420			continue;
421		}
422
423		/*
424		 * We deffinately must sleep for this lock.
425		 */
426		mtx_assert(m, MA_NOTOWNED);
427
428#ifdef notyet
429		/*
430		 * If we're borrowing an interrupted thread's VM context, we
431		 * must clean up before going to sleep.
432		 */
433		if (p->p_ithd != NULL) {
434			struct ithd *it = p->p_ithd;
435
436			if (it->it_interrupted) {
437				if ((opts & MTX_QUIET) == 0)
438					CTR2(KTR_LOCK,
439				    "_mtx_lock_sleep: %p interrupted %p",
440					    it, it->it_interrupted);
441				intr_thd_fixup(it);
442			}
443		}
444#endif
445
446		/*
447		 * Put us on the list of threads blocked on this mutex.
448		 */
449		if (TAILQ_EMPTY(&m->mtx_blocked)) {
450			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
451			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
452			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
453		} else {
454			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
455				if (p1->p_pri.pri_level > p->p_pri.pri_level)
456					break;
457			if (p1)
458				TAILQ_INSERT_BEFORE(p1, p, p_procq);
459			else
460				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
461		}
462
463		/*
464		 * Save who we're blocked on.
465		 */
466		p->p_blocked = m;
467		p->p_mtxname = m->mtx_description;
468		p->p_stat = SMTX;
469		propagate_priority(p);
470
471		if ((opts & MTX_QUIET) == 0)
472			CTR3(KTR_LOCK,
473			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
474			    m->mtx_description);
475
476		mi_switch();
477
478		if ((opts & MTX_QUIET) == 0)
479			CTR3(KTR_LOCK,
480			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
481			  p, m, m->mtx_description);
482
483		mtx_unlock_spin(&sched_lock);
484	}
485
486	return;
487}
488
489/*
490 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
491 *
492 * This is only called if we need to actually spin for the lock. Recursion
493 * is handled inline.
494 */
495void
496_mtx_lock_spin(struct mtx *m, int opts, critical_t mtx_crit, const char *file,
497	       int line)
498{
499	int i = 0;
500
501	if ((opts & MTX_QUIET) == 0)
502		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
503
504	for (;;) {
505		if (_obtain_lock(m, curproc))
506			break;
507
508		while (m->mtx_lock != MTX_UNOWNED) {
509			if (i++ < 1000000)
510				continue;
511			if (i++ < 6000000)
512				DELAY(1);
513#ifdef DDB
514			else if (!db_active)
515#else
516			else
517#endif
518			panic("spin lock %s held by %p for > 5 seconds",
519			    m->mtx_description, (void *)m->mtx_lock);
520		}
521	}
522
523	m->mtx_savecrit = mtx_crit;
524	if ((opts & MTX_QUIET) == 0)
525		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
526
527	return;
528}
529
530/*
531 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
532 *
533 * We are only called here if the lock is recursed or contested (i.e. we
534 * need to wake up a blocked thread).
535 */
536void
537_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
538{
539	struct proc *p, *p1;
540	struct mtx *m1;
541	int pri;
542
543	p = curproc;
544
545	if (mtx_recursed(m)) {
546		if (--(m->mtx_recurse) == 0)
547			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
548		if ((opts & MTX_QUIET) == 0)
549			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
550		return;
551	}
552
553	mtx_lock_spin(&sched_lock);
554	if ((opts & MTX_QUIET) == 0)
555		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
556
557	p1 = TAILQ_FIRST(&m->mtx_blocked);
558	MPASS(p->p_magic == P_MAGIC);
559	MPASS(p1->p_magic == P_MAGIC);
560
561	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
562
563	if (TAILQ_EMPTY(&m->mtx_blocked)) {
564		LIST_REMOVE(m, mtx_contested);
565		_release_lock_quick(m);
566		if ((opts & MTX_QUIET) == 0)
567			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
568	} else
569		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
570
571	pri = PRI_MAX;
572	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
573		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
574		if (cp < pri)
575			pri = cp;
576	}
577
578	if (pri > p->p_pri.pri_native)
579		pri = p->p_pri.pri_native;
580	SET_PRIO(p, pri);
581
582	if ((opts & MTX_QUIET) == 0)
583		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
584		    m, p1);
585
586	p1->p_blocked = NULL;
587	p1->p_stat = SRUN;
588	setrunqueue(p1);
589
590	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
591#ifdef notyet
592		if (p->p_ithd != NULL) {
593			struct ithd *it = p->p_ithd;
594
595			if (it->it_interrupted) {
596				if ((opts & MTX_QUIET) == 0)
597					CTR2(KTR_LOCK,
598				    "_mtx_unlock_sleep: %p interrupted %p",
599					    it, it->it_interrupted);
600				intr_thd_fixup(it);
601			}
602		}
603#endif
604		setrunqueue(p);
605		if ((opts & MTX_QUIET) == 0)
606			CTR2(KTR_LOCK,
607			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
608			    (void *)m->mtx_lock);
609
610		mi_switch();
611		if ((opts & MTX_QUIET) == 0)
612			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
613			    m, (void *)m->mtx_lock);
614	}
615
616	mtx_unlock_spin(&sched_lock);
617
618	return;
619}
620
621/*
622 * All the unlocking of MTX_SPIN locks is done inline.
623 * See the _rel_spin_lock() macro for the details.
624 */
625
626/*
627 * The backing function for the INVARIANTS-enabled mtx_assert()
628 */
629#ifdef INVARIANT_SUPPORT
630void
631_mtx_assert(struct mtx *m, int what, const char *file, int line)
632{
633	switch (what) {
634	case MA_OWNED:
635	case MA_OWNED | MA_RECURSED:
636	case MA_OWNED | MA_NOTRECURSED:
637		if (!mtx_owned(m))
638			panic("mutex %s not owned at %s:%d",
639			    m->mtx_description, file, line);
640		if (mtx_recursed(m)) {
641			if ((what & MA_NOTRECURSED) != 0)
642				panic("mutex %s recursed at %s:%d",
643				    m->mtx_description, file, line);
644		} else if ((what & MA_RECURSED) != 0) {
645			panic("mutex %s unrecursed at %s:%d",
646			    m->mtx_description, file, line);
647		}
648		break;
649	case MA_NOTOWNED:
650		if (mtx_owned(m))
651			panic("mutex %s owned at %s:%d",
652			    m->mtx_description, file, line);
653		break;
654	default:
655		panic("unknown mtx_assert at %s:%d", file, line);
656	}
657}
658#endif
659
660/*
661 * The MUTEX_DEBUG-enabled mtx_validate()
662 */
663#define MV_DESTROY	0	/* validate before destory */
664#define MV_INIT		1	/* validate before init */
665
666#ifdef MUTEX_DEBUG
667
668int mtx_validate __P((struct mtx *, int));
669
670int
671mtx_validate(struct mtx *m, int when)
672{
673	struct mtx *mp;
674	int i;
675	int retval = 0;
676
677#ifdef WITNESS
678	if (witness_cold)
679		return 0;
680#endif
681	if (m == &all_mtx || cold)
682		return 0;
683
684	mtx_lock(&all_mtx);
685/*
686 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
687 * we can re-enable the kernacc() checks.
688 */
689#ifndef __alpha__
690	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
691	    VM_PROT_READ) == 1);
692#endif
693	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
694	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
695#ifndef __alpha__
696		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
697		    VM_PROT_READ) != 1) {
698			panic("mtx_validate: mp=%p mp->mtx_next=%p",
699			    mp, mp->mtx_next);
700		}
701#endif
702		i++;
703		if (i > mtx_cur_cnt) {
704			panic("mtx_validate: too many in chain, known=%d\n",
705			    mtx_cur_cnt);
706		}
707	}
708	MPASS(i == mtx_cur_cnt);
709	switch (when) {
710	case MV_DESTROY:
711		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
712			if (mp == m)
713				break;
714		MPASS(mp == m);
715		break;
716	case MV_INIT:
717		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
718		if (mp == m) {
719			/*
720			 * Not good. This mutex already exists.
721			 */
722			printf("re-initing existing mutex %s\n",
723			    m->mtx_description);
724			MPASS(m->mtx_lock == MTX_UNOWNED);
725			retval = 1;
726		}
727	}
728	mtx_unlock(&all_mtx);
729	return (retval);
730}
731#endif
732
733/*
734 * Mutex initialization routine; initialize lock `m' of type contained in
735 * `opts' with options contained in `opts' and description `description.'
736 * Place on "all_mtx" queue.
737 */
738void
739mtx_init(struct mtx *m, const char *description, int opts)
740{
741
742	if ((opts & MTX_QUIET) == 0)
743		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
744
745#ifdef MUTEX_DEBUG
746	/* Diagnostic and error correction */
747	if (mtx_validate(m, MV_INIT))
748		return;
749#endif
750
751	bzero((void *)m, sizeof *m);
752	TAILQ_INIT(&m->mtx_blocked);
753
754#ifdef WITNESS
755	if (!witness_cold) {
756		m->mtx_debug = malloc(sizeof(struct mtx_debug),
757		    M_WITNESS, M_NOWAIT | M_ZERO);
758		MPASS(m->mtx_debug != NULL);
759	}
760#endif
761
762	m->mtx_description = description;
763	m->mtx_flags = opts;
764	m->mtx_lock = MTX_UNOWNED;
765
766	/* Put on all mutex queue */
767	mtx_lock(&all_mtx);
768	m->mtx_next = &all_mtx;
769	m->mtx_prev = all_mtx.mtx_prev;
770	m->mtx_prev->mtx_next = m;
771	all_mtx.mtx_prev = m;
772	if (++mtx_cur_cnt > mtx_max_cnt)
773		mtx_max_cnt = mtx_cur_cnt;
774	mtx_unlock(&all_mtx);
775
776#ifdef WITNESS
777	if (!witness_cold)
778		witness_init(m, opts);
779#endif
780}
781
782/*
783 * Remove lock `m' from all_mtx queue.
784 */
785void
786mtx_destroy(struct mtx *m)
787{
788
789#ifdef WITNESS
790	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
791	    __FUNCTION__));
792#endif
793
794	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
795
796#ifdef MUTEX_DEBUG
797	if (m->mtx_next == NULL)
798		panic("mtx_destroy: %p (%s) already destroyed",
799		    m, m->mtx_description);
800
801	if (!mtx_owned(m)) {
802		MPASS(m->mtx_lock == MTX_UNOWNED);
803	} else {
804		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
805	}
806
807	/* diagnostic */
808	mtx_validate(m, MV_DESTROY);
809#endif
810
811#ifdef WITNESS
812	if (m->mtx_witness)
813		witness_destroy(m);
814#endif /* WITNESS */
815
816	/* Remove from the all mutex queue */
817	mtx_lock(&all_mtx);
818	m->mtx_next->mtx_prev = m->mtx_prev;
819	m->mtx_prev->mtx_next = m->mtx_next;
820
821#ifdef MUTEX_DEBUG
822	m->mtx_next = m->mtx_prev = NULL;
823#endif
824
825#ifdef WITNESS
826	free(m->mtx_debug, M_WITNESS);
827	m->mtx_debug = NULL;
828#endif
829
830	mtx_cur_cnt--;
831	mtx_unlock(&all_mtx);
832}
833
834
835/*
836 * The WITNESS-enabled diagnostic code.
837 */
838#ifdef WITNESS
839static void
840witness_fixup(void *dummy __unused)
841{
842	struct mtx *mp;
843
844	/*
845	 * We have to release Giant before initializing its witness
846	 * structure so that WITNESS doesn't get confused.
847	 */
848	mtx_unlock(&Giant);
849	mtx_assert(&Giant, MA_NOTOWNED);
850
851	mtx_lock(&all_mtx);
852
853	/* Iterate through all mutexes and finish up mutex initialization. */
854	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
855
856		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
857		    M_WITNESS, M_NOWAIT | M_ZERO);
858		MPASS(mp->mtx_debug != NULL);
859
860		witness_init(mp, mp->mtx_flags);
861	}
862	mtx_unlock(&all_mtx);
863
864	/* Mark the witness code as being ready for use. */
865	atomic_store_rel_int(&witness_cold, 0);
866
867	mtx_lock(&Giant);
868}
869SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
870
871#define WITNESS_COUNT 200
872#define	WITNESS_NCHILDREN 2
873
874int witness_watch = 1;
875
876struct witness {
877	struct witness	*w_next;
878	const char	*w_description;
879	const char	*w_file;
880	int		 w_line;
881	struct witness	*w_morechildren;
882	u_char		 w_childcnt;
883	u_char		 w_Giant_squawked:1;
884	u_char		 w_other_squawked:1;
885	u_char		 w_same_squawked:1;
886	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
887	u_int		 w_level;
888	struct witness	*w_children[WITNESS_NCHILDREN];
889};
890
891struct witness_blessed {
892	char 	*b_lock1;
893	char	*b_lock2;
894};
895
896#ifdef DDB
897/*
898 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
899 * drop into kdebug() when:
900 *	- a lock heirarchy violation occurs
901 *	- locks are held when going to sleep.
902 */
903int	witness_ddb;
904#ifdef WITNESS_DDB
905TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
906#else
907TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
908#endif
909SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
910#endif /* DDB */
911
912int	witness_skipspin;
913#ifdef WITNESS_SKIPSPIN
914TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
915#else
916TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
917#endif
918SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
919    "");
920
921/*
922 * Witness-enabled globals
923 */
924static struct mtx	w_mtx;
925static struct witness	*w_free;
926static struct witness	*w_all;
927static int		 w_inited;
928static int		 witness_dead;	/* fatal error, probably no memory */
929
930static struct witness	 w_data[WITNESS_COUNT];
931
932/*
933 * Internal witness routine prototypes
934 */
935static struct witness *enroll(const char *description, int flag);
936static int itismychild(struct witness *parent, struct witness *child);
937static void removechild(struct witness *parent, struct witness *child);
938static int isitmychild(struct witness *parent, struct witness *child);
939static int isitmydescendant(struct witness *parent, struct witness *child);
940static int dup_ok(struct witness *);
941static int blessed(struct witness *, struct witness *);
942static void
943    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
944static void witness_leveldescendents(struct witness *parent, int level);
945static void witness_levelall(void);
946static struct witness * witness_get(void);
947static void witness_free(struct witness *m);
948
949static char *ignore_list[] = {
950	"witness lock",
951	NULL
952};
953
954static char *spin_order_list[] = {
955#if defined(__i386__) && defined (SMP)
956	"com",
957#endif
958	"sio",
959#ifdef __i386__
960	"cy",
961#endif
962	"ng_node",
963	"ng_worklist",
964	"ithread table lock",
965	"ithread list lock",
966	"sched lock",
967#ifdef __i386__
968	"clk",
969#endif
970	"callout",
971	/*
972	 * leaf locks
973	 */
974#ifdef SMP
975#ifdef __i386__
976	"ap boot",
977	"imen",
978#endif
979	"smp rendezvous",
980#endif
981	NULL
982};
983
984static char *order_list[] = {
985	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
986	    "uidinfo struct", NULL,
987	NULL
988};
989
990static char *dup_list[] = {
991	"process lock",
992	NULL
993};
994
995static char *sleep_list[] = {
996	"Giant",
997	NULL
998};
999
1000/*
1001 * Pairs of locks which have been blessed
1002 * Don't complain about order problems with blessed locks
1003 */
1004static struct witness_blessed blessed_list[] = {
1005};
1006static int blessed_count =
1007	sizeof(blessed_list) / sizeof(struct witness_blessed);
1008
1009static void
1010witness_init(struct mtx *m, int flag)
1011{
1012	m->mtx_witness = enroll(m->mtx_description, flag);
1013}
1014
1015static void
1016witness_destroy(struct mtx *m)
1017{
1018	struct mtx *m1;
1019	struct proc *p;
1020	p = curproc;
1021	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
1022		if (m1 == m) {
1023			LIST_REMOVE(m, mtx_held);
1024			break;
1025		}
1026	}
1027	return;
1028
1029}
1030
1031static void
1032witness_display(void(*prnt)(const char *fmt, ...))
1033{
1034	struct witness *w, *w1;
1035	int level, found;
1036
1037	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1038	witness_levelall();
1039
1040	/*
1041	 * First, handle sleep mutexes which have been acquired at least
1042	 * once.
1043	 */
1044	prnt("Sleep mutexes:\n");
1045	for (w = w_all; w; w = w->w_next) {
1046		if (w->w_file == NULL || w->w_spin)
1047			continue;
1048		for (w1 = w_all; w1; w1 = w1->w_next) {
1049			if (isitmychild(w1, w))
1050				break;
1051		}
1052		if (w1 != NULL)
1053			continue;
1054		/*
1055		 * This lock has no anscestors, display its descendants.
1056		 */
1057		witness_displaydescendants(prnt, w);
1058	}
1059
1060	/*
1061	 * Now do spin mutexes which have been acquired at least once.
1062	 */
1063	prnt("\nSpin mutexes:\n");
1064	level = 0;
1065	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1066		found = 0;
1067		for (w = w_all; w; w = w->w_next) {
1068			if (w->w_file == NULL || !w->w_spin)
1069				continue;
1070			if (w->w_level == 1 << level) {
1071				witness_displaydescendants(prnt, w);
1072				level++;
1073				found = 1;
1074			}
1075		}
1076		if (found == 0)
1077			level++;
1078	}
1079
1080	/*
1081	 * Finally, any mutexes which have not been acquired yet.
1082	 */
1083	prnt("\nMutexes which were never acquired:\n");
1084	for (w = w_all; w; w = w->w_next) {
1085		if (w->w_file != NULL)
1086			continue;
1087		prnt("%s\n", w->w_description);
1088	}
1089}
1090
1091void
1092witness_enter(struct mtx *m, int flags, const char *file, int line)
1093{
1094	struct witness *w, *w1;
1095	struct mtx *m1;
1096	struct proc *p;
1097	int i;
1098#ifdef DDB
1099	int go_into_ddb = 0;
1100#endif /* DDB */
1101
1102	if (witness_cold || m->mtx_witness == NULL || panicstr)
1103		return;
1104	w = m->mtx_witness;
1105	p = curproc;
1106
1107	if (flags & MTX_SPIN) {
1108		if ((m->mtx_flags & MTX_SPIN) == 0)
1109			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1110			    " %s:%d", m->mtx_description, file, line);
1111		if (mtx_recursed(m)) {
1112			if ((m->mtx_flags & MTX_RECURSE) == 0)
1113				panic("mutex_enter: recursion on non-recursive"
1114				    " mutex %s @ %s:%d", m->mtx_description,
1115				    file, line);
1116			return;
1117		}
1118		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1119		i = PCPU_GET(witness_spin_check);
1120		if (i != 0 && w->w_level < i) {
1121			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1122			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1123			    " %s:%d already holding %s:%x",
1124			    m->mtx_description, w->w_level, file, line,
1125			    spin_order_list[ffs(i)-1], i);
1126		}
1127		PCPU_SET(witness_spin_check, i | w->w_level);
1128		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1129		p->p_spinlocks++;
1130		MPASS(p->p_spinlocks > 0);
1131		w->w_file = file;
1132		w->w_line = line;
1133		m->mtx_line = line;
1134		m->mtx_file = file;
1135		return;
1136	}
1137	if ((m->mtx_flags & MTX_SPIN) != 0)
1138		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1139		    m->mtx_description, file, line);
1140
1141	if (mtx_recursed(m)) {
1142		if ((m->mtx_flags & MTX_RECURSE) == 0)
1143			panic("mutex_enter: recursion on non-recursive"
1144			    " mutex %s @ %s:%d", m->mtx_description,
1145			    file, line);
1146		return;
1147	}
1148	if (witness_dead)
1149		goto out;
1150	if (cold)
1151		goto out;
1152
1153	if (p->p_spinlocks != 0)
1154		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1155			    m->mtx_description, file, line);
1156	/*
1157	 * Is this the first mutex acquired
1158	 */
1159	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1160		goto out;
1161
1162	if ((w1 = m1->mtx_witness) == w) {
1163		if (w->w_same_squawked || dup_ok(w))
1164			goto out;
1165		w->w_same_squawked = 1;
1166		printf("acquring duplicate lock of same type: \"%s\"\n",
1167			m->mtx_description);
1168		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1169		printf(" 2nd @ %s:%d\n", file, line);
1170#ifdef DDB
1171		go_into_ddb = 1;
1172#endif /* DDB */
1173		goto out;
1174	}
1175	MPASS(!mtx_owned(&w_mtx));
1176	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1177	/*
1178	 * If we have a known higher number just say ok
1179	 */
1180	if (witness_watch > 1 && w->w_level > w1->w_level) {
1181		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1182		goto out;
1183	}
1184	if (isitmydescendant(m1->mtx_witness, w)) {
1185		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1186		goto out;
1187	}
1188	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1189
1190		MPASS(i < 200);
1191		w1 = m1->mtx_witness;
1192		if (isitmydescendant(w, w1)) {
1193			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1194			if (blessed(w, w1))
1195				goto out;
1196			if (m1 == &Giant) {
1197				if (w1->w_Giant_squawked)
1198					goto out;
1199				else
1200					w1->w_Giant_squawked = 1;
1201			} else {
1202				if (w1->w_other_squawked)
1203					goto out;
1204				else
1205					w1->w_other_squawked = 1;
1206			}
1207			printf("lock order reversal\n");
1208			printf(" 1st %s last acquired @ %s:%d\n",
1209			    w->w_description, w->w_file, w->w_line);
1210			printf(" 2nd %p %s @ %s:%d\n",
1211			    m1, w1->w_description, w1->w_file, w1->w_line);
1212			printf(" 3rd %p %s @ %s:%d\n",
1213			    m, w->w_description, file, line);
1214#ifdef DDB
1215			go_into_ddb = 1;
1216#endif /* DDB */
1217			goto out;
1218		}
1219	}
1220	m1 = LIST_FIRST(&p->p_heldmtx);
1221	if (!itismychild(m1->mtx_witness, w))
1222		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1223
1224out:
1225#ifdef DDB
1226	if (witness_ddb && go_into_ddb)
1227		Debugger("witness_enter");
1228#endif /* DDB */
1229	w->w_file = file;
1230	w->w_line = line;
1231	m->mtx_line = line;
1232	m->mtx_file = file;
1233
1234	/*
1235	 * If this pays off it likely means that a mutex being witnessed
1236	 * is acquired in hardclock. Put it in the ignore list. It is
1237	 * likely not the mutex this assert fails on.
1238	 */
1239	MPASS(m->mtx_held.le_prev == NULL);
1240	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1241}
1242
1243void
1244witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1245{
1246	struct proc *p;
1247	struct witness *w = m->mtx_witness;
1248
1249	if (witness_cold)
1250		return;
1251	if (panicstr)
1252		return;
1253	if (flags & MTX_SPIN) {
1254		if ((m->mtx_flags & MTX_SPIN) == 0)
1255			panic("mutex_try_enter: "
1256			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1257			    m->mtx_description, file, line);
1258		if (mtx_recursed(m)) {
1259			if ((m->mtx_flags & MTX_RECURSE) == 0)
1260				panic("mutex_try_enter: recursion on"
1261				    " non-recursive mutex %s @ %s:%d",
1262				    m->mtx_description, file, line);
1263			return;
1264		}
1265		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1266		PCPU_SET(witness_spin_check,
1267		    PCPU_GET(witness_spin_check) | w->w_level);
1268		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1269		w->w_file = file;
1270		w->w_line = line;
1271		m->mtx_line = line;
1272		m->mtx_file = file;
1273		return;
1274	}
1275
1276	if ((m->mtx_flags & MTX_SPIN) != 0)
1277		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1278		    m->mtx_description, file, line);
1279
1280	if (mtx_recursed(m)) {
1281		if ((m->mtx_flags & MTX_RECURSE) == 0)
1282			panic("mutex_try_enter: recursion on non-recursive"
1283			    " mutex %s @ %s:%d", m->mtx_description, file,
1284			    line);
1285		return;
1286	}
1287	w->w_file = file;
1288	w->w_line = line;
1289	m->mtx_line = line;
1290	m->mtx_file = file;
1291	p = curproc;
1292	MPASS(m->mtx_held.le_prev == NULL);
1293	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1294}
1295
1296void
1297witness_exit(struct mtx *m, int flags, const char *file, int line)
1298{
1299	struct witness *w;
1300	struct proc *p;
1301
1302	if (witness_cold || m->mtx_witness == NULL || panicstr)
1303		return;
1304	w = m->mtx_witness;
1305	p = curproc;
1306
1307	if (flags & MTX_SPIN) {
1308		if ((m->mtx_flags & MTX_SPIN) == 0)
1309			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1310			    " %s:%d", m->mtx_description, file, line);
1311		if (mtx_recursed(m)) {
1312			if ((m->mtx_flags & MTX_RECURSE) == 0)
1313				panic("mutex_exit: recursion on non-recursive"
1314				    " mutex %s @ %s:%d", m->mtx_description,
1315				    file, line);
1316			return;
1317		}
1318		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1319		PCPU_SET(witness_spin_check,
1320		    PCPU_GET(witness_spin_check) & ~w->w_level);
1321		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1322		MPASS(p->p_spinlocks > 0);
1323		p->p_spinlocks--;
1324		return;
1325	}
1326	if ((m->mtx_flags & MTX_SPIN) != 0)
1327		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1328		    m->mtx_description, file, line);
1329
1330	if (mtx_recursed(m)) {
1331		if ((m->mtx_flags & MTX_RECURSE) == 0)
1332			panic("mutex_exit: recursion on non-recursive"
1333			    " mutex %s @ %s:%d", m->mtx_description,
1334			    file, line);
1335		return;
1336	}
1337
1338	if ((flags & MTX_NOSWITCH) == 0 && p->p_spinlocks != 0 && !cold)
1339		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1340			    m->mtx_description, file, line);
1341	LIST_REMOVE(m, mtx_held);
1342	m->mtx_held.le_prev = NULL;
1343}
1344
1345int
1346witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1347{
1348	struct mtx *m;
1349	struct proc *p;
1350	char **sleep;
1351	int n = 0;
1352
1353	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1354	p = curproc;
1355	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1356		if (m == mtx)
1357			continue;
1358		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1359			if (strcmp(m->mtx_description, *sleep) == 0)
1360				goto next;
1361		if (n == 0)
1362			printf("Whee!\n");
1363		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1364			file, line, check_only ? "could sleep" : "sleeping",
1365			m->mtx_description,
1366			m->mtx_witness->w_file, m->mtx_witness->w_line);
1367		n++;
1368	next:
1369	}
1370#ifdef DDB
1371	if (witness_ddb && n)
1372		Debugger("witness_sleep");
1373#endif /* DDB */
1374	return (n);
1375}
1376
1377static struct witness *
1378enroll(const char *description, int flag)
1379{
1380	int i;
1381	struct witness *w, *w1;
1382	char **ignore;
1383	char **order;
1384
1385	if (!witness_watch)
1386		return (NULL);
1387	for (ignore = ignore_list; *ignore != NULL; ignore++)
1388		if (strcmp(description, *ignore) == 0)
1389			return (NULL);
1390
1391	if (w_inited == 0) {
1392		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1393		for (i = 0; i < WITNESS_COUNT; i++) {
1394			w = &w_data[i];
1395			witness_free(w);
1396		}
1397		w_inited = 1;
1398		for (order = order_list; *order != NULL; order++) {
1399			w = enroll(*order, MTX_DEF);
1400			w->w_file = "order list";
1401			for (order++; *order != NULL; order++) {
1402				w1 = enroll(*order, MTX_DEF);
1403				w1->w_file = "order list";
1404				itismychild(w, w1);
1405				w = w1;
1406    	    	    	}
1407		}
1408	}
1409	if ((flag & MTX_SPIN) && witness_skipspin)
1410		return (NULL);
1411	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1412	for (w = w_all; w; w = w->w_next) {
1413		if (strcmp(description, w->w_description) == 0) {
1414			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1415			return (w);
1416		}
1417	}
1418	if ((w = witness_get()) == NULL)
1419		return (NULL);
1420	w->w_next = w_all;
1421	w_all = w;
1422	w->w_description = description;
1423	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1424	if (flag & MTX_SPIN) {
1425		w->w_spin = 1;
1426
1427		i = 1;
1428		for (order = spin_order_list; *order != NULL; order++) {
1429			if (strcmp(description, *order) == 0)
1430				break;
1431			i <<= 1;
1432		}
1433		if (*order == NULL)
1434			panic("spin lock %s not in order list", description);
1435		w->w_level = i;
1436	}
1437
1438	return (w);
1439}
1440
1441static int
1442itismychild(struct witness *parent, struct witness *child)
1443{
1444	static int recursed;
1445
1446	/*
1447	 * Insert "child" after "parent"
1448	 */
1449	while (parent->w_morechildren)
1450		parent = parent->w_morechildren;
1451
1452	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1453		if ((parent->w_morechildren = witness_get()) == NULL)
1454			return (1);
1455		parent = parent->w_morechildren;
1456	}
1457	MPASS(child != NULL);
1458	parent->w_children[parent->w_childcnt++] = child;
1459	/*
1460	 * now prune whole tree
1461	 */
1462	if (recursed)
1463		return (0);
1464	recursed = 1;
1465	for (child = w_all; child != NULL; child = child->w_next) {
1466		for (parent = w_all; parent != NULL;
1467		    parent = parent->w_next) {
1468			if (!isitmychild(parent, child))
1469				continue;
1470			removechild(parent, child);
1471			if (isitmydescendant(parent, child))
1472				continue;
1473			itismychild(parent, child);
1474		}
1475	}
1476	recursed = 0;
1477	witness_levelall();
1478	return (0);
1479}
1480
1481static void
1482removechild(struct witness *parent, struct witness *child)
1483{
1484	struct witness *w, *w1;
1485	int i;
1486
1487	for (w = parent; w != NULL; w = w->w_morechildren)
1488		for (i = 0; i < w->w_childcnt; i++)
1489			if (w->w_children[i] == child)
1490				goto found;
1491	return;
1492found:
1493	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1494		continue;
1495	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1496	MPASS(w->w_children[i] != NULL);
1497
1498	if (w1->w_childcnt != 0)
1499		return;
1500
1501	if (w1 == parent)
1502		return;
1503	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1504		continue;
1505	w->w_morechildren = 0;
1506	witness_free(w1);
1507}
1508
1509static int
1510isitmychild(struct witness *parent, struct witness *child)
1511{
1512	struct witness *w;
1513	int i;
1514
1515	for (w = parent; w != NULL; w = w->w_morechildren) {
1516		for (i = 0; i < w->w_childcnt; i++) {
1517			if (w->w_children[i] == child)
1518				return (1);
1519		}
1520	}
1521	return (0);
1522}
1523
1524static int
1525isitmydescendant(struct witness *parent, struct witness *child)
1526{
1527	struct witness *w;
1528	int i;
1529	int j;
1530
1531	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1532		MPASS(j < 1000);
1533		for (i = 0; i < w->w_childcnt; i++) {
1534			if (w->w_children[i] == child)
1535				return (1);
1536		}
1537		for (i = 0; i < w->w_childcnt; i++) {
1538			if (isitmydescendant(w->w_children[i], child))
1539				return (1);
1540		}
1541	}
1542	return (0);
1543}
1544
1545void
1546witness_levelall (void)
1547{
1548	struct witness *w, *w1;
1549
1550	for (w = w_all; w; w = w->w_next)
1551		if (!(w->w_spin))
1552			w->w_level = 0;
1553	for (w = w_all; w; w = w->w_next) {
1554		if (w->w_spin)
1555			continue;
1556		for (w1 = w_all; w1; w1 = w1->w_next) {
1557			if (isitmychild(w1, w))
1558				break;
1559		}
1560		if (w1 != NULL)
1561			continue;
1562		witness_leveldescendents(w, 0);
1563	}
1564}
1565
1566static void
1567witness_leveldescendents(struct witness *parent, int level)
1568{
1569	int i;
1570	struct witness *w;
1571
1572	if (parent->w_level < level)
1573		parent->w_level = level;
1574	level++;
1575	for (w = parent; w != NULL; w = w->w_morechildren)
1576		for (i = 0; i < w->w_childcnt; i++)
1577			witness_leveldescendents(w->w_children[i], level);
1578}
1579
1580static void
1581witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1582			   struct witness *parent)
1583{
1584	struct witness *w;
1585	int i;
1586	int level;
1587
1588	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1589
1590	prnt("%d", level);
1591	if (level < 10)
1592		prnt(" ");
1593	for (i = 0; i < level; i++)
1594		prnt(" ");
1595	prnt("%s", parent->w_description);
1596	if (parent->w_file != NULL)
1597		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1598		    parent->w_line);
1599
1600	for (w = parent; w != NULL; w = w->w_morechildren)
1601		for (i = 0; i < w->w_childcnt; i++)
1602			    witness_displaydescendants(prnt, w->w_children[i]);
1603    }
1604
1605static int
1606dup_ok(struct witness *w)
1607{
1608	char **dup;
1609
1610	for (dup = dup_list; *dup!= NULL; dup++)
1611		if (strcmp(w->w_description, *dup) == 0)
1612			return (1);
1613	return (0);
1614}
1615
1616static int
1617blessed(struct witness *w1, struct witness *w2)
1618{
1619	int i;
1620	struct witness_blessed *b;
1621
1622	for (i = 0; i < blessed_count; i++) {
1623		b = &blessed_list[i];
1624		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1625			if (strcmp(w2->w_description, b->b_lock2) == 0)
1626				return (1);
1627			continue;
1628		}
1629		if (strcmp(w1->w_description, b->b_lock2) == 0)
1630			if (strcmp(w2->w_description, b->b_lock1) == 0)
1631				return (1);
1632	}
1633	return (0);
1634}
1635
1636static struct witness *
1637witness_get()
1638{
1639	struct witness *w;
1640
1641	if ((w = w_free) == NULL) {
1642		witness_dead = 1;
1643		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1644		printf("witness exhausted\n");
1645		return (NULL);
1646	}
1647	w_free = w->w_next;
1648	bzero(w, sizeof(*w));
1649	return (w);
1650}
1651
1652static void
1653witness_free(struct witness *w)
1654{
1655	w->w_next = w_free;
1656	w_free = w;
1657}
1658
1659int
1660witness_list(struct proc *p)
1661{
1662	struct mtx *m;
1663	int nheld;
1664
1665	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1666	nheld = 0;
1667	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1668		printf("\t\"%s\" (%p) locked at %s:%d\n",
1669		    m->mtx_description, m,
1670		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1671		nheld++;
1672	}
1673
1674	return (nheld);
1675}
1676
1677#ifdef DDB
1678
1679DB_SHOW_COMMAND(mutexes, db_witness_list)
1680{
1681
1682	witness_list(curproc);
1683}
1684
1685DB_SHOW_COMMAND(witness, db_witness_display)
1686{
1687
1688	witness_display(db_printf);
1689}
1690#endif
1691
1692void
1693witness_save(struct mtx *m, const char **filep, int *linep)
1694{
1695
1696	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1697	if (m->mtx_witness == NULL)
1698		return;
1699
1700	*filep = m->mtx_witness->w_file;
1701	*linep = m->mtx_witness->w_line;
1702}
1703
1704void
1705witness_restore(struct mtx *m, const char *file, int line)
1706{
1707
1708	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1709	if (m->mtx_witness == NULL)
1710		return;
1711
1712	m->mtx_witness->w_file = file;
1713	m->mtx_witness->w_line = line;
1714}
1715
1716#endif	/* WITNESS */
1717