subr_witness.c revision 73033
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_witness.c 73033 2001-02-25 16:18:13Z jake $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3472200Sbmilekic * Machine independent bits of mutex implementation and implementation of
3572200Sbmilekic * `witness' structure & related debugging routines.
3672200Sbmilekic */
3772200Sbmilekic
3872200Sbmilekic/*
3965557Sjasone *	Main Entry: witness
4065557Sjasone *	Pronunciation: 'wit-n&s
4165557Sjasone *	Function: noun
4265557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
4365557Sjasone *	    testimony, witness, from 2wit
4465557Sjasone *	Date: before 12th century
4565557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4665557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4765557Sjasone *	    a cause or before a judicial tribunal
4865557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4965557Sjasone *	    testify to its having taken place
5065557Sjasone *	4 : one who has personal knowledge of something
5165557Sjasone *	5 a : something serving as evidence or proof : SIGN
5265557Sjasone *	  b : public affirmation by word or example of usually
5365557Sjasone *	      religious faith or conviction <the heroic witness to divine
5465557Sjasone *	      life -- Pilot>
5565557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5665557Sjasone */
5765557Sjasone
5868790Sjhb#include "opt_ddb.h"
5967676Sjhb#include "opt_witness.h"
6067676Sjhb
6165557Sjasone#include <sys/param.h>
6267352Sjhb#include <sys/bus.h>
6367352Sjhb#include <sys/kernel.h>
6467352Sjhb#include <sys/malloc.h>
6565557Sjasone#include <sys/proc.h>
6667676Sjhb#include <sys/sysctl.h>
6765557Sjasone#include <sys/systm.h>
6867352Sjhb#include <sys/vmmeter.h>
6965557Sjasone#include <sys/ktr.h>
7065557Sjasone
7167352Sjhb#include <machine/atomic.h>
7267352Sjhb#include <machine/bus.h>
7367352Sjhb#include <machine/clock.h>
7465557Sjasone#include <machine/cpu.h>
7567352Sjhb
7668790Sjhb#include <ddb/ddb.h>
7768790Sjhb
7867352Sjhb#include <vm/vm.h>
7967352Sjhb#include <vm/vm_extern.h>
8067352Sjhb
8167352Sjhb#include <sys/mutex.h>
8265557Sjasone
8365557Sjasone/*
8472200Sbmilekic * The WITNESS-enabled mutex debug structure.
8567352Sjhb */
8671352Sjasone#ifdef WITNESS
8771352Sjasonestruct mtx_debug {
8871352Sjasone	struct witness	*mtxd_witness;
8971352Sjasone	LIST_ENTRY(mtx)	mtxd_held;
9071352Sjasone	const char	*mtxd_file;
9171352Sjasone	int		mtxd_line;
9271352Sjasone};
9371352Sjasone
9471560Sjhb#define mtx_held	mtx_debug->mtxd_held
9571560Sjhb#define	mtx_file	mtx_debug->mtxd_file
9671560Sjhb#define	mtx_line	mtx_debug->mtxd_line
9771560Sjhb#define	mtx_witness	mtx_debug->mtxd_witness
9871352Sjasone#endif	/* WITNESS */
9971352Sjasone
10071352Sjasone/*
10172200Sbmilekic * Internal utility macros.
10271352Sjasone */
10372200Sbmilekic#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
10471352Sjasone
10572200Sbmilekic#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
10672200Sbmilekic	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
10771352Sjasone
10872376Sjake#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
10971352Sjasone
11071352Sjasone/*
11172200Sbmilekic * Early WITNESS-enabled declarations.
11271352Sjasone */
11372200Sbmilekic#ifdef WITNESS
11471352Sjasone
11571352Sjasone/*
11672200Sbmilekic * Internal WITNESS routines which must be prototyped early.
11772200Sbmilekic *
11872200Sbmilekic * XXX: When/if witness code is cleaned up, it would be wise to place all
11972200Sbmilekic *	witness prototyping early in this file.
12072200Sbmilekic */
12172200Sbmilekicstatic void witness_init(struct mtx *, int flag);
12272200Sbmilekicstatic void witness_destroy(struct mtx *);
12372200Sbmilekicstatic void witness_display(void(*)(const char *fmt, ...));
12471352Sjasone
12572200SbmilekicMALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
12671352Sjasone
12767352Sjhb/* All mutexes in system (used for debug/panic) */
12871560Sjhbstatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
12972200Sbmilekic
13071320Sjasone/*
13172200Sbmilekic * This global is set to 0 once it becomes safe to use the witness code.
13271320Sjasone */
13371320Sjasonestatic int witness_cold = 1;
13472200Sbmilekic
13569429Sjhb#else	/* WITNESS */
13671352Sjasone
13772200Sbmilekic/* XXX XXX XXX
13872200Sbmilekic * flag++ is sleazoid way of shuting up warning
13971352Sjasone */
14071352Sjasone#define witness_init(m, flag) flag++
14171352Sjasone#define witness_destroy(m)
14271352Sjasone#define witness_try_enter(m, t, f, l)
14369429Sjhb#endif	/* WITNESS */
14467352Sjhb
14572200Sbmilekic/*
14672200Sbmilekic * All mutex locks in system are kept on the all_mtx list.
14772200Sbmilekic */
14871560Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
14971560Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
15071560Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx,
15171560Sjhb#ifdef WITNESS
15271560Sjhb	&all_mtx_debug
15371560Sjhb#else
15471560Sjhb	NULL
15571560Sjhb#endif
15671560Sjhb	 };
15771560Sjhb
15872200Sbmilekic/*
15972200Sbmilekic * Global variables for book keeping.
16072200Sbmilekic */
16167352Sjhbstatic int	mtx_cur_cnt;
16267352Sjhbstatic int	mtx_max_cnt;
16367352Sjhb
16472200Sbmilekic/*
16572344Sbmilekic * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
16672344Sbmilekic */
16772344Sbmilekicchar	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
16872344Sbmilekicchar	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
16972344Sbmilekicchar	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
17072344Sbmilekicchar	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
17172344Sbmilekic
17272344Sbmilekic/*
17372200Sbmilekic * Prototypes for non-exported routines.
17472200Sbmilekic *
17572200Sbmilekic * NOTE: Prototypes for witness routines are placed at the bottom of the file.
17672200Sbmilekic */
17771352Sjasonestatic void	propagate_priority(struct proc *);
17867352Sjhb
17967352Sjhbstatic void
18067352Sjhbpropagate_priority(struct proc *p)
18167352Sjhb{
18272376Sjake	int pri = p->p_pri.pri_level;
18367352Sjhb	struct mtx *m = p->p_blocked;
18467352Sjhb
18569376Sjhb	mtx_assert(&sched_lock, MA_OWNED);
18667352Sjhb	for (;;) {
18767352Sjhb		struct proc *p1;
18867352Sjhb
18967352Sjhb		p = mtx_owner(m);
19067352Sjhb
19167352Sjhb		if (p == NULL) {
19267352Sjhb			/*
19367352Sjhb			 * This really isn't quite right. Really
19467352Sjhb			 * ought to bump priority of process that
19567352Sjhb			 * next acquires the mutex.
19667352Sjhb			 */
19767352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
19867352Sjhb			return;
19967352Sjhb		}
20072200Sbmilekic
20167352Sjhb		MPASS(p->p_magic == P_MAGIC);
20269376Sjhb		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
20372376Sjake		if (p->p_pri.pri_level <= pri)
20467352Sjhb			return;
20569376Sjhb
20667352Sjhb		/*
20769376Sjhb		 * Bump this process' priority.
20869376Sjhb		 */
20969376Sjhb		SET_PRIO(p, pri);
21069376Sjhb
21169376Sjhb		/*
21267352Sjhb		 * If lock holder is actually running, just bump priority.
21367352Sjhb		 */
21472836Sjhb		if (p->p_oncpu != NOCPU) {
21567352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
21667352Sjhb			return;
21767352Sjhb		}
21872376Sjake
21967352Sjhb		/*
22067352Sjhb		 * If on run queue move to new run queue, and
22167352Sjhb		 * quit.
22267352Sjhb		 */
22367352Sjhb		if (p->p_stat == SRUN) {
22467352Sjhb			MPASS(p->p_blocked == NULL);
22567352Sjhb			remrunqueue(p);
22667352Sjhb			setrunqueue(p);
22767352Sjhb			return;
22867352Sjhb		}
22967352Sjhb
23067352Sjhb		/*
23169376Sjhb		 * If we aren't blocked on a mutex, we should be.
23267352Sjhb		 */
23369376Sjhb		KASSERT(p->p_stat == SMTX, (
23469376Sjhb		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
23569376Sjhb		    p->p_pid, p->p_comm, p->p_stat,
23669376Sjhb		    m->mtx_description));
23767352Sjhb
23867352Sjhb		/*
23967352Sjhb		 * Pick up the mutex that p is blocked on.
24067352Sjhb		 */
24167352Sjhb		m = p->p_blocked;
24267352Sjhb		MPASS(m != NULL);
24367352Sjhb
24467352Sjhb		/*
24567352Sjhb		 * Check if the proc needs to be moved up on
24667352Sjhb		 * the blocked chain
24767352Sjhb		 */
24869376Sjhb		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
24969376Sjhb			continue;
25069376Sjhb		}
25172200Sbmilekic
25272376Sjake		p1 = TAILQ_PREV(p, procqueue, p_procq);
25372376Sjake		if (p1->p_pri.pri_level <= pri) {
25467352Sjhb			continue;
25567352Sjhb		}
25667352Sjhb
25767352Sjhb		/*
25869376Sjhb		 * Remove proc from blocked chain and determine where
25969376Sjhb		 * it should be moved up to.  Since we know that p1 has
26069376Sjhb		 * a lower priority than p, we know that at least one
26169376Sjhb		 * process in the chain has a lower priority and that
26269376Sjhb		 * p1 will thus not be NULL after the loop.
26367352Sjhb		 */
26467352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
26567352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
26667352Sjhb			MPASS(p1->p_magic == P_MAGIC);
26772376Sjake			if (p1->p_pri.pri_level > pri)
26867352Sjhb				break;
26967352Sjhb		}
27072200Sbmilekic
27169376Sjhb		MPASS(p1 != NULL);
27269376Sjhb		TAILQ_INSERT_BEFORE(p1, p, p_procq);
27367352Sjhb		CTR4(KTR_LOCK,
27471560Sjhb		    "propagate_priority: p %p moved before %p on [%p] %s",
27567352Sjhb		    p, p1, m, m->mtx_description);
27667352Sjhb	}
27767352Sjhb}
27867352Sjhb
27971352Sjasone/*
28072200Sbmilekic * The important part of mtx_trylock{,_flags}()
28172200Sbmilekic * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
28272200Sbmilekic * if we're called, it's because we know we don't already own this lock.
28371352Sjasone */
28472200Sbmilekicint
28572200Sbmilekic_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
28671352Sjasone{
28772200Sbmilekic	int rval;
28871352Sjasone
28972393Sbmilekic	MPASS(curproc != NULL);
29071352Sjasone
29172200Sbmilekic	/*
29272200Sbmilekic	 * _mtx_trylock does not accept MTX_NOSWITCH option.
29372200Sbmilekic	 */
29472344Sbmilekic	KASSERT((opts & MTX_NOSWITCH) == 0,
29572344Sbmilekic	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
29672200Sbmilekic
29772393Sbmilekic	rval = _obtain_lock(m, curproc);
29872200Sbmilekic
29972200Sbmilekic#ifdef WITNESS
30072200Sbmilekic	if (rval && m->mtx_witness != NULL) {
30171352Sjasone		/*
30272200Sbmilekic		 * We do not handle recursion in _mtx_trylock; see the
30372200Sbmilekic		 * note at the top of the routine.
30471352Sjasone		 */
30572344Sbmilekic		KASSERT(!mtx_recursed(m),
30672344Sbmilekic		    ("mtx_trylock() called on a recursed mutex"));
30772200Sbmilekic		witness_try_enter(m, (opts | m->mtx_flags), file, line);
30871352Sjasone	}
30972200Sbmilekic#endif	/* WITNESS */
31071352Sjasone
31172200Sbmilekic	if ((opts & MTX_QUIET) == 0)
31272994Sjhb		CTR5(KTR_LOCK, "TRY_LOCK %s [%p] result=%d at %s:%d",
31372200Sbmilekic		    m->mtx_description, m, rval, file, line);
31472200Sbmilekic
31572200Sbmilekic	return rval;
31671352Sjasone}
31771352Sjasone
31871352Sjasone/*
31972200Sbmilekic * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
32071352Sjasone *
32172200Sbmilekic * We call this if the lock is either contested (i.e. we need to go to
32272200Sbmilekic * sleep waiting for it), or if we need to recurse on it.
32371352Sjasone */
32472200Sbmilekicvoid
32572200Sbmilekic_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
32671352Sjasone{
32772393Sbmilekic	struct proc *p = curproc;
32871352Sjasone
32972200Sbmilekic	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
33072200Sbmilekic		m->mtx_recurse++;
33172200Sbmilekic		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
33272200Sbmilekic		if ((opts & MTX_QUIET) == 0)
33372344Sbmilekic			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
33472200Sbmilekic		return;
33571352Sjasone	}
33671352Sjasone
33772200Sbmilekic	if ((opts & MTX_QUIET) == 0)
33872994Sjhb		CTR4(KTR_LOCK,
33972994Sjhb		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
34072994Sjhb		    m->mtx_description, (void *)m->mtx_lock, file, line);
34171352Sjasone
34272200Sbmilekic	/*
34372200Sbmilekic	 * Save our priority. Even though p_nativepri is protected by
34472200Sbmilekic	 * sched_lock, we don't obtain it here as it can be expensive.
34572200Sbmilekic	 * Since this is the only place p_nativepri is set, and since two
34672200Sbmilekic	 * CPUs will not be executing the same process concurrently, we know
34772200Sbmilekic	 * that no other CPU is going to be messing with this. Also,
34872200Sbmilekic	 * p_nativepri is only read when we are blocked on a mutex, so that
34972200Sbmilekic	 * can't be happening right now either.
35072200Sbmilekic	 */
35172376Sjake	p->p_pri.pri_native = p->p_pri.pri_level;
35271352Sjasone
35372200Sbmilekic	while (!_obtain_lock(m, p)) {
35472200Sbmilekic		uintptr_t v;
35572200Sbmilekic		struct proc *p1;
35671352Sjasone
35772200Sbmilekic		mtx_lock_spin(&sched_lock);
35872200Sbmilekic		/*
35972200Sbmilekic		 * Check if the lock has been released while spinning for
36072200Sbmilekic		 * the sched_lock.
36172200Sbmilekic		 */
36272200Sbmilekic		if ((v = m->mtx_lock) == MTX_UNOWNED) {
36372200Sbmilekic			mtx_unlock_spin(&sched_lock);
36472200Sbmilekic			continue;
36571352Sjasone		}
36671352Sjasone
36772200Sbmilekic		/*
36872200Sbmilekic		 * The mutex was marked contested on release. This means that
36972200Sbmilekic		 * there are processes blocked on it.
37072200Sbmilekic		 */
37172200Sbmilekic		if (v == MTX_CONTESTED) {
37272200Sbmilekic			p1 = TAILQ_FIRST(&m->mtx_blocked);
37372344Sbmilekic			MPASS(p1 != NULL);
37472200Sbmilekic			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
37567352Sjhb
37672376Sjake			if (p1->p_pri.pri_level < p->p_pri.pri_level)
37772376Sjake				SET_PRIO(p, p1->p_pri.pri_level);
37872200Sbmilekic			mtx_unlock_spin(&sched_lock);
37967352Sjhb			return;
38067352Sjhb		}
38169376Sjhb
38269376Sjhb		/*
38372200Sbmilekic		 * If the mutex isn't already contested and a failure occurs
38472200Sbmilekic		 * setting the contested bit, the mutex was either released
38572200Sbmilekic		 * or the state of the MTX_RECURSED bit changed.
38669376Sjhb		 */
38772200Sbmilekic		if ((v & MTX_CONTESTED) == 0 &&
38872200Sbmilekic		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
38972200Sbmilekic			(void *)(v | MTX_CONTESTED))) {
39072200Sbmilekic			mtx_unlock_spin(&sched_lock);
39172200Sbmilekic			continue;
39272200Sbmilekic		}
39367352Sjhb
39472200Sbmilekic		/*
39572200Sbmilekic		 * We deffinately must sleep for this lock.
39672200Sbmilekic		 */
39772200Sbmilekic		mtx_assert(m, MA_NOTOWNED);
39867352Sjhb
39967352Sjhb#ifdef notyet
40072200Sbmilekic		/*
40172200Sbmilekic		 * If we're borrowing an interrupted thread's VM context, we
40272200Sbmilekic		 * must clean up before going to sleep.
40372200Sbmilekic		 */
40472994Sjhb		if (p->p_ithd != NULL) {
40572994Sjhb			struct ithd *it = p->p_ithd;
40667352Sjhb
40772200Sbmilekic			if (it->it_interrupted) {
40872200Sbmilekic				if ((opts & MTX_QUIET) == 0)
40972200Sbmilekic					CTR2(KTR_LOCK,
41072994Sjhb				    "_mtx_lock_sleep: %p interrupted %p",
41172200Sbmilekic					    it, it->it_interrupted);
41272200Sbmilekic				intr_thd_fixup(it);
41367352Sjhb			}
41472200Sbmilekic		}
41567352Sjhb#endif
41667352Sjhb
41772200Sbmilekic		/*
41872200Sbmilekic		 * Put us on the list of threads blocked on this mutex.
41972200Sbmilekic		 */
42072200Sbmilekic		if (TAILQ_EMPTY(&m->mtx_blocked)) {
42172200Sbmilekic			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
42272200Sbmilekic			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
42372200Sbmilekic			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
42472200Sbmilekic		} else {
42572200Sbmilekic			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
42672376Sjake				if (p1->p_pri.pri_level > p->p_pri.pri_level)
42772200Sbmilekic					break;
42872200Sbmilekic			if (p1)
42972200Sbmilekic				TAILQ_INSERT_BEFORE(p1, p, p_procq);
43072200Sbmilekic			else
43167352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
43272200Sbmilekic		}
43367352Sjhb
43472200Sbmilekic		/*
43572200Sbmilekic		 * Save who we're blocked on.
43672200Sbmilekic		 */
43772200Sbmilekic		p->p_blocked = m;
43872200Sbmilekic		p->p_mtxname = m->mtx_description;
43972200Sbmilekic		p->p_stat = SMTX;
44072200Sbmilekic		propagate_priority(p);
44167352Sjhb
44272200Sbmilekic		if ((opts & MTX_QUIET) == 0)
44372200Sbmilekic			CTR3(KTR_LOCK,
44472200Sbmilekic			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
44572200Sbmilekic			    m->mtx_description);
44672200Sbmilekic
44772200Sbmilekic		mi_switch();
44872200Sbmilekic
44972200Sbmilekic		if ((opts & MTX_QUIET) == 0)
45072200Sbmilekic			CTR3(KTR_LOCK,
45172200Sbmilekic			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
45272200Sbmilekic			  p, m, m->mtx_description);
45372200Sbmilekic
45472200Sbmilekic		mtx_unlock_spin(&sched_lock);
45572200Sbmilekic	}
45672200Sbmilekic
45772200Sbmilekic	return;
45872200Sbmilekic}
45972200Sbmilekic
46072200Sbmilekic/*
46172200Sbmilekic * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
46272200Sbmilekic *
46372200Sbmilekic * This is only called if we need to actually spin for the lock. Recursion
46472200Sbmilekic * is handled inline.
46572200Sbmilekic */
46672200Sbmilekicvoid
46772200Sbmilekic_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
46872200Sbmilekic	       int line)
46972200Sbmilekic{
47072200Sbmilekic	int i = 0;
47172200Sbmilekic
47272200Sbmilekic	if ((opts & MTX_QUIET) == 0)
47372344Sbmilekic		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
47472200Sbmilekic
47572200Sbmilekic	for (;;) {
47672393Sbmilekic		if (_obtain_lock(m, curproc))
47772200Sbmilekic			break;
47872200Sbmilekic
47972200Sbmilekic		while (m->mtx_lock != MTX_UNOWNED) {
48072200Sbmilekic			if (i++ < 1000000)
48172200Sbmilekic				continue;
48272200Sbmilekic			if (i++ < 6000000)
48372200Sbmilekic				DELAY(1);
48467352Sjhb#ifdef DDB
48572200Sbmilekic			else if (!db_active)
48667352Sjhb#else
48772200Sbmilekic			else
48867352Sjhb#endif
48972200Sbmilekic			panic("spin lock %s held by %p for > 5 seconds",
49072200Sbmilekic			    m->mtx_description, (void *)m->mtx_lock);
49167352Sjhb		}
49267352Sjhb	}
49372200Sbmilekic
49472200Sbmilekic	m->mtx_saveintr = mtx_intr;
49572200Sbmilekic	if ((opts & MTX_QUIET) == 0)
49672200Sbmilekic		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
49772200Sbmilekic
49872200Sbmilekic	return;
49967352Sjhb}
50067352Sjhb
50172200Sbmilekic/*
50272200Sbmilekic * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
50372200Sbmilekic *
50472200Sbmilekic * We are only called here if the lock is recursed or contested (i.e. we
50572200Sbmilekic * need to wake up a blocked thread).
50672200Sbmilekic */
50767352Sjhbvoid
50872200Sbmilekic_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
50967352Sjhb{
51067352Sjhb	struct proc *p, *p1;
51167352Sjhb	struct mtx *m1;
51267352Sjhb	int pri;
51367352Sjhb
51472393Sbmilekic	p = curproc;
51572200Sbmilekic	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
51672200Sbmilekic
51772200Sbmilekic	if (mtx_recursed(m)) {
51872200Sbmilekic		if (--(m->mtx_recurse) == 0)
51972200Sbmilekic			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
52072200Sbmilekic		if ((opts & MTX_QUIET) == 0)
52172200Sbmilekic			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
52272200Sbmilekic		return;
52372200Sbmilekic	}
52472200Sbmilekic
52572200Sbmilekic	mtx_lock_spin(&sched_lock);
52672200Sbmilekic	if ((opts & MTX_QUIET) == 0)
52772200Sbmilekic		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
52872200Sbmilekic
52972200Sbmilekic	p1 = TAILQ_FIRST(&m->mtx_blocked);
53072200Sbmilekic	MPASS(p->p_magic == P_MAGIC);
53172200Sbmilekic	MPASS(p1->p_magic == P_MAGIC);
53272200Sbmilekic
53372200Sbmilekic	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
53472200Sbmilekic
53572200Sbmilekic	if (TAILQ_EMPTY(&m->mtx_blocked)) {
53672200Sbmilekic		LIST_REMOVE(m, mtx_contested);
53772200Sbmilekic		_release_lock_quick(m);
53872200Sbmilekic		if ((opts & MTX_QUIET) == 0)
53972200Sbmilekic			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
54072200Sbmilekic	} else
54172200Sbmilekic		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
54272200Sbmilekic
54372376Sjake	pri = PRI_MAX;
54472200Sbmilekic	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
54572376Sjake		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
54672200Sbmilekic		if (cp < pri)
54772200Sbmilekic			pri = cp;
54872200Sbmilekic	}
54972200Sbmilekic
55072376Sjake	if (pri > p->p_pri.pri_native)
55172376Sjake		pri = p->p_pri.pri_native;
55272200Sbmilekic	SET_PRIO(p, pri);
55372200Sbmilekic
55472200Sbmilekic	if ((opts & MTX_QUIET) == 0)
55572200Sbmilekic		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
55672200Sbmilekic		    m, p1);
55772200Sbmilekic
55872200Sbmilekic	p1->p_blocked = NULL;
55972200Sbmilekic	p1->p_stat = SRUN;
56072200Sbmilekic	setrunqueue(p1);
56172200Sbmilekic
56272376Sjake	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
56367352Sjhb#ifdef notyet
56472994Sjhb		if (p->p_ithd != NULL) {
56572994Sjhb			struct ithd *it = p->p_ithd;
56667352Sjhb
56772200Sbmilekic			if (it->it_interrupted) {
56872200Sbmilekic				if ((opts & MTX_QUIET) == 0)
56972200Sbmilekic					CTR2(KTR_LOCK,
57072994Sjhb				    "_mtx_unlock_sleep: %p interrupted %p",
57172200Sbmilekic					    it, it->it_interrupted);
57272200Sbmilekic				intr_thd_fixup(it);
57367352Sjhb			}
57472200Sbmilekic		}
57567352Sjhb#endif
57672200Sbmilekic		setrunqueue(p);
57772200Sbmilekic		if ((opts & MTX_QUIET) == 0)
57872200Sbmilekic			CTR2(KTR_LOCK,
57972200Sbmilekic			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
58072200Sbmilekic			    (void *)m->mtx_lock);
58172200Sbmilekic
58272200Sbmilekic		mi_switch();
58372200Sbmilekic		if ((opts & MTX_QUIET) == 0)
58472200Sbmilekic			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
58572200Sbmilekic			    m, (void *)m->mtx_lock);
58667352Sjhb	}
58772200Sbmilekic
58872200Sbmilekic	mtx_unlock_spin(&sched_lock);
58972200Sbmilekic
59072200Sbmilekic	return;
59167352Sjhb}
59267352Sjhb
59372200Sbmilekic/*
59472200Sbmilekic * All the unlocking of MTX_SPIN locks is done inline.
59572200Sbmilekic * See the _rel_spin_lock() macro for the details.
59672200Sbmilekic */
59772200Sbmilekic
59872200Sbmilekic/*
59972994Sjhb * The backing function for the INVARIANTS-enabled mtx_assert()
60072200Sbmilekic */
60172996Sjhb#ifdef INVARIANT_SUPPORT
60271352Sjasonevoid
60371360Sjasone_mtx_assert(struct mtx *m, int what, const char *file, int line)
60471352Sjasone{
60573033Sjake	switch (what) {
60671352Sjasone	case MA_OWNED:
60771352Sjasone	case MA_OWNED | MA_RECURSED:
60871352Sjasone	case MA_OWNED | MA_NOTRECURSED:
60973033Sjake		if (!mtx_owned(m))
61071352Sjasone			panic("mutex %s not owned at %s:%d",
61173033Sjake			    m->mtx_description, file, line);
61273033Sjake		if (mtx_recursed(m)) {
61373033Sjake			if ((what & MA_NOTRECURSED) != 0)
61471352Sjasone				panic("mutex %s recursed at %s:%d",
61573033Sjake				    m->mtx_description, file, line);
61673033Sjake		} else if ((what & MA_RECURSED) != 0) {
61771352Sjasone			panic("mutex %s unrecursed at %s:%d",
61873033Sjake			    m->mtx_description, file, line);
61971352Sjasone		}
62071352Sjasone		break;
62171352Sjasone	case MA_NOTOWNED:
62273033Sjake		if (mtx_owned(m))
62371352Sjasone			panic("mutex %s owned at %s:%d",
62473033Sjake			    m->mtx_description, file, line);
62571352Sjasone		break;
62671352Sjasone	default:
62771360Sjasone		panic("unknown mtx_assert at %s:%d", file, line);
62871352Sjasone	}
62971352Sjasone}
63071352Sjasone#endif
63171352Sjasone
63272200Sbmilekic/*
63372200Sbmilekic * The MUTEX_DEBUG-enabled mtx_validate()
63472200Sbmilekic */
63567352Sjhb#define MV_DESTROY	0	/* validate before destory */
63667352Sjhb#define MV_INIT		1	/* validate before init */
63767352Sjhb
63867352Sjhb#ifdef MUTEX_DEBUG
63967352Sjhb
64067352Sjhbint mtx_validate __P((struct mtx *, int));
64167352Sjhb
64267352Sjhbint
64367352Sjhbmtx_validate(struct mtx *m, int when)
64467352Sjhb{
64567352Sjhb	struct mtx *mp;
64667352Sjhb	int i;
64767352Sjhb	int retval = 0;
64867352Sjhb
64971320Sjasone#ifdef WITNESS
65071320Sjasone	if (witness_cold)
65171320Sjasone		return 0;
65271320Sjasone#endif
65367352Sjhb	if (m == &all_mtx || cold)
65467352Sjhb		return 0;
65567352Sjhb
65672200Sbmilekic	mtx_lock(&all_mtx);
65767352Sjhb/*
65867352Sjhb * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
65967352Sjhb * we can re-enable the kernacc() checks.
66067352Sjhb */
66167352Sjhb#ifndef __alpha__
66267352Sjhb	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
66367352Sjhb	    VM_PROT_READ) == 1);
66467352Sjhb#endif
66567352Sjhb	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
66667352Sjhb	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
66767352Sjhb#ifndef __alpha__
66867352Sjhb		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
66967352Sjhb		    VM_PROT_READ) != 1) {
67067352Sjhb			panic("mtx_validate: mp=%p mp->mtx_next=%p",
67167352Sjhb			    mp, mp->mtx_next);
67267352Sjhb		}
67367352Sjhb#endif
67467352Sjhb		i++;
67567352Sjhb		if (i > mtx_cur_cnt) {
67667352Sjhb			panic("mtx_validate: too many in chain, known=%d\n",
67767352Sjhb			    mtx_cur_cnt);
67867352Sjhb		}
67967352Sjhb	}
68067352Sjhb	MPASS(i == mtx_cur_cnt);
68167352Sjhb	switch (when) {
68267352Sjhb	case MV_DESTROY:
68367352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
68467352Sjhb			if (mp == m)
68567352Sjhb				break;
68667352Sjhb		MPASS(mp == m);
68767352Sjhb		break;
68867352Sjhb	case MV_INIT:
68967352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
69067352Sjhb		if (mp == m) {
69167352Sjhb			/*
69267352Sjhb			 * Not good. This mutex already exists.
69367352Sjhb			 */
69467352Sjhb			printf("re-initing existing mutex %s\n",
69567352Sjhb			    m->mtx_description);
69667352Sjhb			MPASS(m->mtx_lock == MTX_UNOWNED);
69767352Sjhb			retval = 1;
69867352Sjhb		}
69967352Sjhb	}
70072200Sbmilekic	mtx_unlock(&all_mtx);
70167352Sjhb	return (retval);
70267352Sjhb}
70367352Sjhb#endif
70467352Sjhb
70572200Sbmilekic/*
70672200Sbmilekic * Mutex initialization routine; initialize lock `m' of type contained in
70772200Sbmilekic * `opts' with options contained in `opts' and description `description.'
70872200Sbmilekic * Place on "all_mtx" queue.
70972200Sbmilekic */
71067352Sjhbvoid
71172200Sbmilekicmtx_init(struct mtx *m, const char *description, int opts)
71267352Sjhb{
71372200Sbmilekic
71472200Sbmilekic	if ((opts & MTX_QUIET) == 0)
71572200Sbmilekic		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
71672200Sbmilekic
71767352Sjhb#ifdef MUTEX_DEBUG
71872200Sbmilekic	/* Diagnostic and error correction */
71972200Sbmilekic	if (mtx_validate(m, MV_INIT))
72067352Sjhb		return;
72169429Sjhb#endif
72267352Sjhb
72367352Sjhb	bzero((void *)m, sizeof *m);
72467352Sjhb	TAILQ_INIT(&m->mtx_blocked);
72572200Sbmilekic
72669429Sjhb#ifdef WITNESS
72771320Sjasone	if (!witness_cold) {
72871560Sjhb		m->mtx_debug = malloc(sizeof(struct mtx_debug),
72972200Sbmilekic		    M_WITNESS, M_NOWAIT | M_ZERO);
73071560Sjhb		MPASS(m->mtx_debug != NULL);
73171320Sjasone	}
73271560Sjhb#endif
73371320Sjasone
73472200Sbmilekic	m->mtx_description = description;
73572200Sbmilekic	m->mtx_flags = opts;
73667352Sjhb	m->mtx_lock = MTX_UNOWNED;
73772200Sbmilekic
73867352Sjhb	/* Put on all mutex queue */
73972200Sbmilekic	mtx_lock(&all_mtx);
74067352Sjhb	m->mtx_next = &all_mtx;
74167352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
74267352Sjhb	m->mtx_prev->mtx_next = m;
74367352Sjhb	all_mtx.mtx_prev = m;
74467352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
74567352Sjhb		mtx_max_cnt = mtx_cur_cnt;
74672200Sbmilekic	mtx_unlock(&all_mtx);
74772200Sbmilekic
74871320Sjasone#ifdef WITNESS
74971320Sjasone	if (!witness_cold)
75072200Sbmilekic		witness_init(m, opts);
75171320Sjasone#endif
75267352Sjhb}
75367352Sjhb
75472200Sbmilekic/*
75572200Sbmilekic * Remove lock `m' from all_mtx queue.
75672200Sbmilekic */
75767352Sjhbvoid
75867352Sjhbmtx_destroy(struct mtx *m)
75967352Sjhb{
76067352Sjhb
76171320Sjasone#ifdef WITNESS
76271320Sjasone	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
76371320Sjasone	    __FUNCTION__));
76471320Sjasone#endif
76572200Sbmilekic
76671560Sjhb	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
76772200Sbmilekic
76867352Sjhb#ifdef MUTEX_DEBUG
76967352Sjhb	if (m->mtx_next == NULL)
77067352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
77167352Sjhb		    m, m->mtx_description);
77267352Sjhb
77367352Sjhb	if (!mtx_owned(m)) {
77467352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
77567352Sjhb	} else {
77671228Sbmilekic		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
77767352Sjhb	}
77872200Sbmilekic
77972200Sbmilekic	/* diagnostic */
78072200Sbmilekic	mtx_validate(m, MV_DESTROY);
78167352Sjhb#endif
78267352Sjhb
78367352Sjhb#ifdef WITNESS
78467352Sjhb	if (m->mtx_witness)
78567352Sjhb		witness_destroy(m);
78667352Sjhb#endif /* WITNESS */
78767352Sjhb
78867352Sjhb	/* Remove from the all mutex queue */
78972200Sbmilekic	mtx_lock(&all_mtx);
79067352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
79167352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
79272200Sbmilekic
79367352Sjhb#ifdef MUTEX_DEBUG
79467352Sjhb	m->mtx_next = m->mtx_prev = NULL;
79569429Sjhb#endif
79672200Sbmilekic
79769429Sjhb#ifdef WITNESS
79872200Sbmilekic	free(m->mtx_debug, M_WITNESS);
79971560Sjhb	m->mtx_debug = NULL;
80067352Sjhb#endif
80172200Sbmilekic
80267352Sjhb	mtx_cur_cnt--;
80372200Sbmilekic	mtx_unlock(&all_mtx);
80467352Sjhb}
80567352Sjhb
80672200Sbmilekic
80771560Sjhb/*
80872200Sbmilekic * The WITNESS-enabled diagnostic code.
80971560Sjhb */
81071560Sjhb#ifdef WITNESS
81171320Sjasonestatic void
81271320Sjasonewitness_fixup(void *dummy __unused)
81371320Sjasone{
81471320Sjasone	struct mtx *mp;
81571320Sjasone
81671560Sjhb	/*
81771560Sjhb	 * We have to release Giant before initializing its witness
81871560Sjhb	 * structure so that WITNESS doesn't get confused.
81971560Sjhb	 */
82072200Sbmilekic	mtx_unlock(&Giant);
82171560Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
82271560Sjhb
82372200Sbmilekic	mtx_lock(&all_mtx);
82472200Sbmilekic
82571320Sjasone	/* Iterate through all mutexes and finish up mutex initialization. */
82671320Sjasone	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
82771320Sjasone
82871560Sjhb		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
82972200Sbmilekic		    M_WITNESS, M_NOWAIT | M_ZERO);
83071560Sjhb		MPASS(mp->mtx_debug != NULL);
83171320Sjasone
83271320Sjasone		witness_init(mp, mp->mtx_flags);
83371320Sjasone	}
83472200Sbmilekic	mtx_unlock(&all_mtx);
83571320Sjasone
83671320Sjasone	/* Mark the witness code as being ready for use. */
83771320Sjasone	atomic_store_rel_int(&witness_cold, 0);
83871560Sjhb
83972200Sbmilekic	mtx_lock(&Giant);
84071320Sjasone}
84171320SjasoneSYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
84271320Sjasone
84365557Sjasone#define WITNESS_COUNT 200
84465557Sjasone#define	WITNESS_NCHILDREN 2
84565557Sjasone
84667401Sjhbint witness_watch = 1;
84765557Sjasone
84865856Sjhbstruct witness {
84965557Sjasone	struct witness	*w_next;
85067404Sjhb	const char	*w_description;
85165624Sjasone	const char	*w_file;
85265557Sjasone	int		 w_line;
85365557Sjasone	struct witness	*w_morechildren;
85465557Sjasone	u_char		 w_childcnt;
85565557Sjasone	u_char		 w_Giant_squawked:1;
85665557Sjasone	u_char		 w_other_squawked:1;
85765557Sjasone	u_char		 w_same_squawked:1;
85871228Sbmilekic	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
85965557Sjasone	u_int		 w_level;
86065557Sjasone	struct witness	*w_children[WITNESS_NCHILDREN];
86165856Sjhb};
86265557Sjasone
86365856Sjhbstruct witness_blessed {
86465557Sjasone	char 	*b_lock1;
86565557Sjasone	char	*b_lock2;
86665856Sjhb};
86765557Sjasone
86867676Sjhb#ifdef DDB
86965557Sjasone/*
87067676Sjhb * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
87165557Sjasone * drop into kdebug() when:
87265557Sjasone *	- a lock heirarchy violation occurs
87365557Sjasone *	- locks are held when going to sleep.
87465557Sjasone */
87571560Sjhbint	witness_ddb;
87667676Sjhb#ifdef WITNESS_DDB
87771560SjhbTUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
87867676Sjhb#else
87971560SjhbTUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
88065557Sjasone#endif
88167676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
88267676Sjhb#endif /* DDB */
88365557Sjasone
88471560Sjhbint	witness_skipspin;
88567676Sjhb#ifdef WITNESS_SKIPSPIN
88671560SjhbTUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
88767676Sjhb#else
88871560SjhbTUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
88965557Sjasone#endif
89067676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
89167676Sjhb    "");
89265557Sjasone
89372200Sbmilekic/*
89472200Sbmilekic * Witness-enabled globals
89572200Sbmilekic */
89671320Sjasonestatic struct mtx	w_mtx;
89765856Sjhbstatic struct witness	*w_free;
89865856Sjhbstatic struct witness	*w_all;
89965856Sjhbstatic int		 w_inited;
90065856Sjhbstatic int		 witness_dead;	/* fatal error, probably no memory */
90165557Sjasone
90265856Sjhbstatic struct witness	 w_data[WITNESS_COUNT];
90365557Sjasone
90472200Sbmilekic/*
90572200Sbmilekic * Internal witness routine prototypes
90672200Sbmilekic */
90772200Sbmilekicstatic struct witness *enroll(const char *description, int flag);
90872200Sbmilekicstatic int itismychild(struct witness *parent, struct witness *child);
90972200Sbmilekicstatic void removechild(struct witness *parent, struct witness *child);
91072200Sbmilekicstatic int isitmychild(struct witness *parent, struct witness *child);
91172200Sbmilekicstatic int isitmydescendant(struct witness *parent, struct witness *child);
91272200Sbmilekicstatic int dup_ok(struct witness *);
91372200Sbmilekicstatic int blessed(struct witness *, struct witness *);
91472200Sbmilekicstatic void
91572200Sbmilekic    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
91672200Sbmilekicstatic void witness_leveldescendents(struct witness *parent, int level);
91772200Sbmilekicstatic void witness_levelall(void);
91872200Sbmilekicstatic struct witness * witness_get(void);
91972200Sbmilekicstatic void witness_free(struct witness *m);
92065557Sjasone
92165557Sjasonestatic char *ignore_list[] = {
92265557Sjasone	"witness lock",
92365557Sjasone	NULL
92465557Sjasone};
92565557Sjasone
92665557Sjasonestatic char *spin_order_list[] = {
92772224Sjhb#if defined(__i386__) && defined (SMP)
92872224Sjhb	"com",
92972224Sjhb#endif
93069362Sjhb	"sio",
93172224Sjhb#ifdef __i386__
93272224Sjhb	"cy",
93372224Sjhb#endif
93472836Sjhb	"ithread table lock",
93572836Sjhb	"ithread list lock",
93665557Sjasone	"sched lock",
93768808Sjhb#ifdef __i386__
93867676Sjhb	"clk",
93968808Sjhb#endif
94068889Sjake	"callout",
94165557Sjasone	/*
94265557Sjasone	 * leaf locks
94365557Sjasone	 */
94473003Sjulian	"ng_node",
94573003Sjulian	"ng_worklist",
94672224Sjhb#ifdef SMP
94771576Sjasone#ifdef __i386__
94871576Sjasone	"ap boot",
94971576Sjasone	"imen",
95071576Sjasone#endif
95171576Sjasone	"smp rendezvous",
95272224Sjhb#endif
95365557Sjasone	NULL
95465557Sjasone};
95565557Sjasone
95665557Sjasonestatic char *order_list[] = {
95772256Sjhb	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
95872256Sjhb	    "uidinfo struct", NULL,
95965557Sjasone	NULL
96065557Sjasone};
96165557Sjasone
96265557Sjasonestatic char *dup_list[] = {
96365557Sjasone	NULL
96465557Sjasone};
96565557Sjasone
96665557Sjasonestatic char *sleep_list[] = {
96768862Sjake	"Giant",
96865557Sjasone	NULL
96965557Sjasone};
97065557Sjasone
97165557Sjasone/*
97265557Sjasone * Pairs of locks which have been blessed
97365557Sjasone * Don't complain about order problems with blessed locks
97465557Sjasone */
97565856Sjhbstatic struct witness_blessed blessed_list[] = {
97665557Sjasone};
97772200Sbmilekicstatic int blessed_count =
97872200Sbmilekic	sizeof(blessed_list) / sizeof(struct witness_blessed);
97965557Sjasone
98071352Sjasonestatic void
98165856Sjhbwitness_init(struct mtx *m, int flag)
98265557Sjasone{
98365557Sjasone	m->mtx_witness = enroll(m->mtx_description, flag);
98465557Sjasone}
98565557Sjasone
98671352Sjasonestatic void
98765856Sjhbwitness_destroy(struct mtx *m)
98865557Sjasone{
98965856Sjhb	struct mtx *m1;
99065557Sjasone	struct proc *p;
99172393Sbmilekic	p = curproc;
99272224Sjhb	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
99365557Sjasone		if (m1 == m) {
99465557Sjasone			LIST_REMOVE(m, mtx_held);
99565557Sjasone			break;
99665557Sjasone		}
99765557Sjasone	}
99865557Sjasone	return;
99965557Sjasone
100065557Sjasone}
100165557Sjasone
100271352Sjasonestatic void
100371352Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
100471352Sjasone{
100571352Sjasone	struct witness *w, *w1;
100672224Sjhb	int level, found;
100771352Sjasone
100871352Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
100971352Sjasone	witness_levelall();
101071352Sjasone
101172224Sjhb	/*
101272224Sjhb	 * First, handle sleep mutexes which have been acquired at least
101372224Sjhb	 * once.
101472224Sjhb	 */
101572224Sjhb	prnt("Sleep mutexes:\n");
101671352Sjasone	for (w = w_all; w; w = w->w_next) {
101772224Sjhb		if (w->w_file == NULL || w->w_spin)
101871352Sjasone			continue;
101971352Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
102071352Sjasone			if (isitmychild(w1, w))
102171352Sjasone				break;
102271352Sjasone		}
102371352Sjasone		if (w1 != NULL)
102471352Sjasone			continue;
102571352Sjasone		/*
102671352Sjasone		 * This lock has no anscestors, display its descendants.
102771352Sjasone		 */
102871352Sjasone		witness_displaydescendants(prnt, w);
102971352Sjasone	}
103072224Sjhb
103172224Sjhb	/*
103272224Sjhb	 * Now do spin mutexes which have been acquired at least once.
103372224Sjhb	 */
103472224Sjhb	prnt("\nSpin mutexes:\n");
103572224Sjhb	level = 0;
103672224Sjhb	while (level < sizeof(spin_order_list) / sizeof(char *)) {
103772224Sjhb		found = 0;
103872224Sjhb		for (w = w_all; w; w = w->w_next) {
103972224Sjhb			if (w->w_file == NULL || !w->w_spin)
104072224Sjhb				continue;
104172224Sjhb			if (w->w_level == 1 << level) {
104272224Sjhb				witness_displaydescendants(prnt, w);
104372224Sjhb				level++;
104472224Sjhb				found = 1;
104572224Sjhb			}
104672224Sjhb		}
104772224Sjhb		if (found == 0)
104872224Sjhb			level++;
104972224Sjhb	}
105072224Sjhb
105172224Sjhb	/*
105272224Sjhb	 * Finally, any mutexes which have not been acquired yet.
105372224Sjhb	 */
105472224Sjhb	prnt("\nMutexes which were never acquired:\n");
105571352Sjasone	for (w = w_all; w; w = w->w_next) {
105671352Sjasone		if (w->w_file != NULL)
105771352Sjasone			continue;
105871352Sjasone		prnt("%s\n", w->w_description);
105971352Sjasone	}
106071352Sjasone}
106171352Sjasone
106265557Sjasonevoid
106365856Sjhbwitness_enter(struct mtx *m, int flags, const char *file, int line)
106465557Sjasone{
106565856Sjhb	struct witness *w, *w1;
106665856Sjhb	struct mtx *m1;
106765557Sjasone	struct proc *p;
106865557Sjasone	int i;
106967676Sjhb#ifdef DDB
107067676Sjhb	int go_into_ddb = 0;
107167676Sjhb#endif /* DDB */
107265557Sjasone
107371352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
107471320Sjasone		return;
107565557Sjasone	w = m->mtx_witness;
107672393Sbmilekic	p = curproc;
107765557Sjasone
107865557Sjasone	if (flags & MTX_SPIN) {
107971560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
108065651Sjasone			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
108165651Sjasone			    " %s:%d", m->mtx_description, file, line);
108271228Sbmilekic		if (mtx_recursed(m)) {
108371560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
108471228Sbmilekic				panic("mutex_enter: recursion on non-recursive"
108571228Sbmilekic				    " mutex %s @ %s:%d", m->mtx_description,
108671228Sbmilekic				    file, line);
108765557Sjasone			return;
108871228Sbmilekic		}
108972200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
109070861Sjake		i = PCPU_GET(witness_spin_check);
109165557Sjasone		if (i != 0 && w->w_level < i) {
109272200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
109365651Sjasone			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
109465651Sjasone			    " %s:%d already holding %s:%x",
109565557Sjasone			    m->mtx_description, w->w_level, file, line,
109665557Sjasone			    spin_order_list[ffs(i)-1], i);
109765557Sjasone		}
109865557Sjasone		PCPU_SET(witness_spin_check, i | w->w_level);
109972200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
110069361Sjhb		w->w_file = file;
110169361Sjhb		w->w_line = line;
110269361Sjhb		m->mtx_line = line;
110369361Sjhb		m->mtx_file = file;
110465557Sjasone		return;
110565557Sjasone	}
110671560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
110765557Sjasone		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
110865557Sjasone		    m->mtx_description, file, line);
110965557Sjasone
111071228Sbmilekic	if (mtx_recursed(m)) {
111171560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
111271228Sbmilekic			panic("mutex_enter: recursion on non-recursive"
111371228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description,
111471228Sbmilekic			    file, line);
111565557Sjasone		return;
111671228Sbmilekic	}
111765557Sjasone	if (witness_dead)
111865557Sjasone		goto out;
111969998Sjhb	if (cold)
112065557Sjasone		goto out;
112165557Sjasone
112265557Sjasone	if (!mtx_legal2block())
112372200Sbmilekic		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
112465557Sjasone			    m->mtx_description, file, line);
112565557Sjasone	/*
112665557Sjasone	 * Is this the first mutex acquired
112765557Sjasone	 */
112865557Sjasone	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
112965557Sjasone		goto out;
113065557Sjasone
113165557Sjasone	if ((w1 = m1->mtx_witness) == w) {
113265557Sjasone		if (w->w_same_squawked || dup_ok(w))
113365557Sjasone			goto out;
113465557Sjasone		w->w_same_squawked = 1;
113565557Sjasone		printf("acquring duplicate lock of same type: \"%s\"\n",
113665557Sjasone			m->mtx_description);
113765557Sjasone		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
113865557Sjasone		printf(" 2nd @ %s:%d\n", file, line);
113967676Sjhb#ifdef DDB
114067676Sjhb		go_into_ddb = 1;
114167676Sjhb#endif /* DDB */
114265557Sjasone		goto out;
114365557Sjasone	}
114465557Sjasone	MPASS(!mtx_owned(&w_mtx));
114572200Sbmilekic	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
114665557Sjasone	/*
114765557Sjasone	 * If we have a known higher number just say ok
114865557Sjasone	 */
114965557Sjasone	if (witness_watch > 1 && w->w_level > w1->w_level) {
115072200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
115165557Sjasone		goto out;
115265557Sjasone	}
115365557Sjasone	if (isitmydescendant(m1->mtx_witness, w)) {
115472200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
115565557Sjasone		goto out;
115665557Sjasone	}
115765557Sjasone	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
115865557Sjasone
115967352Sjhb		MPASS(i < 200);
116065557Sjasone		w1 = m1->mtx_witness;
116165557Sjasone		if (isitmydescendant(w, w1)) {
116272200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
116365557Sjasone			if (blessed(w, w1))
116465557Sjasone				goto out;
116565557Sjasone			if (m1 == &Giant) {
116665557Sjasone				if (w1->w_Giant_squawked)
116765557Sjasone					goto out;
116865557Sjasone				else
116965557Sjasone					w1->w_Giant_squawked = 1;
117065557Sjasone			} else {
117165557Sjasone				if (w1->w_other_squawked)
117265557Sjasone					goto out;
117365557Sjasone				else
117465557Sjasone					w1->w_other_squawked = 1;
117565557Sjasone			}
117665557Sjasone			printf("lock order reversal\n");
117765557Sjasone			printf(" 1st %s last acquired @ %s:%d\n",
117865557Sjasone			    w->w_description, w->w_file, w->w_line);
117965557Sjasone			printf(" 2nd %p %s @ %s:%d\n",
118065557Sjasone			    m1, w1->w_description, w1->w_file, w1->w_line);
118165557Sjasone			printf(" 3rd %p %s @ %s:%d\n",
118265557Sjasone			    m, w->w_description, file, line);
118367676Sjhb#ifdef DDB
118467676Sjhb			go_into_ddb = 1;
118567676Sjhb#endif /* DDB */
118665557Sjasone			goto out;
118765557Sjasone		}
118865557Sjasone	}
118965557Sjasone	m1 = LIST_FIRST(&p->p_heldmtx);
119065557Sjasone	if (!itismychild(m1->mtx_witness, w))
119172200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
119265557Sjasone
119365557Sjasoneout:
119467676Sjhb#ifdef DDB
119567676Sjhb	if (witness_ddb && go_into_ddb)
119667676Sjhb		Debugger("witness_enter");
119767676Sjhb#endif /* DDB */
119865557Sjasone	w->w_file = file;
119965557Sjasone	w->w_line = line;
120065557Sjasone	m->mtx_line = line;
120165557Sjasone	m->mtx_file = file;
120265557Sjasone
120365557Sjasone	/*
120468582Sjhb	 * If this pays off it likely means that a mutex being witnessed
120565557Sjasone	 * is acquired in hardclock. Put it in the ignore list. It is
120665557Sjasone	 * likely not the mutex this assert fails on.
120765557Sjasone	 */
120867352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
120965557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
121065557Sjasone}
121165557Sjasone
121265557Sjasonevoid
121365856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
121465557Sjasone{
121565557Sjasone	struct proc *p;
121665856Sjhb	struct witness *w = m->mtx_witness;
121765557Sjasone
121871320Sjasone	if (witness_cold)
121971320Sjasone		return;
122069998Sjhb	if (panicstr)
122169998Sjhb		return;
122265557Sjasone	if (flags & MTX_SPIN) {
122371560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
122465557Sjasone			panic("mutex_try_enter: "
122565557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
122665557Sjasone			    m->mtx_description, file, line);
122771228Sbmilekic		if (mtx_recursed(m)) {
122871560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
122971228Sbmilekic				panic("mutex_try_enter: recursion on"
123071228Sbmilekic				    " non-recursive mutex %s @ %s:%d",
123171228Sbmilekic				    m->mtx_description, file, line);
123265557Sjasone			return;
123371228Sbmilekic		}
123472200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
123570861Sjake		PCPU_SET(witness_spin_check,
123670861Sjake		    PCPU_GET(witness_spin_check) | w->w_level);
123772200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
123869361Sjhb		w->w_file = file;
123969361Sjhb		w->w_line = line;
124069361Sjhb		m->mtx_line = line;
124169361Sjhb		m->mtx_file = file;
124265557Sjasone		return;
124365557Sjasone	}
124465557Sjasone
124571560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
124665557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
124765557Sjasone		    m->mtx_description, file, line);
124865557Sjasone
124971228Sbmilekic	if (mtx_recursed(m)) {
125071560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
125171228Sbmilekic			panic("mutex_try_enter: recursion on non-recursive"
125271228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description, file,
125371228Sbmilekic			    line);
125465557Sjasone		return;
125571228Sbmilekic	}
125665557Sjasone	w->w_file = file;
125765557Sjasone	w->w_line = line;
125865557Sjasone	m->mtx_line = line;
125965557Sjasone	m->mtx_file = file;
126072393Sbmilekic	p = curproc;
126167352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
126265557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
126365557Sjasone}
126465557Sjasone
126565557Sjasonevoid
126671352Sjasonewitness_exit(struct mtx *m, int flags, const char *file, int line)
126765557Sjasone{
126871352Sjasone	struct witness *w;
126965557Sjasone
127071352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
127171352Sjasone		return;
127271352Sjasone	w = m->mtx_witness;
127365557Sjasone
127471352Sjasone	if (flags & MTX_SPIN) {
127571560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
127671352Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
127771352Sjasone			    " %s:%d", m->mtx_description, file, line);
127871352Sjasone		if (mtx_recursed(m)) {
127971560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
128071352Sjasone				panic("mutex_exit: recursion on non-recursive"
128171352Sjasone				    " mutex %s @ %s:%d", m->mtx_description,
128271352Sjasone				    file, line);
128371352Sjasone			return;
128465557Sjasone		}
128572200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
128671352Sjasone		PCPU_SET(witness_spin_check,
128771352Sjasone		    PCPU_GET(witness_spin_check) & ~w->w_level);
128872200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
128971352Sjasone		return;
129065557Sjasone	}
129171560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
129271352Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
129371352Sjasone		    m->mtx_description, file, line);
129471352Sjasone
129571352Sjasone	if (mtx_recursed(m)) {
129671560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
129771352Sjasone			panic("mutex_exit: recursion on non-recursive"
129871352Sjasone			    " mutex %s @ %s:%d", m->mtx_description,
129971352Sjasone			    file, line);
130071352Sjasone		return;
130165557Sjasone	}
130271352Sjasone
130371352Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
130472200Sbmilekic		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
130571352Sjasone			    m->mtx_description, file, line);
130671352Sjasone	LIST_REMOVE(m, mtx_held);
130771352Sjasone	m->mtx_held.le_prev = NULL;
130865557Sjasone}
130965557Sjasone
131065557Sjasoneint
131165856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
131265557Sjasone{
131365856Sjhb	struct mtx *m;
131465557Sjasone	struct proc *p;
131565557Sjasone	char **sleep;
131665557Sjasone	int n = 0;
131765557Sjasone
131871320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
131972393Sbmilekic	p = curproc;
132072224Sjhb	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
132165557Sjasone		if (m == mtx)
132265557Sjasone			continue;
132365557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
132465557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
132565557Sjasone				goto next;
132672224Sjhb		if (n == 0)
132772224Sjhb			printf("Whee!\n");
132865557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
132965557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
133065557Sjasone			m->mtx_description,
133165557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
133265557Sjasone		n++;
133365557Sjasone	next:
133465557Sjasone	}
133567676Sjhb#ifdef DDB
133667676Sjhb	if (witness_ddb && n)
133767676Sjhb		Debugger("witness_sleep");
133867676Sjhb#endif /* DDB */
133965557Sjasone	return (n);
134065557Sjasone}
134165557Sjasone
134265856Sjhbstatic struct witness *
134367404Sjhbenroll(const char *description, int flag)
134465557Sjasone{
134565557Sjasone	int i;
134665856Sjhb	struct witness *w, *w1;
134765557Sjasone	char **ignore;
134865557Sjasone	char **order;
134965557Sjasone
135065557Sjasone	if (!witness_watch)
135165557Sjasone		return (NULL);
135265557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
135365557Sjasone		if (strcmp(description, *ignore) == 0)
135465557Sjasone			return (NULL);
135565557Sjasone
135665557Sjasone	if (w_inited == 0) {
135771320Sjasone		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
135865557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
135965557Sjasone			w = &w_data[i];
136065557Sjasone			witness_free(w);
136165557Sjasone		}
136265557Sjasone		w_inited = 1;
136365557Sjasone		for (order = order_list; *order != NULL; order++) {
136465557Sjasone			w = enroll(*order, MTX_DEF);
136565557Sjasone			w->w_file = "order list";
136665557Sjasone			for (order++; *order != NULL; order++) {
136765557Sjasone				w1 = enroll(*order, MTX_DEF);
136865557Sjasone				w1->w_file = "order list";
136965557Sjasone				itismychild(w, w1);
137065557Sjasone				w = w1;
137165557Sjasone    	    	    	}
137265557Sjasone		}
137365557Sjasone	}
137465557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
137565557Sjasone		return (NULL);
137672200Sbmilekic	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
137765557Sjasone	for (w = w_all; w; w = w->w_next) {
137865557Sjasone		if (strcmp(description, w->w_description) == 0) {
137972200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
138065557Sjasone			return (w);
138165557Sjasone		}
138265557Sjasone	}
138365557Sjasone	if ((w = witness_get()) == NULL)
138465557Sjasone		return (NULL);
138565557Sjasone	w->w_next = w_all;
138665557Sjasone	w_all = w;
138765557Sjasone	w->w_description = description;
138872200Sbmilekic	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
138965557Sjasone	if (flag & MTX_SPIN) {
139065557Sjasone		w->w_spin = 1;
139165557Sjasone
139265557Sjasone		i = 1;
139365557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
139465557Sjasone			if (strcmp(description, *order) == 0)
139565557Sjasone				break;
139665557Sjasone			i <<= 1;
139765557Sjasone		}
139865557Sjasone		if (*order == NULL)
139965557Sjasone			panic("spin lock %s not in order list", description);
140065557Sjasone		w->w_level = i;
140171560Sjhb	}
140271228Sbmilekic
140365557Sjasone	return (w);
140465557Sjasone}
140565557Sjasone
140665557Sjasonestatic int
140765856Sjhbitismychild(struct witness *parent, struct witness *child)
140865557Sjasone{
140965557Sjasone	static int recursed;
141065557Sjasone
141165557Sjasone	/*
141265557Sjasone	 * Insert "child" after "parent"
141365557Sjasone	 */
141465557Sjasone	while (parent->w_morechildren)
141565557Sjasone		parent = parent->w_morechildren;
141665557Sjasone
141765557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
141865557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
141965557Sjasone			return (1);
142065557Sjasone		parent = parent->w_morechildren;
142165557Sjasone	}
142267352Sjhb	MPASS(child != NULL);
142365557Sjasone	parent->w_children[parent->w_childcnt++] = child;
142465557Sjasone	/*
142565557Sjasone	 * now prune whole tree
142665557Sjasone	 */
142765557Sjasone	if (recursed)
142865557Sjasone		return (0);
142965557Sjasone	recursed = 1;
143065557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
143165557Sjasone		for (parent = w_all; parent != NULL;
143265557Sjasone		    parent = parent->w_next) {
143365557Sjasone			if (!isitmychild(parent, child))
143465557Sjasone				continue;
143565557Sjasone			removechild(parent, child);
143665557Sjasone			if (isitmydescendant(parent, child))
143765557Sjasone				continue;
143865557Sjasone			itismychild(parent, child);
143965557Sjasone		}
144065557Sjasone	}
144165557Sjasone	recursed = 0;
144265557Sjasone	witness_levelall();
144365557Sjasone	return (0);
144465557Sjasone}
144565557Sjasone
144665557Sjasonestatic void
144765856Sjhbremovechild(struct witness *parent, struct witness *child)
144865557Sjasone{
144965856Sjhb	struct witness *w, *w1;
145065557Sjasone	int i;
145165557Sjasone
145265557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
145365557Sjasone		for (i = 0; i < w->w_childcnt; i++)
145465557Sjasone			if (w->w_children[i] == child)
145565557Sjasone				goto found;
145665557Sjasone	return;
145765557Sjasonefound:
145865557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
145965557Sjasone		continue;
146065557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
146167352Sjhb	MPASS(w->w_children[i] != NULL);
146265557Sjasone
146365557Sjasone	if (w1->w_childcnt != 0)
146465557Sjasone		return;
146565557Sjasone
146665557Sjasone	if (w1 == parent)
146765557Sjasone		return;
146865557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
146965557Sjasone		continue;
147065557Sjasone	w->w_morechildren = 0;
147165557Sjasone	witness_free(w1);
147265557Sjasone}
147365557Sjasone
147465557Sjasonestatic int
147565856Sjhbisitmychild(struct witness *parent, struct witness *child)
147665557Sjasone{
147765856Sjhb	struct witness *w;
147865557Sjasone	int i;
147965557Sjasone
148065557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
148165557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
148265557Sjasone			if (w->w_children[i] == child)
148365557Sjasone				return (1);
148465557Sjasone		}
148565557Sjasone	}
148665557Sjasone	return (0);
148765557Sjasone}
148865557Sjasone
148965557Sjasonestatic int
149065856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
149165557Sjasone{
149265856Sjhb	struct witness *w;
149365557Sjasone	int i;
149465557Sjasone	int j;
149565557Sjasone
149665557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
149767352Sjhb		MPASS(j < 1000);
149865557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
149965557Sjasone			if (w->w_children[i] == child)
150065557Sjasone				return (1);
150165557Sjasone		}
150265557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
150365557Sjasone			if (isitmydescendant(w->w_children[i], child))
150465557Sjasone				return (1);
150565557Sjasone		}
150665557Sjasone	}
150765557Sjasone	return (0);
150865557Sjasone}
150965557Sjasone
151065557Sjasonevoid
151165557Sjasonewitness_levelall (void)
151265557Sjasone{
151365856Sjhb	struct witness *w, *w1;
151465557Sjasone
151565557Sjasone	for (w = w_all; w; w = w->w_next)
151671228Sbmilekic		if (!(w->w_spin))
151765557Sjasone			w->w_level = 0;
151865557Sjasone	for (w = w_all; w; w = w->w_next) {
151965557Sjasone		if (w->w_spin)
152065557Sjasone			continue;
152165557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
152265557Sjasone			if (isitmychild(w1, w))
152365557Sjasone				break;
152465557Sjasone		}
152565557Sjasone		if (w1 != NULL)
152665557Sjasone			continue;
152765557Sjasone		witness_leveldescendents(w, 0);
152865557Sjasone	}
152965557Sjasone}
153065557Sjasone
153165557Sjasonestatic void
153265856Sjhbwitness_leveldescendents(struct witness *parent, int level)
153365557Sjasone{
153465557Sjasone	int i;
153565856Sjhb	struct witness *w;
153665557Sjasone
153765557Sjasone	if (parent->w_level < level)
153865557Sjasone		parent->w_level = level;
153965557Sjasone	level++;
154065557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
154165557Sjasone		for (i = 0; i < w->w_childcnt; i++)
154265557Sjasone			witness_leveldescendents(w->w_children[i], level);
154365557Sjasone}
154465557Sjasone
154565557Sjasonestatic void
154665856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
154765856Sjhb			   struct witness *parent)
154865557Sjasone{
154965856Sjhb	struct witness *w;
155065557Sjasone	int i;
155172224Sjhb	int level;
155265557Sjasone
155372224Sjhb	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
155472224Sjhb
155565557Sjasone	prnt("%d", level);
155665557Sjasone	if (level < 10)
155765557Sjasone		prnt(" ");
155865557Sjasone	for (i = 0; i < level; i++)
155965557Sjasone		prnt(" ");
156065557Sjasone	prnt("%s", parent->w_description);
156172224Sjhb	if (parent->w_file != NULL)
156272224Sjhb		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
156372224Sjhb		    parent->w_line);
156465557Sjasone
156565557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
156665557Sjasone		for (i = 0; i < w->w_childcnt; i++)
156765557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
156865557Sjasone    }
156965557Sjasone
157065557Sjasonestatic int
157165856Sjhbdup_ok(struct witness *w)
157265557Sjasone{
157365557Sjasone	char **dup;
157465557Sjasone
157565557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
157665557Sjasone		if (strcmp(w->w_description, *dup) == 0)
157765557Sjasone			return (1);
157865557Sjasone	return (0);
157965557Sjasone}
158065557Sjasone
158165557Sjasonestatic int
158265856Sjhbblessed(struct witness *w1, struct witness *w2)
158365557Sjasone{
158465557Sjasone	int i;
158565856Sjhb	struct witness_blessed *b;
158665557Sjasone
158765557Sjasone	for (i = 0; i < blessed_count; i++) {
158865557Sjasone		b = &blessed_list[i];
158965557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
159065557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
159165557Sjasone				return (1);
159265557Sjasone			continue;
159365557Sjasone		}
159465557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
159565557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
159665557Sjasone				return (1);
159765557Sjasone	}
159865557Sjasone	return (0);
159965557Sjasone}
160065557Sjasone
160165856Sjhbstatic struct witness *
160265557Sjasonewitness_get()
160365557Sjasone{
160465856Sjhb	struct witness *w;
160565557Sjasone
160665557Sjasone	if ((w = w_free) == NULL) {
160765557Sjasone		witness_dead = 1;
160872200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
160965557Sjasone		printf("witness exhausted\n");
161065557Sjasone		return (NULL);
161165557Sjasone	}
161265557Sjasone	w_free = w->w_next;
161365856Sjhb	bzero(w, sizeof(*w));
161465557Sjasone	return (w);
161565557Sjasone}
161665557Sjasone
161765557Sjasonestatic void
161865856Sjhbwitness_free(struct witness *w)
161965557Sjasone{
162065557Sjasone	w->w_next = w_free;
162165557Sjasone	w_free = w;
162265557Sjasone}
162365557Sjasone
162469881Sjakeint
162565557Sjasonewitness_list(struct proc *p)
162665557Sjasone{
162765856Sjhb	struct mtx *m;
162869881Sjake	int nheld;
162965557Sjasone
163071320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
163169881Sjake	nheld = 0;
163272224Sjhb	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
163365557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
163465557Sjasone		    m->mtx_description, m,
163565557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
163669881Sjake		nheld++;
163765557Sjasone	}
163869881Sjake
163969881Sjake	return (nheld);
164065557Sjasone}
164165557Sjasone
164271709Sjhb#ifdef DDB
164371709Sjhb
164472224SjhbDB_SHOW_COMMAND(mutexes, db_witness_list)
164571709Sjhb{
164671709Sjhb
164772393Sbmilekic	witness_list(curproc);
164871709Sjhb}
164971709Sjhb
165072224SjhbDB_SHOW_COMMAND(witness, db_witness_display)
165172224Sjhb{
165272224Sjhb
165372224Sjhb	witness_display(db_printf);
165472224Sjhb}
165571709Sjhb#endif
165671709Sjhb
165765557Sjasonevoid
165865856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
165965557Sjasone{
166071320Sjasone
166171320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
166271352Sjasone	if (m->mtx_witness == NULL)
166371352Sjasone		return;
166471352Sjasone
166565557Sjasone	*filep = m->mtx_witness->w_file;
166665557Sjasone	*linep = m->mtx_witness->w_line;
166765557Sjasone}
166865557Sjasone
166965557Sjasonevoid
167065856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
167165557Sjasone{
167271320Sjasone
167371320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
167471352Sjasone	if (m->mtx_witness == NULL)
167571352Sjasone		return;
167671352Sjasone
167765557Sjasone	m->mtx_witness->w_file = file;
167865557Sjasone	m->mtx_witness->w_line = line;
167965557Sjasone}
168065557Sjasone
168169429Sjhb#endif	/* WITNESS */
1682