subr_turnstile.c revision 72836
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_turnstile.c 72836 2001-02-22 02:12:54Z jhb $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3472200Sbmilekic * Machine independent bits of mutex implementation and implementation of
3572200Sbmilekic * `witness' structure & related debugging routines.
3672200Sbmilekic */
3772200Sbmilekic
3872200Sbmilekic/*
3965557Sjasone *	Main Entry: witness
4065557Sjasone *	Pronunciation: 'wit-n&s
4165557Sjasone *	Function: noun
4265557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
4365557Sjasone *	    testimony, witness, from 2wit
4465557Sjasone *	Date: before 12th century
4565557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4665557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4765557Sjasone *	    a cause or before a judicial tribunal
4865557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4965557Sjasone *	    testify to its having taken place
5065557Sjasone *	4 : one who has personal knowledge of something
5165557Sjasone *	5 a : something serving as evidence or proof : SIGN
5265557Sjasone *	  b : public affirmation by word or example of usually
5365557Sjasone *	      religious faith or conviction <the heroic witness to divine
5465557Sjasone *	      life -- Pilot>
5565557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5665557Sjasone */
5765557Sjasone
5868790Sjhb#include "opt_ddb.h"
5967676Sjhb#include "opt_witness.h"
6067676Sjhb
6165557Sjasone#include <sys/param.h>
6267352Sjhb#include <sys/bus.h>
6367352Sjhb#include <sys/kernel.h>
6467352Sjhb#include <sys/malloc.h>
6565557Sjasone#include <sys/proc.h>
6667676Sjhb#include <sys/sysctl.h>
6765557Sjasone#include <sys/systm.h>
6867352Sjhb#include <sys/vmmeter.h>
6965557Sjasone#include <sys/ktr.h>
7065557Sjasone
7167352Sjhb#include <machine/atomic.h>
7267352Sjhb#include <machine/bus.h>
7367352Sjhb#include <machine/clock.h>
7465557Sjasone#include <machine/cpu.h>
7567352Sjhb
7668790Sjhb#include <ddb/ddb.h>
7768790Sjhb
7867352Sjhb#include <vm/vm.h>
7967352Sjhb#include <vm/vm_extern.h>
8067352Sjhb
8167352Sjhb#include <sys/mutex.h>
8265557Sjasone
8365557Sjasone/*
8472200Sbmilekic * The WITNESS-enabled mutex debug structure.
8567352Sjhb */
8671352Sjasone#ifdef WITNESS
8771352Sjasonestruct mtx_debug {
8871352Sjasone	struct witness	*mtxd_witness;
8971352Sjasone	LIST_ENTRY(mtx)	mtxd_held;
9071352Sjasone	const char	*mtxd_file;
9171352Sjasone	int		mtxd_line;
9271352Sjasone};
9371352Sjasone
9471560Sjhb#define mtx_held	mtx_debug->mtxd_held
9571560Sjhb#define	mtx_file	mtx_debug->mtxd_file
9671560Sjhb#define	mtx_line	mtx_debug->mtxd_line
9771560Sjhb#define	mtx_witness	mtx_debug->mtxd_witness
9871352Sjasone#endif	/* WITNESS */
9971352Sjasone
10071352Sjasone/*
10172200Sbmilekic * Internal utility macros.
10271352Sjasone */
10372200Sbmilekic#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
10471352Sjasone
10572200Sbmilekic#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
10672200Sbmilekic	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
10771352Sjasone
10872200Sbmilekic#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
10972376Sjake#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
11071352Sjasone
11171352Sjasone/*
11272200Sbmilekic * Early WITNESS-enabled declarations.
11371352Sjasone */
11472200Sbmilekic#ifdef WITNESS
11571352Sjasone
11671352Sjasone/*
11772200Sbmilekic * Internal WITNESS routines which must be prototyped early.
11872200Sbmilekic *
11972200Sbmilekic * XXX: When/if witness code is cleaned up, it would be wise to place all
12072200Sbmilekic *	witness prototyping early in this file.
12172200Sbmilekic */
12272200Sbmilekicstatic void witness_init(struct mtx *, int flag);
12372200Sbmilekicstatic void witness_destroy(struct mtx *);
12472200Sbmilekicstatic void witness_display(void(*)(const char *fmt, ...));
12571352Sjasone
12672200SbmilekicMALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
12771352Sjasone
12867352Sjhb/* All mutexes in system (used for debug/panic) */
12971560Sjhbstatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
13072200Sbmilekic
13171320Sjasone/*
13272200Sbmilekic * This global is set to 0 once it becomes safe to use the witness code.
13371320Sjasone */
13471320Sjasonestatic int witness_cold = 1;
13572200Sbmilekic
13669429Sjhb#else	/* WITNESS */
13771352Sjasone
13872200Sbmilekic/* XXX XXX XXX
13972200Sbmilekic * flag++ is sleazoid way of shuting up warning
14071352Sjasone */
14171352Sjasone#define witness_init(m, flag) flag++
14271352Sjasone#define witness_destroy(m)
14371352Sjasone#define witness_try_enter(m, t, f, l)
14469429Sjhb#endif	/* WITNESS */
14567352Sjhb
14672200Sbmilekic/*
14772200Sbmilekic * All mutex locks in system are kept on the all_mtx list.
14872200Sbmilekic */
14971560Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
15071560Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
15171560Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx,
15271560Sjhb#ifdef WITNESS
15371560Sjhb	&all_mtx_debug
15471560Sjhb#else
15571560Sjhb	NULL
15671560Sjhb#endif
15771560Sjhb	 };
15871560Sjhb
15972200Sbmilekic/*
16072200Sbmilekic * Global variables for book keeping.
16172200Sbmilekic */
16267352Sjhbstatic int	mtx_cur_cnt;
16367352Sjhbstatic int	mtx_max_cnt;
16467352Sjhb
16572200Sbmilekic/*
16672344Sbmilekic * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
16772344Sbmilekic */
16872344Sbmilekicchar	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
16972344Sbmilekicchar	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
17072344Sbmilekicchar	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
17172344Sbmilekicchar	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
17272344Sbmilekic
17372344Sbmilekic/*
17472200Sbmilekic * Prototypes for non-exported routines.
17572200Sbmilekic *
17672200Sbmilekic * NOTE: Prototypes for witness routines are placed at the bottom of the file.
17772200Sbmilekic */
17871352Sjasonestatic void	propagate_priority(struct proc *);
17967352Sjhb
18067352Sjhbstatic void
18167352Sjhbpropagate_priority(struct proc *p)
18267352Sjhb{
18372376Sjake	int pri = p->p_pri.pri_level;
18467352Sjhb	struct mtx *m = p->p_blocked;
18567352Sjhb
18669376Sjhb	mtx_assert(&sched_lock, MA_OWNED);
18767352Sjhb	for (;;) {
18867352Sjhb		struct proc *p1;
18967352Sjhb
19067352Sjhb		p = mtx_owner(m);
19167352Sjhb
19267352Sjhb		if (p == NULL) {
19367352Sjhb			/*
19467352Sjhb			 * This really isn't quite right. Really
19567352Sjhb			 * ought to bump priority of process that
19667352Sjhb			 * next acquires the mutex.
19767352Sjhb			 */
19867352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
19967352Sjhb			return;
20067352Sjhb		}
20172200Sbmilekic
20267352Sjhb		MPASS(p->p_magic == P_MAGIC);
20369376Sjhb		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
20472376Sjake		if (p->p_pri.pri_level <= pri)
20567352Sjhb			return;
20669376Sjhb
20767352Sjhb		/*
20869376Sjhb		 * Bump this process' priority.
20969376Sjhb		 */
21069376Sjhb		SET_PRIO(p, pri);
21169376Sjhb
21269376Sjhb		/*
21367352Sjhb		 * If lock holder is actually running, just bump priority.
21467352Sjhb		 */
21572836Sjhb		if (p->p_oncpu != NOCPU) {
21667352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
21767352Sjhb			return;
21867352Sjhb		}
21972376Sjake
22067352Sjhb		/*
22167352Sjhb		 * If on run queue move to new run queue, and
22267352Sjhb		 * quit.
22367352Sjhb		 */
22467352Sjhb		if (p->p_stat == SRUN) {
22567352Sjhb			MPASS(p->p_blocked == NULL);
22667352Sjhb			remrunqueue(p);
22767352Sjhb			setrunqueue(p);
22867352Sjhb			return;
22967352Sjhb		}
23067352Sjhb
23167352Sjhb		/*
23269376Sjhb		 * If we aren't blocked on a mutex, we should be.
23367352Sjhb		 */
23469376Sjhb		KASSERT(p->p_stat == SMTX, (
23569376Sjhb		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
23669376Sjhb		    p->p_pid, p->p_comm, p->p_stat,
23769376Sjhb		    m->mtx_description));
23867352Sjhb
23967352Sjhb		/*
24067352Sjhb		 * Pick up the mutex that p is blocked on.
24167352Sjhb		 */
24267352Sjhb		m = p->p_blocked;
24367352Sjhb		MPASS(m != NULL);
24467352Sjhb
24567352Sjhb		/*
24667352Sjhb		 * Check if the proc needs to be moved up on
24767352Sjhb		 * the blocked chain
24867352Sjhb		 */
24969376Sjhb		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
25069376Sjhb			continue;
25169376Sjhb		}
25272200Sbmilekic
25372376Sjake		p1 = TAILQ_PREV(p, procqueue, p_procq);
25472376Sjake		if (p1->p_pri.pri_level <= pri) {
25567352Sjhb			continue;
25667352Sjhb		}
25767352Sjhb
25867352Sjhb		/*
25969376Sjhb		 * Remove proc from blocked chain and determine where
26069376Sjhb		 * it should be moved up to.  Since we know that p1 has
26169376Sjhb		 * a lower priority than p, we know that at least one
26269376Sjhb		 * process in the chain has a lower priority and that
26369376Sjhb		 * p1 will thus not be NULL after the loop.
26467352Sjhb		 */
26567352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
26667352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
26767352Sjhb			MPASS(p1->p_magic == P_MAGIC);
26872376Sjake			if (p1->p_pri.pri_level > pri)
26967352Sjhb				break;
27067352Sjhb		}
27172200Sbmilekic
27269376Sjhb		MPASS(p1 != NULL);
27369376Sjhb		TAILQ_INSERT_BEFORE(p1, p, p_procq);
27467352Sjhb		CTR4(KTR_LOCK,
27571560Sjhb		    "propagate_priority: p %p moved before %p on [%p] %s",
27667352Sjhb		    p, p1, m, m->mtx_description);
27767352Sjhb	}
27867352Sjhb}
27967352Sjhb
28071352Sjasone/*
28172200Sbmilekic * The important part of mtx_trylock{,_flags}()
28272200Sbmilekic * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
28372200Sbmilekic * if we're called, it's because we know we don't already own this lock.
28471352Sjasone */
28572200Sbmilekicint
28672200Sbmilekic_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
28771352Sjasone{
28872200Sbmilekic	int rval;
28971352Sjasone
29072393Sbmilekic	MPASS(curproc != NULL);
29171352Sjasone
29272200Sbmilekic	/*
29372200Sbmilekic	 * _mtx_trylock does not accept MTX_NOSWITCH option.
29472200Sbmilekic	 */
29572344Sbmilekic	KASSERT((opts & MTX_NOSWITCH) == 0,
29672344Sbmilekic	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
29772200Sbmilekic
29872393Sbmilekic	rval = _obtain_lock(m, curproc);
29972200Sbmilekic
30072200Sbmilekic#ifdef WITNESS
30172200Sbmilekic	if (rval && m->mtx_witness != NULL) {
30271352Sjasone		/*
30372200Sbmilekic		 * We do not handle recursion in _mtx_trylock; see the
30472200Sbmilekic		 * note at the top of the routine.
30571352Sjasone		 */
30672344Sbmilekic		KASSERT(!mtx_recursed(m),
30772344Sbmilekic		    ("mtx_trylock() called on a recursed mutex"));
30872200Sbmilekic		witness_try_enter(m, (opts | m->mtx_flags), file, line);
30971352Sjasone	}
31072200Sbmilekic#endif	/* WITNESS */
31171352Sjasone
31272200Sbmilekic	if ((opts & MTX_QUIET) == 0)
31372200Sbmilekic		CTR5(KTR_LOCK, "TRY_ENTER %s [%p] result=%d at %s:%d",
31472200Sbmilekic		    m->mtx_description, m, rval, file, line);
31572200Sbmilekic
31672200Sbmilekic	return rval;
31771352Sjasone}
31871352Sjasone
31971352Sjasone/*
32072200Sbmilekic * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
32171352Sjasone *
32272200Sbmilekic * We call this if the lock is either contested (i.e. we need to go to
32372200Sbmilekic * sleep waiting for it), or if we need to recurse on it.
32471352Sjasone */
32572200Sbmilekicvoid
32672200Sbmilekic_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
32771352Sjasone{
32872393Sbmilekic	struct proc *p = curproc;
32971352Sjasone
33072200Sbmilekic	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
33172200Sbmilekic		m->mtx_recurse++;
33272200Sbmilekic		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
33372200Sbmilekic		if ((opts & MTX_QUIET) == 0)
33472344Sbmilekic			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
33572200Sbmilekic		return;
33671352Sjasone	}
33771352Sjasone
33872200Sbmilekic	if ((opts & MTX_QUIET) == 0)
33972344Sbmilekic		CTR3(KTR_LOCK, "_mtx_lock_sleep: %p contested (lock=%p) [%p]",
34072344Sbmilekic		    m, (void *)m->mtx_lock, (void *)RETIP(m));
34171352Sjasone
34272200Sbmilekic	/*
34372200Sbmilekic	 * Save our priority. Even though p_nativepri is protected by
34472200Sbmilekic	 * sched_lock, we don't obtain it here as it can be expensive.
34572200Sbmilekic	 * Since this is the only place p_nativepri is set, and since two
34672200Sbmilekic	 * CPUs will not be executing the same process concurrently, we know
34772200Sbmilekic	 * that no other CPU is going to be messing with this. Also,
34872200Sbmilekic	 * p_nativepri is only read when we are blocked on a mutex, so that
34972200Sbmilekic	 * can't be happening right now either.
35072200Sbmilekic	 */
35172376Sjake	p->p_pri.pri_native = p->p_pri.pri_level;
35271352Sjasone
35372200Sbmilekic	while (!_obtain_lock(m, p)) {
35472200Sbmilekic		uintptr_t v;
35572200Sbmilekic		struct proc *p1;
35671352Sjasone
35772200Sbmilekic		mtx_lock_spin(&sched_lock);
35872200Sbmilekic		/*
35972200Sbmilekic		 * Check if the lock has been released while spinning for
36072200Sbmilekic		 * the sched_lock.
36172200Sbmilekic		 */
36272200Sbmilekic		if ((v = m->mtx_lock) == MTX_UNOWNED) {
36372200Sbmilekic			mtx_unlock_spin(&sched_lock);
36472200Sbmilekic			continue;
36571352Sjasone		}
36671352Sjasone
36772200Sbmilekic		/*
36872200Sbmilekic		 * The mutex was marked contested on release. This means that
36972200Sbmilekic		 * there are processes blocked on it.
37072200Sbmilekic		 */
37172200Sbmilekic		if (v == MTX_CONTESTED) {
37272200Sbmilekic			p1 = TAILQ_FIRST(&m->mtx_blocked);
37372344Sbmilekic			MPASS(p1 != NULL);
37472200Sbmilekic			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
37567352Sjhb
37672376Sjake			if (p1->p_pri.pri_level < p->p_pri.pri_level)
37772376Sjake				SET_PRIO(p, p1->p_pri.pri_level);
37872200Sbmilekic			mtx_unlock_spin(&sched_lock);
37967352Sjhb			return;
38067352Sjhb		}
38169376Sjhb
38269376Sjhb		/*
38372200Sbmilekic		 * If the mutex isn't already contested and a failure occurs
38472200Sbmilekic		 * setting the contested bit, the mutex was either released
38572200Sbmilekic		 * or the state of the MTX_RECURSED bit changed.
38669376Sjhb		 */
38772200Sbmilekic		if ((v & MTX_CONTESTED) == 0 &&
38872200Sbmilekic		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
38972200Sbmilekic			(void *)(v | MTX_CONTESTED))) {
39072200Sbmilekic			mtx_unlock_spin(&sched_lock);
39172200Sbmilekic			continue;
39272200Sbmilekic		}
39367352Sjhb
39472200Sbmilekic		/*
39572200Sbmilekic		 * We deffinately must sleep for this lock.
39672200Sbmilekic		 */
39772200Sbmilekic		mtx_assert(m, MA_NOTOWNED);
39867352Sjhb
39967352Sjhb#ifdef notyet
40072200Sbmilekic		/*
40172200Sbmilekic		 * If we're borrowing an interrupted thread's VM context, we
40272200Sbmilekic		 * must clean up before going to sleep.
40372200Sbmilekic		 */
40472200Sbmilekic		if (p->p_flag & (P_ITHD | P_SITHD)) {
40572200Sbmilekic			ithd_t *it = (ithd_t *)p;
40667352Sjhb
40772200Sbmilekic			if (it->it_interrupted) {
40872200Sbmilekic				if ((opts & MTX_QUIET) == 0)
40972200Sbmilekic					CTR2(KTR_LOCK,
41072344Sbmilekic				    "_mtx_lock_sleep: 0x%x interrupted 0x%x",
41172200Sbmilekic					    it, it->it_interrupted);
41272200Sbmilekic				intr_thd_fixup(it);
41367352Sjhb			}
41472200Sbmilekic		}
41567352Sjhb#endif
41667352Sjhb
41772200Sbmilekic		/*
41872200Sbmilekic		 * Put us on the list of threads blocked on this mutex.
41972200Sbmilekic		 */
42072200Sbmilekic		if (TAILQ_EMPTY(&m->mtx_blocked)) {
42172200Sbmilekic			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
42272200Sbmilekic			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
42372200Sbmilekic			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
42472200Sbmilekic		} else {
42572200Sbmilekic			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
42672376Sjake				if (p1->p_pri.pri_level > p->p_pri.pri_level)
42772200Sbmilekic					break;
42872200Sbmilekic			if (p1)
42972200Sbmilekic				TAILQ_INSERT_BEFORE(p1, p, p_procq);
43072200Sbmilekic			else
43167352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
43272200Sbmilekic		}
43367352Sjhb
43472200Sbmilekic		/*
43572200Sbmilekic		 * Save who we're blocked on.
43672200Sbmilekic		 */
43772200Sbmilekic		p->p_blocked = m;
43872200Sbmilekic		p->p_mtxname = m->mtx_description;
43972200Sbmilekic		p->p_stat = SMTX;
44072200Sbmilekic		propagate_priority(p);
44167352Sjhb
44272200Sbmilekic		if ((opts & MTX_QUIET) == 0)
44372200Sbmilekic			CTR3(KTR_LOCK,
44472200Sbmilekic			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
44572200Sbmilekic			    m->mtx_description);
44672200Sbmilekic
44772200Sbmilekic		mi_switch();
44872200Sbmilekic
44972200Sbmilekic		if ((opts & MTX_QUIET) == 0)
45072200Sbmilekic			CTR3(KTR_LOCK,
45172200Sbmilekic			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
45272200Sbmilekic			  p, m, m->mtx_description);
45372200Sbmilekic
45472200Sbmilekic		mtx_unlock_spin(&sched_lock);
45572200Sbmilekic	}
45672200Sbmilekic
45772200Sbmilekic	return;
45872200Sbmilekic}
45972200Sbmilekic
46072200Sbmilekic/*
46172200Sbmilekic * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
46272200Sbmilekic *
46372200Sbmilekic * This is only called if we need to actually spin for the lock. Recursion
46472200Sbmilekic * is handled inline.
46572200Sbmilekic */
46672200Sbmilekicvoid
46772200Sbmilekic_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
46872200Sbmilekic	       int line)
46972200Sbmilekic{
47072200Sbmilekic	int i = 0;
47172200Sbmilekic
47272200Sbmilekic	if ((opts & MTX_QUIET) == 0)
47372344Sbmilekic		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
47472200Sbmilekic
47572200Sbmilekic	for (;;) {
47672393Sbmilekic		if (_obtain_lock(m, curproc))
47772200Sbmilekic			break;
47872200Sbmilekic
47972200Sbmilekic		while (m->mtx_lock != MTX_UNOWNED) {
48072200Sbmilekic			if (i++ < 1000000)
48172200Sbmilekic				continue;
48272200Sbmilekic			if (i++ < 6000000)
48372200Sbmilekic				DELAY(1);
48467352Sjhb#ifdef DDB
48572200Sbmilekic			else if (!db_active)
48667352Sjhb#else
48772200Sbmilekic			else
48867352Sjhb#endif
48972200Sbmilekic			panic("spin lock %s held by %p for > 5 seconds",
49072200Sbmilekic			    m->mtx_description, (void *)m->mtx_lock);
49167352Sjhb		}
49267352Sjhb	}
49372200Sbmilekic
49472200Sbmilekic	m->mtx_saveintr = mtx_intr;
49572200Sbmilekic	if ((opts & MTX_QUIET) == 0)
49672200Sbmilekic		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
49772200Sbmilekic
49872200Sbmilekic	return;
49967352Sjhb}
50067352Sjhb
50172200Sbmilekic/*
50272200Sbmilekic * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
50372200Sbmilekic *
50472200Sbmilekic * We are only called here if the lock is recursed or contested (i.e. we
50572200Sbmilekic * need to wake up a blocked thread).
50672200Sbmilekic */
50767352Sjhbvoid
50872200Sbmilekic_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
50967352Sjhb{
51067352Sjhb	struct proc *p, *p1;
51167352Sjhb	struct mtx *m1;
51267352Sjhb	int pri;
51367352Sjhb
51472393Sbmilekic	p = curproc;
51572200Sbmilekic	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
51672200Sbmilekic
51772200Sbmilekic	if (mtx_recursed(m)) {
51872200Sbmilekic		if (--(m->mtx_recurse) == 0)
51972200Sbmilekic			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
52072200Sbmilekic		if ((opts & MTX_QUIET) == 0)
52172200Sbmilekic			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
52272200Sbmilekic		return;
52372200Sbmilekic	}
52472200Sbmilekic
52572200Sbmilekic	mtx_lock_spin(&sched_lock);
52672200Sbmilekic	if ((opts & MTX_QUIET) == 0)
52772200Sbmilekic		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
52872200Sbmilekic
52972200Sbmilekic	p1 = TAILQ_FIRST(&m->mtx_blocked);
53072200Sbmilekic	MPASS(p->p_magic == P_MAGIC);
53172200Sbmilekic	MPASS(p1->p_magic == P_MAGIC);
53272200Sbmilekic
53372200Sbmilekic	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
53472200Sbmilekic
53572200Sbmilekic	if (TAILQ_EMPTY(&m->mtx_blocked)) {
53672200Sbmilekic		LIST_REMOVE(m, mtx_contested);
53772200Sbmilekic		_release_lock_quick(m);
53872200Sbmilekic		if ((opts & MTX_QUIET) == 0)
53972200Sbmilekic			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
54072200Sbmilekic	} else
54172200Sbmilekic		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
54272200Sbmilekic
54372376Sjake	pri = PRI_MAX;
54472200Sbmilekic	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
54572376Sjake		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
54672200Sbmilekic		if (cp < pri)
54772200Sbmilekic			pri = cp;
54872200Sbmilekic	}
54972200Sbmilekic
55072376Sjake	if (pri > p->p_pri.pri_native)
55172376Sjake		pri = p->p_pri.pri_native;
55272200Sbmilekic	SET_PRIO(p, pri);
55372200Sbmilekic
55472200Sbmilekic	if ((opts & MTX_QUIET) == 0)
55572200Sbmilekic		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
55672200Sbmilekic		    m, p1);
55772200Sbmilekic
55872200Sbmilekic	p1->p_blocked = NULL;
55972200Sbmilekic	p1->p_mtxname = NULL;
56072200Sbmilekic	p1->p_stat = SRUN;
56172200Sbmilekic	setrunqueue(p1);
56272200Sbmilekic
56372376Sjake	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
56467352Sjhb#ifdef notyet
56572200Sbmilekic		if (p->p_flag & (P_ITHD | P_SITHD)) {
56672200Sbmilekic			ithd_t *it = (ithd_t *)p;
56767352Sjhb
56872200Sbmilekic			if (it->it_interrupted) {
56972200Sbmilekic				if ((opts & MTX_QUIET) == 0)
57072200Sbmilekic					CTR2(KTR_LOCK,
57172200Sbmilekic				    "_mtx_unlock_sleep: 0x%x interrupted 0x%x",
57272200Sbmilekic					    it, it->it_interrupted);
57372200Sbmilekic				intr_thd_fixup(it);
57467352Sjhb			}
57572200Sbmilekic		}
57667352Sjhb#endif
57772200Sbmilekic		setrunqueue(p);
57872200Sbmilekic		if ((opts & MTX_QUIET) == 0)
57972200Sbmilekic			CTR2(KTR_LOCK,
58072200Sbmilekic			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
58172200Sbmilekic			    (void *)m->mtx_lock);
58272200Sbmilekic
58372200Sbmilekic		mi_switch();
58472200Sbmilekic		if ((opts & MTX_QUIET) == 0)
58572200Sbmilekic			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
58672200Sbmilekic			    m, (void *)m->mtx_lock);
58767352Sjhb	}
58872200Sbmilekic
58972200Sbmilekic	mtx_unlock_spin(&sched_lock);
59072200Sbmilekic
59172200Sbmilekic	return;
59267352Sjhb}
59367352Sjhb
59472200Sbmilekic/*
59572200Sbmilekic * All the unlocking of MTX_SPIN locks is done inline.
59672200Sbmilekic * See the _rel_spin_lock() macro for the details.
59772200Sbmilekic */
59872200Sbmilekic
59972200Sbmilekic/*
60072200Sbmilekic * The INVARIANTS-enabled mtx_assert()
60172200Sbmilekic */
60271352Sjasone#ifdef INVARIANTS
60371352Sjasonevoid
60471360Sjasone_mtx_assert(struct mtx *m, int what, const char *file, int line)
60571352Sjasone{
60671352Sjasone	switch ((what)) {
60771352Sjasone	case MA_OWNED:
60871352Sjasone	case MA_OWNED | MA_RECURSED:
60971352Sjasone	case MA_OWNED | MA_NOTRECURSED:
61071352Sjasone		if (!mtx_owned((m)))
61171352Sjasone			panic("mutex %s not owned at %s:%d",
61271360Sjasone			    (m)->mtx_description, file, line);
61371352Sjasone		if (mtx_recursed((m))) {
61471352Sjasone			if (((what) & MA_NOTRECURSED) != 0)
61571352Sjasone				panic("mutex %s recursed at %s:%d",
61671360Sjasone				    (m)->mtx_description, file, line);
61771352Sjasone		} else if (((what) & MA_RECURSED) != 0) {
61871352Sjasone			panic("mutex %s unrecursed at %s:%d",
61971360Sjasone			    (m)->mtx_description, file, line);
62071352Sjasone		}
62171352Sjasone		break;
62271352Sjasone	case MA_NOTOWNED:
62371352Sjasone		if (mtx_owned((m)))
62471352Sjasone			panic("mutex %s owned at %s:%d",
62571360Sjasone			    (m)->mtx_description, file, line);
62671352Sjasone		break;
62771352Sjasone	default:
62871360Sjasone		panic("unknown mtx_assert at %s:%d", file, line);
62971352Sjasone	}
63071352Sjasone}
63171352Sjasone#endif
63271352Sjasone
63372200Sbmilekic/*
63472200Sbmilekic * The MUTEX_DEBUG-enabled mtx_validate()
63572200Sbmilekic */
63667352Sjhb#define MV_DESTROY	0	/* validate before destory */
63767352Sjhb#define MV_INIT		1	/* validate before init */
63867352Sjhb
63967352Sjhb#ifdef MUTEX_DEBUG
64067352Sjhb
64167352Sjhbint mtx_validate __P((struct mtx *, int));
64267352Sjhb
64367352Sjhbint
64467352Sjhbmtx_validate(struct mtx *m, int when)
64567352Sjhb{
64667352Sjhb	struct mtx *mp;
64767352Sjhb	int i;
64867352Sjhb	int retval = 0;
64967352Sjhb
65071320Sjasone#ifdef WITNESS
65171320Sjasone	if (witness_cold)
65271320Sjasone		return 0;
65371320Sjasone#endif
65467352Sjhb	if (m == &all_mtx || cold)
65567352Sjhb		return 0;
65667352Sjhb
65772200Sbmilekic	mtx_lock(&all_mtx);
65867352Sjhb/*
65967352Sjhb * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
66067352Sjhb * we can re-enable the kernacc() checks.
66167352Sjhb */
66267352Sjhb#ifndef __alpha__
66367352Sjhb	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
66467352Sjhb	    VM_PROT_READ) == 1);
66567352Sjhb#endif
66667352Sjhb	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
66767352Sjhb	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
66867352Sjhb#ifndef __alpha__
66967352Sjhb		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
67067352Sjhb		    VM_PROT_READ) != 1) {
67167352Sjhb			panic("mtx_validate: mp=%p mp->mtx_next=%p",
67267352Sjhb			    mp, mp->mtx_next);
67367352Sjhb		}
67467352Sjhb#endif
67567352Sjhb		i++;
67667352Sjhb		if (i > mtx_cur_cnt) {
67767352Sjhb			panic("mtx_validate: too many in chain, known=%d\n",
67867352Sjhb			    mtx_cur_cnt);
67967352Sjhb		}
68067352Sjhb	}
68167352Sjhb	MPASS(i == mtx_cur_cnt);
68267352Sjhb	switch (when) {
68367352Sjhb	case MV_DESTROY:
68467352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
68567352Sjhb			if (mp == m)
68667352Sjhb				break;
68767352Sjhb		MPASS(mp == m);
68867352Sjhb		break;
68967352Sjhb	case MV_INIT:
69067352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
69167352Sjhb		if (mp == m) {
69267352Sjhb			/*
69367352Sjhb			 * Not good. This mutex already exists.
69467352Sjhb			 */
69567352Sjhb			printf("re-initing existing mutex %s\n",
69667352Sjhb			    m->mtx_description);
69767352Sjhb			MPASS(m->mtx_lock == MTX_UNOWNED);
69867352Sjhb			retval = 1;
69967352Sjhb		}
70067352Sjhb	}
70172200Sbmilekic	mtx_unlock(&all_mtx);
70267352Sjhb	return (retval);
70367352Sjhb}
70467352Sjhb#endif
70567352Sjhb
70672200Sbmilekic/*
70772200Sbmilekic * Mutex initialization routine; initialize lock `m' of type contained in
70872200Sbmilekic * `opts' with options contained in `opts' and description `description.'
70972200Sbmilekic * Place on "all_mtx" queue.
71072200Sbmilekic */
71167352Sjhbvoid
71272200Sbmilekicmtx_init(struct mtx *m, const char *description, int opts)
71367352Sjhb{
71472200Sbmilekic
71572200Sbmilekic	if ((opts & MTX_QUIET) == 0)
71672200Sbmilekic		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
71772200Sbmilekic
71867352Sjhb#ifdef MUTEX_DEBUG
71972200Sbmilekic	/* Diagnostic and error correction */
72072200Sbmilekic	if (mtx_validate(m, MV_INIT))
72167352Sjhb		return;
72269429Sjhb#endif
72367352Sjhb
72467352Sjhb	bzero((void *)m, sizeof *m);
72567352Sjhb	TAILQ_INIT(&m->mtx_blocked);
72672200Sbmilekic
72769429Sjhb#ifdef WITNESS
72871320Sjasone	if (!witness_cold) {
72971560Sjhb		m->mtx_debug = malloc(sizeof(struct mtx_debug),
73072200Sbmilekic		    M_WITNESS, M_NOWAIT | M_ZERO);
73171560Sjhb		MPASS(m->mtx_debug != NULL);
73271320Sjasone	}
73371560Sjhb#endif
73471320Sjasone
73572200Sbmilekic	m->mtx_description = description;
73672200Sbmilekic	m->mtx_flags = opts;
73767352Sjhb	m->mtx_lock = MTX_UNOWNED;
73872200Sbmilekic
73967352Sjhb	/* Put on all mutex queue */
74072200Sbmilekic	mtx_lock(&all_mtx);
74167352Sjhb	m->mtx_next = &all_mtx;
74267352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
74367352Sjhb	m->mtx_prev->mtx_next = m;
74467352Sjhb	all_mtx.mtx_prev = m;
74567352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
74667352Sjhb		mtx_max_cnt = mtx_cur_cnt;
74772200Sbmilekic	mtx_unlock(&all_mtx);
74872200Sbmilekic
74971320Sjasone#ifdef WITNESS
75071320Sjasone	if (!witness_cold)
75172200Sbmilekic		witness_init(m, opts);
75271320Sjasone#endif
75367352Sjhb}
75467352Sjhb
75572200Sbmilekic/*
75672200Sbmilekic * Remove lock `m' from all_mtx queue.
75772200Sbmilekic */
75867352Sjhbvoid
75967352Sjhbmtx_destroy(struct mtx *m)
76067352Sjhb{
76167352Sjhb
76271320Sjasone#ifdef WITNESS
76371320Sjasone	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
76471320Sjasone	    __FUNCTION__));
76571320Sjasone#endif
76672200Sbmilekic
76771560Sjhb	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
76872200Sbmilekic
76967352Sjhb#ifdef MUTEX_DEBUG
77067352Sjhb	if (m->mtx_next == NULL)
77167352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
77267352Sjhb		    m, m->mtx_description);
77367352Sjhb
77467352Sjhb	if (!mtx_owned(m)) {
77567352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
77667352Sjhb	} else {
77771228Sbmilekic		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
77867352Sjhb	}
77972200Sbmilekic
78072200Sbmilekic	/* diagnostic */
78172200Sbmilekic	mtx_validate(m, MV_DESTROY);
78267352Sjhb#endif
78367352Sjhb
78467352Sjhb#ifdef WITNESS
78567352Sjhb	if (m->mtx_witness)
78667352Sjhb		witness_destroy(m);
78767352Sjhb#endif /* WITNESS */
78867352Sjhb
78967352Sjhb	/* Remove from the all mutex queue */
79072200Sbmilekic	mtx_lock(&all_mtx);
79167352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
79267352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
79372200Sbmilekic
79467352Sjhb#ifdef MUTEX_DEBUG
79567352Sjhb	m->mtx_next = m->mtx_prev = NULL;
79669429Sjhb#endif
79772200Sbmilekic
79869429Sjhb#ifdef WITNESS
79972200Sbmilekic	free(m->mtx_debug, M_WITNESS);
80071560Sjhb	m->mtx_debug = NULL;
80167352Sjhb#endif
80272200Sbmilekic
80367352Sjhb	mtx_cur_cnt--;
80472200Sbmilekic	mtx_unlock(&all_mtx);
80567352Sjhb}
80667352Sjhb
80772200Sbmilekic
80871560Sjhb/*
80972200Sbmilekic * The WITNESS-enabled diagnostic code.
81071560Sjhb */
81171560Sjhb#ifdef WITNESS
81271320Sjasonestatic void
81371320Sjasonewitness_fixup(void *dummy __unused)
81471320Sjasone{
81571320Sjasone	struct mtx *mp;
81671320Sjasone
81771560Sjhb	/*
81871560Sjhb	 * We have to release Giant before initializing its witness
81971560Sjhb	 * structure so that WITNESS doesn't get confused.
82071560Sjhb	 */
82172200Sbmilekic	mtx_unlock(&Giant);
82271560Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
82371560Sjhb
82472200Sbmilekic	mtx_lock(&all_mtx);
82572200Sbmilekic
82671320Sjasone	/* Iterate through all mutexes and finish up mutex initialization. */
82771320Sjasone	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
82871320Sjasone
82971560Sjhb		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
83072200Sbmilekic		    M_WITNESS, M_NOWAIT | M_ZERO);
83171560Sjhb		MPASS(mp->mtx_debug != NULL);
83271320Sjasone
83371320Sjasone		witness_init(mp, mp->mtx_flags);
83471320Sjasone	}
83572200Sbmilekic	mtx_unlock(&all_mtx);
83671320Sjasone
83771320Sjasone	/* Mark the witness code as being ready for use. */
83871320Sjasone	atomic_store_rel_int(&witness_cold, 0);
83971560Sjhb
84072200Sbmilekic	mtx_lock(&Giant);
84171320Sjasone}
84271320SjasoneSYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
84371320Sjasone
84465557Sjasone#define WITNESS_COUNT 200
84565557Sjasone#define	WITNESS_NCHILDREN 2
84665557Sjasone
84767401Sjhbint witness_watch = 1;
84865557Sjasone
84965856Sjhbstruct witness {
85065557Sjasone	struct witness	*w_next;
85167404Sjhb	const char	*w_description;
85265624Sjasone	const char	*w_file;
85365557Sjasone	int		 w_line;
85465557Sjasone	struct witness	*w_morechildren;
85565557Sjasone	u_char		 w_childcnt;
85665557Sjasone	u_char		 w_Giant_squawked:1;
85765557Sjasone	u_char		 w_other_squawked:1;
85865557Sjasone	u_char		 w_same_squawked:1;
85971228Sbmilekic	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
86065557Sjasone	u_int		 w_level;
86165557Sjasone	struct witness	*w_children[WITNESS_NCHILDREN];
86265856Sjhb};
86365557Sjasone
86465856Sjhbstruct witness_blessed {
86565557Sjasone	char 	*b_lock1;
86665557Sjasone	char	*b_lock2;
86765856Sjhb};
86865557Sjasone
86967676Sjhb#ifdef DDB
87065557Sjasone/*
87167676Sjhb * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
87265557Sjasone * drop into kdebug() when:
87365557Sjasone *	- a lock heirarchy violation occurs
87465557Sjasone *	- locks are held when going to sleep.
87565557Sjasone */
87671560Sjhbint	witness_ddb;
87767676Sjhb#ifdef WITNESS_DDB
87871560SjhbTUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
87967676Sjhb#else
88071560SjhbTUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
88165557Sjasone#endif
88267676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
88367676Sjhb#endif /* DDB */
88465557Sjasone
88571560Sjhbint	witness_skipspin;
88667676Sjhb#ifdef WITNESS_SKIPSPIN
88771560SjhbTUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
88867676Sjhb#else
88971560SjhbTUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
89065557Sjasone#endif
89167676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
89267676Sjhb    "");
89365557Sjasone
89472200Sbmilekic/*
89572200Sbmilekic * Witness-enabled globals
89672200Sbmilekic */
89771320Sjasonestatic struct mtx	w_mtx;
89865856Sjhbstatic struct witness	*w_free;
89965856Sjhbstatic struct witness	*w_all;
90065856Sjhbstatic int		 w_inited;
90165856Sjhbstatic int		 witness_dead;	/* fatal error, probably no memory */
90265557Sjasone
90365856Sjhbstatic struct witness	 w_data[WITNESS_COUNT];
90465557Sjasone
90572200Sbmilekic/*
90672200Sbmilekic * Internal witness routine prototypes
90772200Sbmilekic */
90872200Sbmilekicstatic struct witness *enroll(const char *description, int flag);
90972200Sbmilekicstatic int itismychild(struct witness *parent, struct witness *child);
91072200Sbmilekicstatic void removechild(struct witness *parent, struct witness *child);
91172200Sbmilekicstatic int isitmychild(struct witness *parent, struct witness *child);
91272200Sbmilekicstatic int isitmydescendant(struct witness *parent, struct witness *child);
91372200Sbmilekicstatic int dup_ok(struct witness *);
91472200Sbmilekicstatic int blessed(struct witness *, struct witness *);
91572200Sbmilekicstatic void
91672200Sbmilekic    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
91772200Sbmilekicstatic void witness_leveldescendents(struct witness *parent, int level);
91872200Sbmilekicstatic void witness_levelall(void);
91972200Sbmilekicstatic struct witness * witness_get(void);
92072200Sbmilekicstatic void witness_free(struct witness *m);
92165557Sjasone
92265557Sjasonestatic char *ignore_list[] = {
92365557Sjasone	"witness lock",
92465557Sjasone	NULL
92565557Sjasone};
92665557Sjasone
92765557Sjasonestatic char *spin_order_list[] = {
92872224Sjhb#if defined(__i386__) && defined (SMP)
92972224Sjhb	"com",
93072224Sjhb#endif
93169362Sjhb	"sio",
93272224Sjhb#ifdef __i386__
93372224Sjhb	"cy",
93472224Sjhb#endif
93572836Sjhb	"ithread table lock",
93672836Sjhb	"ithread list lock",
93765557Sjasone	"sched lock",
93868808Sjhb#ifdef __i386__
93967676Sjhb	"clk",
94068808Sjhb#endif
94168889Sjake	"callout",
94265557Sjasone	/*
94365557Sjasone	 * leaf locks
94465557Sjasone	 */
94572224Sjhb#ifdef SMP
94671576Sjasone#ifdef __i386__
94771576Sjasone	"ap boot",
94871576Sjasone	"imen",
94971576Sjasone#endif
95071576Sjasone	"smp rendezvous",
95172224Sjhb#endif
95265557Sjasone	NULL
95365557Sjasone};
95465557Sjasone
95565557Sjasonestatic char *order_list[] = {
95672256Sjhb	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
95772256Sjhb	    "uidinfo struct", NULL,
95865557Sjasone	NULL
95965557Sjasone};
96065557Sjasone
96165557Sjasonestatic char *dup_list[] = {
96265557Sjasone	NULL
96365557Sjasone};
96465557Sjasone
96565557Sjasonestatic char *sleep_list[] = {
96668862Sjake	"Giant",
96765557Sjasone	NULL
96865557Sjasone};
96965557Sjasone
97065557Sjasone/*
97165557Sjasone * Pairs of locks which have been blessed
97265557Sjasone * Don't complain about order problems with blessed locks
97365557Sjasone */
97465856Sjhbstatic struct witness_blessed blessed_list[] = {
97565557Sjasone};
97672200Sbmilekicstatic int blessed_count =
97772200Sbmilekic	sizeof(blessed_list) / sizeof(struct witness_blessed);
97865557Sjasone
97971352Sjasonestatic void
98065856Sjhbwitness_init(struct mtx *m, int flag)
98165557Sjasone{
98265557Sjasone	m->mtx_witness = enroll(m->mtx_description, flag);
98365557Sjasone}
98465557Sjasone
98571352Sjasonestatic void
98665856Sjhbwitness_destroy(struct mtx *m)
98765557Sjasone{
98865856Sjhb	struct mtx *m1;
98965557Sjasone	struct proc *p;
99072393Sbmilekic	p = curproc;
99172224Sjhb	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
99265557Sjasone		if (m1 == m) {
99365557Sjasone			LIST_REMOVE(m, mtx_held);
99465557Sjasone			break;
99565557Sjasone		}
99665557Sjasone	}
99765557Sjasone	return;
99865557Sjasone
99965557Sjasone}
100065557Sjasone
100171352Sjasonestatic void
100271352Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
100371352Sjasone{
100471352Sjasone	struct witness *w, *w1;
100572224Sjhb	int level, found;
100671352Sjasone
100771352Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
100871352Sjasone	witness_levelall();
100971352Sjasone
101072224Sjhb	/*
101172224Sjhb	 * First, handle sleep mutexes which have been acquired at least
101272224Sjhb	 * once.
101372224Sjhb	 */
101472224Sjhb	prnt("Sleep mutexes:\n");
101571352Sjasone	for (w = w_all; w; w = w->w_next) {
101672224Sjhb		if (w->w_file == NULL || w->w_spin)
101771352Sjasone			continue;
101871352Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
101971352Sjasone			if (isitmychild(w1, w))
102071352Sjasone				break;
102171352Sjasone		}
102271352Sjasone		if (w1 != NULL)
102371352Sjasone			continue;
102471352Sjasone		/*
102571352Sjasone		 * This lock has no anscestors, display its descendants.
102671352Sjasone		 */
102771352Sjasone		witness_displaydescendants(prnt, w);
102871352Sjasone	}
102972224Sjhb
103072224Sjhb	/*
103172224Sjhb	 * Now do spin mutexes which have been acquired at least once.
103272224Sjhb	 */
103372224Sjhb	prnt("\nSpin mutexes:\n");
103472224Sjhb	level = 0;
103572224Sjhb	while (level < sizeof(spin_order_list) / sizeof(char *)) {
103672224Sjhb		found = 0;
103772224Sjhb		for (w = w_all; w; w = w->w_next) {
103872224Sjhb			if (w->w_file == NULL || !w->w_spin)
103972224Sjhb				continue;
104072224Sjhb			if (w->w_level == 1 << level) {
104172224Sjhb				witness_displaydescendants(prnt, w);
104272224Sjhb				level++;
104372224Sjhb				found = 1;
104472224Sjhb			}
104572224Sjhb		}
104672224Sjhb		if (found == 0)
104772224Sjhb			level++;
104872224Sjhb	}
104972224Sjhb
105072224Sjhb	/*
105172224Sjhb	 * Finally, any mutexes which have not been acquired yet.
105272224Sjhb	 */
105372224Sjhb	prnt("\nMutexes which were never acquired:\n");
105471352Sjasone	for (w = w_all; w; w = w->w_next) {
105571352Sjasone		if (w->w_file != NULL)
105671352Sjasone			continue;
105771352Sjasone		prnt("%s\n", w->w_description);
105871352Sjasone	}
105971352Sjasone}
106071352Sjasone
106165557Sjasonevoid
106265856Sjhbwitness_enter(struct mtx *m, int flags, const char *file, int line)
106365557Sjasone{
106465856Sjhb	struct witness *w, *w1;
106565856Sjhb	struct mtx *m1;
106665557Sjasone	struct proc *p;
106765557Sjasone	int i;
106867676Sjhb#ifdef DDB
106967676Sjhb	int go_into_ddb = 0;
107067676Sjhb#endif /* DDB */
107165557Sjasone
107271352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
107371320Sjasone		return;
107465557Sjasone	w = m->mtx_witness;
107572393Sbmilekic	p = curproc;
107665557Sjasone
107765557Sjasone	if (flags & MTX_SPIN) {
107871560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
107965651Sjasone			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
108065651Sjasone			    " %s:%d", m->mtx_description, file, line);
108171228Sbmilekic		if (mtx_recursed(m)) {
108271560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
108371228Sbmilekic				panic("mutex_enter: recursion on non-recursive"
108471228Sbmilekic				    " mutex %s @ %s:%d", m->mtx_description,
108571228Sbmilekic				    file, line);
108665557Sjasone			return;
108771228Sbmilekic		}
108872200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
108970861Sjake		i = PCPU_GET(witness_spin_check);
109065557Sjasone		if (i != 0 && w->w_level < i) {
109172200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
109265651Sjasone			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
109365651Sjasone			    " %s:%d already holding %s:%x",
109465557Sjasone			    m->mtx_description, w->w_level, file, line,
109565557Sjasone			    spin_order_list[ffs(i)-1], i);
109665557Sjasone		}
109765557Sjasone		PCPU_SET(witness_spin_check, i | w->w_level);
109872200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
109969361Sjhb		w->w_file = file;
110069361Sjhb		w->w_line = line;
110169361Sjhb		m->mtx_line = line;
110269361Sjhb		m->mtx_file = file;
110365557Sjasone		return;
110465557Sjasone	}
110571560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
110665557Sjasone		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
110765557Sjasone		    m->mtx_description, file, line);
110865557Sjasone
110971228Sbmilekic	if (mtx_recursed(m)) {
111071560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
111171228Sbmilekic			panic("mutex_enter: recursion on non-recursive"
111271228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description,
111371228Sbmilekic			    file, line);
111465557Sjasone		return;
111571228Sbmilekic	}
111665557Sjasone	if (witness_dead)
111765557Sjasone		goto out;
111869998Sjhb	if (cold)
111965557Sjasone		goto out;
112065557Sjasone
112165557Sjasone	if (!mtx_legal2block())
112272200Sbmilekic		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
112365557Sjasone			    m->mtx_description, file, line);
112465557Sjasone	/*
112565557Sjasone	 * Is this the first mutex acquired
112665557Sjasone	 */
112765557Sjasone	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
112865557Sjasone		goto out;
112965557Sjasone
113065557Sjasone	if ((w1 = m1->mtx_witness) == w) {
113165557Sjasone		if (w->w_same_squawked || dup_ok(w))
113265557Sjasone			goto out;
113365557Sjasone		w->w_same_squawked = 1;
113465557Sjasone		printf("acquring duplicate lock of same type: \"%s\"\n",
113565557Sjasone			m->mtx_description);
113665557Sjasone		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
113765557Sjasone		printf(" 2nd @ %s:%d\n", file, line);
113867676Sjhb#ifdef DDB
113967676Sjhb		go_into_ddb = 1;
114067676Sjhb#endif /* DDB */
114165557Sjasone		goto out;
114265557Sjasone	}
114365557Sjasone	MPASS(!mtx_owned(&w_mtx));
114472200Sbmilekic	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
114565557Sjasone	/*
114665557Sjasone	 * If we have a known higher number just say ok
114765557Sjasone	 */
114865557Sjasone	if (witness_watch > 1 && w->w_level > w1->w_level) {
114972200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
115065557Sjasone		goto out;
115165557Sjasone	}
115265557Sjasone	if (isitmydescendant(m1->mtx_witness, w)) {
115372200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
115465557Sjasone		goto out;
115565557Sjasone	}
115665557Sjasone	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
115765557Sjasone
115867352Sjhb		MPASS(i < 200);
115965557Sjasone		w1 = m1->mtx_witness;
116065557Sjasone		if (isitmydescendant(w, w1)) {
116172200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
116265557Sjasone			if (blessed(w, w1))
116365557Sjasone				goto out;
116465557Sjasone			if (m1 == &Giant) {
116565557Sjasone				if (w1->w_Giant_squawked)
116665557Sjasone					goto out;
116765557Sjasone				else
116865557Sjasone					w1->w_Giant_squawked = 1;
116965557Sjasone			} else {
117065557Sjasone				if (w1->w_other_squawked)
117165557Sjasone					goto out;
117265557Sjasone				else
117365557Sjasone					w1->w_other_squawked = 1;
117465557Sjasone			}
117565557Sjasone			printf("lock order reversal\n");
117665557Sjasone			printf(" 1st %s last acquired @ %s:%d\n",
117765557Sjasone			    w->w_description, w->w_file, w->w_line);
117865557Sjasone			printf(" 2nd %p %s @ %s:%d\n",
117965557Sjasone			    m1, w1->w_description, w1->w_file, w1->w_line);
118065557Sjasone			printf(" 3rd %p %s @ %s:%d\n",
118165557Sjasone			    m, w->w_description, file, line);
118267676Sjhb#ifdef DDB
118367676Sjhb			go_into_ddb = 1;
118467676Sjhb#endif /* DDB */
118565557Sjasone			goto out;
118665557Sjasone		}
118765557Sjasone	}
118865557Sjasone	m1 = LIST_FIRST(&p->p_heldmtx);
118965557Sjasone	if (!itismychild(m1->mtx_witness, w))
119072200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
119165557Sjasone
119265557Sjasoneout:
119367676Sjhb#ifdef DDB
119467676Sjhb	if (witness_ddb && go_into_ddb)
119567676Sjhb		Debugger("witness_enter");
119667676Sjhb#endif /* DDB */
119765557Sjasone	w->w_file = file;
119865557Sjasone	w->w_line = line;
119965557Sjasone	m->mtx_line = line;
120065557Sjasone	m->mtx_file = file;
120165557Sjasone
120265557Sjasone	/*
120368582Sjhb	 * If this pays off it likely means that a mutex being witnessed
120465557Sjasone	 * is acquired in hardclock. Put it in the ignore list. It is
120565557Sjasone	 * likely not the mutex this assert fails on.
120665557Sjasone	 */
120767352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
120865557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
120965557Sjasone}
121065557Sjasone
121165557Sjasonevoid
121265856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
121365557Sjasone{
121465557Sjasone	struct proc *p;
121565856Sjhb	struct witness *w = m->mtx_witness;
121665557Sjasone
121771320Sjasone	if (witness_cold)
121871320Sjasone		return;
121969998Sjhb	if (panicstr)
122069998Sjhb		return;
122165557Sjasone	if (flags & MTX_SPIN) {
122271560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
122365557Sjasone			panic("mutex_try_enter: "
122465557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
122565557Sjasone			    m->mtx_description, file, line);
122671228Sbmilekic		if (mtx_recursed(m)) {
122771560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
122871228Sbmilekic				panic("mutex_try_enter: recursion on"
122971228Sbmilekic				    " non-recursive mutex %s @ %s:%d",
123071228Sbmilekic				    m->mtx_description, file, line);
123165557Sjasone			return;
123271228Sbmilekic		}
123372200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
123470861Sjake		PCPU_SET(witness_spin_check,
123570861Sjake		    PCPU_GET(witness_spin_check) | w->w_level);
123672200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
123769361Sjhb		w->w_file = file;
123869361Sjhb		w->w_line = line;
123969361Sjhb		m->mtx_line = line;
124069361Sjhb		m->mtx_file = file;
124165557Sjasone		return;
124265557Sjasone	}
124365557Sjasone
124471560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
124565557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
124665557Sjasone		    m->mtx_description, file, line);
124765557Sjasone
124871228Sbmilekic	if (mtx_recursed(m)) {
124971560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
125071228Sbmilekic			panic("mutex_try_enter: recursion on non-recursive"
125171228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description, file,
125271228Sbmilekic			    line);
125365557Sjasone		return;
125471228Sbmilekic	}
125565557Sjasone	w->w_file = file;
125665557Sjasone	w->w_line = line;
125765557Sjasone	m->mtx_line = line;
125865557Sjasone	m->mtx_file = file;
125972393Sbmilekic	p = curproc;
126067352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
126165557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
126265557Sjasone}
126365557Sjasone
126465557Sjasonevoid
126571352Sjasonewitness_exit(struct mtx *m, int flags, const char *file, int line)
126665557Sjasone{
126771352Sjasone	struct witness *w;
126865557Sjasone
126971352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
127071352Sjasone		return;
127171352Sjasone	w = m->mtx_witness;
127265557Sjasone
127371352Sjasone	if (flags & MTX_SPIN) {
127471560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
127571352Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
127671352Sjasone			    " %s:%d", m->mtx_description, file, line);
127771352Sjasone		if (mtx_recursed(m)) {
127871560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
127971352Sjasone				panic("mutex_exit: recursion on non-recursive"
128071352Sjasone				    " mutex %s @ %s:%d", m->mtx_description,
128171352Sjasone				    file, line);
128271352Sjasone			return;
128365557Sjasone		}
128472200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
128571352Sjasone		PCPU_SET(witness_spin_check,
128671352Sjasone		    PCPU_GET(witness_spin_check) & ~w->w_level);
128772200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
128871352Sjasone		return;
128965557Sjasone	}
129071560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
129171352Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
129271352Sjasone		    m->mtx_description, file, line);
129371352Sjasone
129471352Sjasone	if (mtx_recursed(m)) {
129571560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
129671352Sjasone			panic("mutex_exit: recursion on non-recursive"
129771352Sjasone			    " mutex %s @ %s:%d", m->mtx_description,
129871352Sjasone			    file, line);
129971352Sjasone		return;
130065557Sjasone	}
130171352Sjasone
130271352Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
130372200Sbmilekic		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
130471352Sjasone			    m->mtx_description, file, line);
130571352Sjasone	LIST_REMOVE(m, mtx_held);
130671352Sjasone	m->mtx_held.le_prev = NULL;
130765557Sjasone}
130865557Sjasone
130965557Sjasoneint
131065856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
131165557Sjasone{
131265856Sjhb	struct mtx *m;
131365557Sjasone	struct proc *p;
131465557Sjasone	char **sleep;
131565557Sjasone	int n = 0;
131665557Sjasone
131771320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
131872393Sbmilekic	p = curproc;
131972224Sjhb	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
132065557Sjasone		if (m == mtx)
132165557Sjasone			continue;
132265557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
132365557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
132465557Sjasone				goto next;
132572224Sjhb		if (n == 0)
132672224Sjhb			printf("Whee!\n");
132765557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
132865557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
132965557Sjasone			m->mtx_description,
133065557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
133165557Sjasone		n++;
133265557Sjasone	next:
133365557Sjasone	}
133467676Sjhb#ifdef DDB
133567676Sjhb	if (witness_ddb && n)
133667676Sjhb		Debugger("witness_sleep");
133767676Sjhb#endif /* DDB */
133865557Sjasone	return (n);
133965557Sjasone}
134065557Sjasone
134165856Sjhbstatic struct witness *
134267404Sjhbenroll(const char *description, int flag)
134365557Sjasone{
134465557Sjasone	int i;
134565856Sjhb	struct witness *w, *w1;
134665557Sjasone	char **ignore;
134765557Sjasone	char **order;
134865557Sjasone
134965557Sjasone	if (!witness_watch)
135065557Sjasone		return (NULL);
135165557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
135265557Sjasone		if (strcmp(description, *ignore) == 0)
135365557Sjasone			return (NULL);
135465557Sjasone
135565557Sjasone	if (w_inited == 0) {
135671320Sjasone		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
135765557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
135865557Sjasone			w = &w_data[i];
135965557Sjasone			witness_free(w);
136065557Sjasone		}
136165557Sjasone		w_inited = 1;
136265557Sjasone		for (order = order_list; *order != NULL; order++) {
136365557Sjasone			w = enroll(*order, MTX_DEF);
136465557Sjasone			w->w_file = "order list";
136565557Sjasone			for (order++; *order != NULL; order++) {
136665557Sjasone				w1 = enroll(*order, MTX_DEF);
136765557Sjasone				w1->w_file = "order list";
136865557Sjasone				itismychild(w, w1);
136965557Sjasone				w = w1;
137065557Sjasone    	    	    	}
137165557Sjasone		}
137265557Sjasone	}
137365557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
137465557Sjasone		return (NULL);
137572200Sbmilekic	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
137665557Sjasone	for (w = w_all; w; w = w->w_next) {
137765557Sjasone		if (strcmp(description, w->w_description) == 0) {
137872200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
137965557Sjasone			return (w);
138065557Sjasone		}
138165557Sjasone	}
138265557Sjasone	if ((w = witness_get()) == NULL)
138365557Sjasone		return (NULL);
138465557Sjasone	w->w_next = w_all;
138565557Sjasone	w_all = w;
138665557Sjasone	w->w_description = description;
138772200Sbmilekic	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
138865557Sjasone	if (flag & MTX_SPIN) {
138965557Sjasone		w->w_spin = 1;
139065557Sjasone
139165557Sjasone		i = 1;
139265557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
139365557Sjasone			if (strcmp(description, *order) == 0)
139465557Sjasone				break;
139565557Sjasone			i <<= 1;
139665557Sjasone		}
139765557Sjasone		if (*order == NULL)
139865557Sjasone			panic("spin lock %s not in order list", description);
139965557Sjasone		w->w_level = i;
140071560Sjhb	}
140171228Sbmilekic
140265557Sjasone	return (w);
140365557Sjasone}
140465557Sjasone
140565557Sjasonestatic int
140665856Sjhbitismychild(struct witness *parent, struct witness *child)
140765557Sjasone{
140865557Sjasone	static int recursed;
140965557Sjasone
141065557Sjasone	/*
141165557Sjasone	 * Insert "child" after "parent"
141265557Sjasone	 */
141365557Sjasone	while (parent->w_morechildren)
141465557Sjasone		parent = parent->w_morechildren;
141565557Sjasone
141665557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
141765557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
141865557Sjasone			return (1);
141965557Sjasone		parent = parent->w_morechildren;
142065557Sjasone	}
142167352Sjhb	MPASS(child != NULL);
142265557Sjasone	parent->w_children[parent->w_childcnt++] = child;
142365557Sjasone	/*
142465557Sjasone	 * now prune whole tree
142565557Sjasone	 */
142665557Sjasone	if (recursed)
142765557Sjasone		return (0);
142865557Sjasone	recursed = 1;
142965557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
143065557Sjasone		for (parent = w_all; parent != NULL;
143165557Sjasone		    parent = parent->w_next) {
143265557Sjasone			if (!isitmychild(parent, child))
143365557Sjasone				continue;
143465557Sjasone			removechild(parent, child);
143565557Sjasone			if (isitmydescendant(parent, child))
143665557Sjasone				continue;
143765557Sjasone			itismychild(parent, child);
143865557Sjasone		}
143965557Sjasone	}
144065557Sjasone	recursed = 0;
144165557Sjasone	witness_levelall();
144265557Sjasone	return (0);
144365557Sjasone}
144465557Sjasone
144565557Sjasonestatic void
144665856Sjhbremovechild(struct witness *parent, struct witness *child)
144765557Sjasone{
144865856Sjhb	struct witness *w, *w1;
144965557Sjasone	int i;
145065557Sjasone
145165557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
145265557Sjasone		for (i = 0; i < w->w_childcnt; i++)
145365557Sjasone			if (w->w_children[i] == child)
145465557Sjasone				goto found;
145565557Sjasone	return;
145665557Sjasonefound:
145765557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
145865557Sjasone		continue;
145965557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
146067352Sjhb	MPASS(w->w_children[i] != NULL);
146165557Sjasone
146265557Sjasone	if (w1->w_childcnt != 0)
146365557Sjasone		return;
146465557Sjasone
146565557Sjasone	if (w1 == parent)
146665557Sjasone		return;
146765557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
146865557Sjasone		continue;
146965557Sjasone	w->w_morechildren = 0;
147065557Sjasone	witness_free(w1);
147165557Sjasone}
147265557Sjasone
147365557Sjasonestatic int
147465856Sjhbisitmychild(struct witness *parent, struct witness *child)
147565557Sjasone{
147665856Sjhb	struct witness *w;
147765557Sjasone	int i;
147865557Sjasone
147965557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
148065557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
148165557Sjasone			if (w->w_children[i] == child)
148265557Sjasone				return (1);
148365557Sjasone		}
148465557Sjasone	}
148565557Sjasone	return (0);
148665557Sjasone}
148765557Sjasone
148865557Sjasonestatic int
148965856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
149065557Sjasone{
149165856Sjhb	struct witness *w;
149265557Sjasone	int i;
149365557Sjasone	int j;
149465557Sjasone
149565557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
149667352Sjhb		MPASS(j < 1000);
149765557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
149865557Sjasone			if (w->w_children[i] == child)
149965557Sjasone				return (1);
150065557Sjasone		}
150165557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
150265557Sjasone			if (isitmydescendant(w->w_children[i], child))
150365557Sjasone				return (1);
150465557Sjasone		}
150565557Sjasone	}
150665557Sjasone	return (0);
150765557Sjasone}
150865557Sjasone
150965557Sjasonevoid
151065557Sjasonewitness_levelall (void)
151165557Sjasone{
151265856Sjhb	struct witness *w, *w1;
151365557Sjasone
151465557Sjasone	for (w = w_all; w; w = w->w_next)
151571228Sbmilekic		if (!(w->w_spin))
151665557Sjasone			w->w_level = 0;
151765557Sjasone	for (w = w_all; w; w = w->w_next) {
151865557Sjasone		if (w->w_spin)
151965557Sjasone			continue;
152065557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
152165557Sjasone			if (isitmychild(w1, w))
152265557Sjasone				break;
152365557Sjasone		}
152465557Sjasone		if (w1 != NULL)
152565557Sjasone			continue;
152665557Sjasone		witness_leveldescendents(w, 0);
152765557Sjasone	}
152865557Sjasone}
152965557Sjasone
153065557Sjasonestatic void
153165856Sjhbwitness_leveldescendents(struct witness *parent, int level)
153265557Sjasone{
153365557Sjasone	int i;
153465856Sjhb	struct witness *w;
153565557Sjasone
153665557Sjasone	if (parent->w_level < level)
153765557Sjasone		parent->w_level = level;
153865557Sjasone	level++;
153965557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
154065557Sjasone		for (i = 0; i < w->w_childcnt; i++)
154165557Sjasone			witness_leveldescendents(w->w_children[i], level);
154265557Sjasone}
154365557Sjasone
154465557Sjasonestatic void
154565856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
154665856Sjhb			   struct witness *parent)
154765557Sjasone{
154865856Sjhb	struct witness *w;
154965557Sjasone	int i;
155072224Sjhb	int level;
155165557Sjasone
155272224Sjhb	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
155372224Sjhb
155465557Sjasone	prnt("%d", level);
155565557Sjasone	if (level < 10)
155665557Sjasone		prnt(" ");
155765557Sjasone	for (i = 0; i < level; i++)
155865557Sjasone		prnt(" ");
155965557Sjasone	prnt("%s", parent->w_description);
156072224Sjhb	if (parent->w_file != NULL)
156172224Sjhb		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
156272224Sjhb		    parent->w_line);
156365557Sjasone
156465557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
156565557Sjasone		for (i = 0; i < w->w_childcnt; i++)
156665557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
156765557Sjasone    }
156865557Sjasone
156965557Sjasonestatic int
157065856Sjhbdup_ok(struct witness *w)
157165557Sjasone{
157265557Sjasone	char **dup;
157365557Sjasone
157465557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
157565557Sjasone		if (strcmp(w->w_description, *dup) == 0)
157665557Sjasone			return (1);
157765557Sjasone	return (0);
157865557Sjasone}
157965557Sjasone
158065557Sjasonestatic int
158165856Sjhbblessed(struct witness *w1, struct witness *w2)
158265557Sjasone{
158365557Sjasone	int i;
158465856Sjhb	struct witness_blessed *b;
158565557Sjasone
158665557Sjasone	for (i = 0; i < blessed_count; i++) {
158765557Sjasone		b = &blessed_list[i];
158865557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
158965557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
159065557Sjasone				return (1);
159165557Sjasone			continue;
159265557Sjasone		}
159365557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
159465557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
159565557Sjasone				return (1);
159665557Sjasone	}
159765557Sjasone	return (0);
159865557Sjasone}
159965557Sjasone
160065856Sjhbstatic struct witness *
160165557Sjasonewitness_get()
160265557Sjasone{
160365856Sjhb	struct witness *w;
160465557Sjasone
160565557Sjasone	if ((w = w_free) == NULL) {
160665557Sjasone		witness_dead = 1;
160772200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
160865557Sjasone		printf("witness exhausted\n");
160965557Sjasone		return (NULL);
161065557Sjasone	}
161165557Sjasone	w_free = w->w_next;
161265856Sjhb	bzero(w, sizeof(*w));
161365557Sjasone	return (w);
161465557Sjasone}
161565557Sjasone
161665557Sjasonestatic void
161765856Sjhbwitness_free(struct witness *w)
161865557Sjasone{
161965557Sjasone	w->w_next = w_free;
162065557Sjasone	w_free = w;
162165557Sjasone}
162265557Sjasone
162369881Sjakeint
162465557Sjasonewitness_list(struct proc *p)
162565557Sjasone{
162665856Sjhb	struct mtx *m;
162769881Sjake	int nheld;
162865557Sjasone
162971320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
163069881Sjake	nheld = 0;
163172224Sjhb	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
163265557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
163365557Sjasone		    m->mtx_description, m,
163465557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
163569881Sjake		nheld++;
163665557Sjasone	}
163769881Sjake
163869881Sjake	return (nheld);
163965557Sjasone}
164065557Sjasone
164171709Sjhb#ifdef DDB
164271709Sjhb
164372224SjhbDB_SHOW_COMMAND(mutexes, db_witness_list)
164471709Sjhb{
164571709Sjhb
164672393Sbmilekic	witness_list(curproc);
164771709Sjhb}
164871709Sjhb
164972224SjhbDB_SHOW_COMMAND(witness, db_witness_display)
165072224Sjhb{
165172224Sjhb
165272224Sjhb	witness_display(db_printf);
165372224Sjhb}
165471709Sjhb#endif
165571709Sjhb
165665557Sjasonevoid
165765856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
165865557Sjasone{
165971320Sjasone
166071320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
166171352Sjasone	if (m->mtx_witness == NULL)
166271352Sjasone		return;
166371352Sjasone
166465557Sjasone	*filep = m->mtx_witness->w_file;
166565557Sjasone	*linep = m->mtx_witness->w_line;
166665557Sjasone}
166765557Sjasone
166865557Sjasonevoid
166965856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
167065557Sjasone{
167171320Sjasone
167271320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
167371352Sjasone	if (m->mtx_witness == NULL)
167471352Sjasone		return;
167571352Sjasone
167665557Sjasone	m->mtx_witness->w_file = file;
167765557Sjasone	m->mtx_witness->w_line = line;
167865557Sjasone}
167965557Sjasone
168069429Sjhb#endif	/* WITNESS */
1681