subr_turnstile.c revision 72344
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_turnstile.c 72344 2001-02-11 02:54:16Z bmilekic $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3472200Sbmilekic * Machine independent bits of mutex implementation and implementation of
3572200Sbmilekic * `witness' structure & related debugging routines.
3672200Sbmilekic */
3772200Sbmilekic
3872200Sbmilekic/*
3965557Sjasone *	Main Entry: witness
4065557Sjasone *	Pronunciation: 'wit-n&s
4165557Sjasone *	Function: noun
4265557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
4365557Sjasone *	    testimony, witness, from 2wit
4465557Sjasone *	Date: before 12th century
4565557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4665557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4765557Sjasone *	    a cause or before a judicial tribunal
4865557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4965557Sjasone *	    testify to its having taken place
5065557Sjasone *	4 : one who has personal knowledge of something
5165557Sjasone *	5 a : something serving as evidence or proof : SIGN
5265557Sjasone *	  b : public affirmation by word or example of usually
5365557Sjasone *	      religious faith or conviction <the heroic witness to divine
5465557Sjasone *	      life -- Pilot>
5565557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5665557Sjasone */
5765557Sjasone
5868790Sjhb#include "opt_ddb.h"
5967676Sjhb#include "opt_witness.h"
6067676Sjhb
6165557Sjasone#include <sys/param.h>
6267352Sjhb#include <sys/bus.h>
6367352Sjhb#include <sys/kernel.h>
6467352Sjhb#include <sys/malloc.h>
6565557Sjasone#include <sys/proc.h>
6667676Sjhb#include <sys/sysctl.h>
6765557Sjasone#include <sys/systm.h>
6867352Sjhb#include <sys/vmmeter.h>
6965557Sjasone#include <sys/ktr.h>
7065557Sjasone
7167352Sjhb#include <machine/atomic.h>
7267352Sjhb#include <machine/bus.h>
7367352Sjhb#include <machine/clock.h>
7465557Sjasone#include <machine/cpu.h>
7567352Sjhb
7668790Sjhb#include <ddb/ddb.h>
7768790Sjhb
7867352Sjhb#include <vm/vm.h>
7967352Sjhb#include <vm/vm_extern.h>
8067352Sjhb
8167352Sjhb#include <sys/mutex.h>
8265557Sjasone
8365557Sjasone/*
8472200Sbmilekic * The WITNESS-enabled mutex debug structure.
8567352Sjhb */
8671352Sjasone#ifdef WITNESS
8771352Sjasonestruct mtx_debug {
8871352Sjasone	struct witness	*mtxd_witness;
8971352Sjasone	LIST_ENTRY(mtx)	mtxd_held;
9071352Sjasone	const char	*mtxd_file;
9171352Sjasone	int		mtxd_line;
9271352Sjasone};
9371352Sjasone
9471560Sjhb#define mtx_held	mtx_debug->mtxd_held
9571560Sjhb#define	mtx_file	mtx_debug->mtxd_file
9671560Sjhb#define	mtx_line	mtx_debug->mtxd_line
9771560Sjhb#define	mtx_witness	mtx_debug->mtxd_witness
9871352Sjasone#endif	/* WITNESS */
9971352Sjasone
10071352Sjasone/*
10172200Sbmilekic * Internal utility macros.
10271352Sjasone */
10372200Sbmilekic#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
10471352Sjasone
10572200Sbmilekic#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
10672200Sbmilekic	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
10771352Sjasone
10872200Sbmilekic#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
10972200Sbmilekic#define SET_PRIO(p, pri)	(p)->p_priority = (pri)
11071352Sjasone
11171352Sjasone/*
11272200Sbmilekic * Early WITNESS-enabled declarations.
11371352Sjasone */
11472200Sbmilekic#ifdef WITNESS
11571352Sjasone
11671352Sjasone/*
11772200Sbmilekic * Internal WITNESS routines which must be prototyped early.
11872200Sbmilekic *
11972200Sbmilekic * XXX: When/if witness code is cleaned up, it would be wise to place all
12072200Sbmilekic *	witness prototyping early in this file.
12172200Sbmilekic */
12272200Sbmilekicstatic void witness_init(struct mtx *, int flag);
12372200Sbmilekicstatic void witness_destroy(struct mtx *);
12472200Sbmilekicstatic void witness_display(void(*)(const char *fmt, ...));
12571352Sjasone
12672200SbmilekicMALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
12771352Sjasone
12867352Sjhb/* All mutexes in system (used for debug/panic) */
12971560Sjhbstatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
13072200Sbmilekic
13171320Sjasone/*
13272200Sbmilekic * This global is set to 0 once it becomes safe to use the witness code.
13371320Sjasone */
13471320Sjasonestatic int witness_cold = 1;
13572200Sbmilekic
13669429Sjhb#else	/* WITNESS */
13771352Sjasone
13872200Sbmilekic/* XXX XXX XXX
13972200Sbmilekic * flag++ is sleazoid way of shuting up warning
14071352Sjasone */
14171352Sjasone#define witness_init(m, flag) flag++
14271352Sjasone#define witness_destroy(m)
14371352Sjasone#define witness_try_enter(m, t, f, l)
14469429Sjhb#endif	/* WITNESS */
14567352Sjhb
14672200Sbmilekic/*
14772200Sbmilekic * All mutex locks in system are kept on the all_mtx list.
14872200Sbmilekic */
14971560Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
15071560Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
15171560Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx,
15271560Sjhb#ifdef WITNESS
15371560Sjhb	&all_mtx_debug
15471560Sjhb#else
15571560Sjhb	NULL
15671560Sjhb#endif
15771560Sjhb	 };
15871560Sjhb
15972200Sbmilekic/*
16072200Sbmilekic * Global variables for book keeping.
16172200Sbmilekic */
16267352Sjhbstatic int	mtx_cur_cnt;
16367352Sjhbstatic int	mtx_max_cnt;
16467352Sjhb
16572200Sbmilekic/*
16672344Sbmilekic * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
16772344Sbmilekic */
16872344Sbmilekicchar	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
16972344Sbmilekicchar	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
17072344Sbmilekicchar	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
17172344Sbmilekicchar	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
17272344Sbmilekic
17372344Sbmilekic/*
17472200Sbmilekic * Prototypes for non-exported routines.
17572200Sbmilekic *
17672200Sbmilekic * NOTE: Prototypes for witness routines are placed at the bottom of the file.
17772200Sbmilekic */
17871352Sjasonestatic void	propagate_priority(struct proc *);
17967352Sjhb
18067352Sjhbstatic void
18167352Sjhbpropagate_priority(struct proc *p)
18267352Sjhb{
18367352Sjhb	int pri = p->p_priority;
18467352Sjhb	struct mtx *m = p->p_blocked;
18567352Sjhb
18669376Sjhb	mtx_assert(&sched_lock, MA_OWNED);
18767352Sjhb	for (;;) {
18867352Sjhb		struct proc *p1;
18967352Sjhb
19067352Sjhb		p = mtx_owner(m);
19167352Sjhb
19267352Sjhb		if (p == NULL) {
19367352Sjhb			/*
19467352Sjhb			 * This really isn't quite right. Really
19567352Sjhb			 * ought to bump priority of process that
19667352Sjhb			 * next acquires the mutex.
19767352Sjhb			 */
19867352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
19967352Sjhb			return;
20067352Sjhb		}
20172200Sbmilekic
20267352Sjhb		MPASS(p->p_magic == P_MAGIC);
20369376Sjhb		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
20467352Sjhb		if (p->p_priority <= pri)
20567352Sjhb			return;
20669376Sjhb
20767352Sjhb		/*
20869376Sjhb		 * Bump this process' priority.
20969376Sjhb		 */
21069376Sjhb		SET_PRIO(p, pri);
21169376Sjhb
21269376Sjhb		/*
21367352Sjhb		 * If lock holder is actually running, just bump priority.
21467352Sjhb		 */
21569376Sjhb#ifdef SMP
21669376Sjhb		/*
21769376Sjhb		 * For SMP, we can check the p_oncpu field to see if we are
21869376Sjhb		 * running.
21969376Sjhb		 */
22069376Sjhb		if (p->p_oncpu != 0xff) {
22167352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
22267352Sjhb			return;
22367352Sjhb		}
22469376Sjhb#else
22567352Sjhb		/*
22669376Sjhb		 * For UP, we check to see if p is curproc (this shouldn't
22769376Sjhb		 * ever happen however as it would mean we are in a deadlock.)
22869376Sjhb		 */
22969376Sjhb		if (p == curproc) {
23069376Sjhb			panic("Deadlock detected");
23169376Sjhb			return;
23269376Sjhb		}
23369376Sjhb#endif
23469376Sjhb		/*
23567352Sjhb		 * If on run queue move to new run queue, and
23667352Sjhb		 * quit.
23767352Sjhb		 */
23867352Sjhb		if (p->p_stat == SRUN) {
23972200Sbmilekic			printf("XXX: moving proc %d(%s) to a new run queue\n",
24069376Sjhb			       p->p_pid, p->p_comm);
24167352Sjhb			MPASS(p->p_blocked == NULL);
24267352Sjhb			remrunqueue(p);
24367352Sjhb			setrunqueue(p);
24467352Sjhb			return;
24567352Sjhb		}
24667352Sjhb
24767352Sjhb		/*
24869376Sjhb		 * If we aren't blocked on a mutex, we should be.
24967352Sjhb		 */
25069376Sjhb		KASSERT(p->p_stat == SMTX, (
25169376Sjhb		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
25269376Sjhb		    p->p_pid, p->p_comm, p->p_stat,
25369376Sjhb		    m->mtx_description));
25467352Sjhb
25567352Sjhb		/*
25667352Sjhb		 * Pick up the mutex that p is blocked on.
25767352Sjhb		 */
25867352Sjhb		m = p->p_blocked;
25967352Sjhb		MPASS(m != NULL);
26067352Sjhb
26167352Sjhb		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
26267352Sjhb		    p->p_comm, m->mtx_description);
26372200Sbmilekic
26467352Sjhb		/*
26567352Sjhb		 * Check if the proc needs to be moved up on
26667352Sjhb		 * the blocked chain
26767352Sjhb		 */
26869376Sjhb		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
26969376Sjhb			printf("XXX: process at head of run queue\n");
27069376Sjhb			continue;
27169376Sjhb		}
27272200Sbmilekic
27369376Sjhb		p1 = TAILQ_PREV(p, rq, p_procq);
27469376Sjhb		if (p1->p_priority <= pri) {
27569376Sjhb			printf(
27672200Sbmilekic			   "XXX: previous process %d(%s) has higher priority\n",
27769376Sjhb	                    p->p_pid, p->p_comm);
27867352Sjhb			continue;
27967352Sjhb		}
28067352Sjhb
28167352Sjhb		/*
28269376Sjhb		 * Remove proc from blocked chain and determine where
28369376Sjhb		 * it should be moved up to.  Since we know that p1 has
28469376Sjhb		 * a lower priority than p, we know that at least one
28569376Sjhb		 * process in the chain has a lower priority and that
28669376Sjhb		 * p1 will thus not be NULL after the loop.
28767352Sjhb		 */
28867352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
28967352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
29067352Sjhb			MPASS(p1->p_magic == P_MAGIC);
29167352Sjhb			if (p1->p_priority > pri)
29267352Sjhb				break;
29367352Sjhb		}
29472200Sbmilekic
29569376Sjhb		MPASS(p1 != NULL);
29669376Sjhb		TAILQ_INSERT_BEFORE(p1, p, p_procq);
29767352Sjhb		CTR4(KTR_LOCK,
29871560Sjhb		    "propagate_priority: p %p moved before %p on [%p] %s",
29967352Sjhb		    p, p1, m, m->mtx_description);
30067352Sjhb	}
30167352Sjhb}
30267352Sjhb
30371352Sjasone/*
30472200Sbmilekic * The important part of mtx_trylock{,_flags}()
30572200Sbmilekic * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
30672200Sbmilekic * if we're called, it's because we know we don't already own this lock.
30771352Sjasone */
30872200Sbmilekicint
30972200Sbmilekic_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
31071352Sjasone{
31172200Sbmilekic	int rval;
31271352Sjasone
31372344Sbmilekic	MPASS(CURPROC != NULL);
31471352Sjasone
31572200Sbmilekic	/*
31672200Sbmilekic	 * _mtx_trylock does not accept MTX_NOSWITCH option.
31772200Sbmilekic	 */
31872344Sbmilekic	KASSERT((opts & MTX_NOSWITCH) == 0,
31972344Sbmilekic	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
32072200Sbmilekic
32172200Sbmilekic	rval = _obtain_lock(m, CURTHD);
32272200Sbmilekic
32372200Sbmilekic#ifdef WITNESS
32472200Sbmilekic	if (rval && m->mtx_witness != NULL) {
32571352Sjasone		/*
32672200Sbmilekic		 * We do not handle recursion in _mtx_trylock; see the
32772200Sbmilekic		 * note at the top of the routine.
32871352Sjasone		 */
32972344Sbmilekic		KASSERT(!mtx_recursed(m),
33072344Sbmilekic		    ("mtx_trylock() called on a recursed mutex"));
33172200Sbmilekic		witness_try_enter(m, (opts | m->mtx_flags), file, line);
33271352Sjasone	}
33372200Sbmilekic#endif	/* WITNESS */
33471352Sjasone
33572200Sbmilekic	if ((opts & MTX_QUIET) == 0)
33672200Sbmilekic		CTR5(KTR_LOCK, "TRY_ENTER %s [%p] result=%d at %s:%d",
33772200Sbmilekic		    m->mtx_description, m, rval, file, line);
33872200Sbmilekic
33972200Sbmilekic	return rval;
34071352Sjasone}
34171352Sjasone
34271352Sjasone/*
34372200Sbmilekic * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
34471352Sjasone *
34572200Sbmilekic * We call this if the lock is either contested (i.e. we need to go to
34672200Sbmilekic * sleep waiting for it), or if we need to recurse on it.
34771352Sjasone */
34872200Sbmilekicvoid
34972200Sbmilekic_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
35071352Sjasone{
35172200Sbmilekic	struct proc *p = CURPROC;
35271352Sjasone
35372200Sbmilekic	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
35472200Sbmilekic		m->mtx_recurse++;
35572200Sbmilekic		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
35672200Sbmilekic		if ((opts & MTX_QUIET) == 0)
35772344Sbmilekic			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
35872200Sbmilekic		return;
35971352Sjasone	}
36071352Sjasone
36172200Sbmilekic	if ((opts & MTX_QUIET) == 0)
36272344Sbmilekic		CTR3(KTR_LOCK, "_mtx_lock_sleep: %p contested (lock=%p) [%p]",
36372344Sbmilekic		    m, (void *)m->mtx_lock, (void *)RETIP(m));
36471352Sjasone
36572200Sbmilekic	/*
36672200Sbmilekic	 * Save our priority. Even though p_nativepri is protected by
36772200Sbmilekic	 * sched_lock, we don't obtain it here as it can be expensive.
36872200Sbmilekic	 * Since this is the only place p_nativepri is set, and since two
36972200Sbmilekic	 * CPUs will not be executing the same process concurrently, we know
37072200Sbmilekic	 * that no other CPU is going to be messing with this. Also,
37172200Sbmilekic	 * p_nativepri is only read when we are blocked on a mutex, so that
37272200Sbmilekic	 * can't be happening right now either.
37372200Sbmilekic	 */
37472200Sbmilekic	p->p_nativepri = p->p_priority;
37571352Sjasone
37672200Sbmilekic	while (!_obtain_lock(m, p)) {
37772200Sbmilekic		uintptr_t v;
37872200Sbmilekic		struct proc *p1;
37971352Sjasone
38072200Sbmilekic		mtx_lock_spin(&sched_lock);
38172200Sbmilekic		/*
38272200Sbmilekic		 * Check if the lock has been released while spinning for
38372200Sbmilekic		 * the sched_lock.
38472200Sbmilekic		 */
38572200Sbmilekic		if ((v = m->mtx_lock) == MTX_UNOWNED) {
38672200Sbmilekic			mtx_unlock_spin(&sched_lock);
38772200Sbmilekic			continue;
38871352Sjasone		}
38971352Sjasone
39072200Sbmilekic		/*
39172200Sbmilekic		 * The mutex was marked contested on release. This means that
39272200Sbmilekic		 * there are processes blocked on it.
39372200Sbmilekic		 */
39472200Sbmilekic		if (v == MTX_CONTESTED) {
39572200Sbmilekic			p1 = TAILQ_FIRST(&m->mtx_blocked);
39672344Sbmilekic			MPASS(p1 != NULL);
39772200Sbmilekic			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
39867352Sjhb
39972200Sbmilekic			if (p1->p_priority < p->p_priority)
40072200Sbmilekic				SET_PRIO(p, p1->p_priority);
40172200Sbmilekic			mtx_unlock_spin(&sched_lock);
40267352Sjhb			return;
40367352Sjhb		}
40469376Sjhb
40569376Sjhb		/*
40672200Sbmilekic		 * If the mutex isn't already contested and a failure occurs
40772200Sbmilekic		 * setting the contested bit, the mutex was either released
40872200Sbmilekic		 * or the state of the MTX_RECURSED bit changed.
40969376Sjhb		 */
41072200Sbmilekic		if ((v & MTX_CONTESTED) == 0 &&
41172200Sbmilekic		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
41272200Sbmilekic			(void *)(v | MTX_CONTESTED))) {
41372200Sbmilekic			mtx_unlock_spin(&sched_lock);
41472200Sbmilekic			continue;
41572200Sbmilekic		}
41667352Sjhb
41772200Sbmilekic		/*
41872200Sbmilekic		 * We deffinately must sleep for this lock.
41972200Sbmilekic		 */
42072200Sbmilekic		mtx_assert(m, MA_NOTOWNED);
42167352Sjhb
42267352Sjhb#ifdef notyet
42372200Sbmilekic		/*
42472200Sbmilekic		 * If we're borrowing an interrupted thread's VM context, we
42572200Sbmilekic		 * must clean up before going to sleep.
42672200Sbmilekic		 */
42772200Sbmilekic		if (p->p_flag & (P_ITHD | P_SITHD)) {
42872200Sbmilekic			ithd_t *it = (ithd_t *)p;
42967352Sjhb
43072200Sbmilekic			if (it->it_interrupted) {
43172200Sbmilekic				if ((opts & MTX_QUIET) == 0)
43272200Sbmilekic					CTR2(KTR_LOCK,
43372344Sbmilekic				    "_mtx_lock_sleep: 0x%x interrupted 0x%x",
43472200Sbmilekic					    it, it->it_interrupted);
43572200Sbmilekic				intr_thd_fixup(it);
43667352Sjhb			}
43772200Sbmilekic		}
43867352Sjhb#endif
43967352Sjhb
44072200Sbmilekic		/*
44172200Sbmilekic		 * Put us on the list of threads blocked on this mutex.
44272200Sbmilekic		 */
44372200Sbmilekic		if (TAILQ_EMPTY(&m->mtx_blocked)) {
44472200Sbmilekic			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
44572200Sbmilekic			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
44672200Sbmilekic			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
44772200Sbmilekic		} else {
44872200Sbmilekic			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
44972200Sbmilekic				if (p1->p_priority > p->p_priority)
45072200Sbmilekic					break;
45172200Sbmilekic			if (p1)
45272200Sbmilekic				TAILQ_INSERT_BEFORE(p1, p, p_procq);
45372200Sbmilekic			else
45467352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
45572200Sbmilekic		}
45667352Sjhb
45772200Sbmilekic		/*
45872200Sbmilekic		 * Save who we're blocked on.
45972200Sbmilekic		 */
46072200Sbmilekic		p->p_blocked = m;
46172200Sbmilekic		p->p_mtxname = m->mtx_description;
46272200Sbmilekic		p->p_stat = SMTX;
46367352Sjhb#if 0
46472200Sbmilekic		propagate_priority(p);
46567352Sjhb#endif
46667352Sjhb
46772200Sbmilekic		if ((opts & MTX_QUIET) == 0)
46872200Sbmilekic			CTR3(KTR_LOCK,
46972200Sbmilekic			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
47072200Sbmilekic			    m->mtx_description);
47172200Sbmilekic
47272200Sbmilekic		mi_switch();
47372200Sbmilekic
47472200Sbmilekic		if ((opts & MTX_QUIET) == 0)
47572200Sbmilekic			CTR3(KTR_LOCK,
47672200Sbmilekic			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
47772200Sbmilekic			  p, m, m->mtx_description);
47872200Sbmilekic
47972200Sbmilekic		mtx_unlock_spin(&sched_lock);
48072200Sbmilekic	}
48172200Sbmilekic
48272200Sbmilekic	return;
48372200Sbmilekic}
48472200Sbmilekic
48572200Sbmilekic/*
48672200Sbmilekic * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
48772200Sbmilekic *
48872200Sbmilekic * This is only called if we need to actually spin for the lock. Recursion
48972200Sbmilekic * is handled inline.
49072200Sbmilekic */
49172200Sbmilekicvoid
49272200Sbmilekic_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
49372200Sbmilekic	       int line)
49472200Sbmilekic{
49572200Sbmilekic	int i = 0;
49672200Sbmilekic
49772200Sbmilekic	if ((opts & MTX_QUIET) == 0)
49872344Sbmilekic		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
49972200Sbmilekic
50072200Sbmilekic	for (;;) {
50172200Sbmilekic		if (_obtain_lock(m, CURPROC))
50272200Sbmilekic			break;
50372200Sbmilekic
50472200Sbmilekic		while (m->mtx_lock != MTX_UNOWNED) {
50572200Sbmilekic			if (i++ < 1000000)
50672200Sbmilekic				continue;
50772200Sbmilekic			if (i++ < 6000000)
50872200Sbmilekic				DELAY(1);
50967352Sjhb#ifdef DDB
51072200Sbmilekic			else if (!db_active)
51167352Sjhb#else
51272200Sbmilekic			else
51367352Sjhb#endif
51472200Sbmilekic			panic("spin lock %s held by %p for > 5 seconds",
51572200Sbmilekic			    m->mtx_description, (void *)m->mtx_lock);
51667352Sjhb		}
51767352Sjhb	}
51872200Sbmilekic
51972200Sbmilekic	m->mtx_saveintr = mtx_intr;
52072200Sbmilekic	if ((opts & MTX_QUIET) == 0)
52172200Sbmilekic		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
52272200Sbmilekic
52372200Sbmilekic	return;
52467352Sjhb}
52567352Sjhb
52672200Sbmilekic/*
52772200Sbmilekic * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
52872200Sbmilekic *
52972200Sbmilekic * We are only called here if the lock is recursed or contested (i.e. we
53072200Sbmilekic * need to wake up a blocked thread).
53172200Sbmilekic */
53267352Sjhbvoid
53372200Sbmilekic_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
53467352Sjhb{
53567352Sjhb	struct proc *p, *p1;
53667352Sjhb	struct mtx *m1;
53767352Sjhb	int pri;
53867352Sjhb
53967352Sjhb	p = CURPROC;
54072200Sbmilekic	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
54172200Sbmilekic
54272200Sbmilekic	if (mtx_recursed(m)) {
54372200Sbmilekic		if (--(m->mtx_recurse) == 0)
54472200Sbmilekic			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
54572200Sbmilekic		if ((opts & MTX_QUIET) == 0)
54672200Sbmilekic			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
54772200Sbmilekic		return;
54872200Sbmilekic	}
54972200Sbmilekic
55072200Sbmilekic	mtx_lock_spin(&sched_lock);
55172200Sbmilekic	if ((opts & MTX_QUIET) == 0)
55272200Sbmilekic		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
55372200Sbmilekic
55472200Sbmilekic	p1 = TAILQ_FIRST(&m->mtx_blocked);
55572200Sbmilekic	MPASS(p->p_magic == P_MAGIC);
55672200Sbmilekic	MPASS(p1->p_magic == P_MAGIC);
55772200Sbmilekic
55872200Sbmilekic	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
55972200Sbmilekic
56072200Sbmilekic	if (TAILQ_EMPTY(&m->mtx_blocked)) {
56172200Sbmilekic		LIST_REMOVE(m, mtx_contested);
56272200Sbmilekic		_release_lock_quick(m);
56372200Sbmilekic		if ((opts & MTX_QUIET) == 0)
56472200Sbmilekic			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
56572200Sbmilekic	} else
56672200Sbmilekic		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
56772200Sbmilekic
56872200Sbmilekic	pri = MAXPRI;
56972200Sbmilekic	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
57072200Sbmilekic		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
57172200Sbmilekic		if (cp < pri)
57272200Sbmilekic			pri = cp;
57372200Sbmilekic	}
57472200Sbmilekic
57572200Sbmilekic	if (pri > p->p_nativepri)
57672200Sbmilekic		pri = p->p_nativepri;
57772200Sbmilekic	SET_PRIO(p, pri);
57872200Sbmilekic
57972200Sbmilekic	if ((opts & MTX_QUIET) == 0)
58072200Sbmilekic		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
58172200Sbmilekic		    m, p1);
58272200Sbmilekic
58372200Sbmilekic	p1->p_blocked = NULL;
58472200Sbmilekic	p1->p_mtxname = NULL;
58572200Sbmilekic	p1->p_stat = SRUN;
58672200Sbmilekic	setrunqueue(p1);
58772200Sbmilekic
58872200Sbmilekic	if ((opts & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
58967352Sjhb#ifdef notyet
59072200Sbmilekic		if (p->p_flag & (P_ITHD | P_SITHD)) {
59172200Sbmilekic			ithd_t *it = (ithd_t *)p;
59267352Sjhb
59372200Sbmilekic			if (it->it_interrupted) {
59472200Sbmilekic				if ((opts & MTX_QUIET) == 0)
59572200Sbmilekic					CTR2(KTR_LOCK,
59672200Sbmilekic				    "_mtx_unlock_sleep: 0x%x interrupted 0x%x",
59772200Sbmilekic					    it, it->it_interrupted);
59872200Sbmilekic				intr_thd_fixup(it);
59967352Sjhb			}
60072200Sbmilekic		}
60167352Sjhb#endif
60272200Sbmilekic		setrunqueue(p);
60372200Sbmilekic		if ((opts & MTX_QUIET) == 0)
60472200Sbmilekic			CTR2(KTR_LOCK,
60572200Sbmilekic			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
60672200Sbmilekic			    (void *)m->mtx_lock);
60772200Sbmilekic
60872200Sbmilekic		mi_switch();
60972200Sbmilekic		if ((opts & MTX_QUIET) == 0)
61072200Sbmilekic			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
61172200Sbmilekic			    m, (void *)m->mtx_lock);
61267352Sjhb	}
61372200Sbmilekic
61472200Sbmilekic	mtx_unlock_spin(&sched_lock);
61572200Sbmilekic
61672200Sbmilekic	return;
61767352Sjhb}
61867352Sjhb
61972200Sbmilekic/*
62072200Sbmilekic * All the unlocking of MTX_SPIN locks is done inline.
62172200Sbmilekic * See the _rel_spin_lock() macro for the details.
62272200Sbmilekic */
62372200Sbmilekic
62472200Sbmilekic/*
62572200Sbmilekic * The INVARIANTS-enabled mtx_assert()
62672200Sbmilekic */
62771352Sjasone#ifdef INVARIANTS
62871352Sjasonevoid
62971360Sjasone_mtx_assert(struct mtx *m, int what, const char *file, int line)
63071352Sjasone{
63171352Sjasone	switch ((what)) {
63271352Sjasone	case MA_OWNED:
63371352Sjasone	case MA_OWNED | MA_RECURSED:
63471352Sjasone	case MA_OWNED | MA_NOTRECURSED:
63571352Sjasone		if (!mtx_owned((m)))
63671352Sjasone			panic("mutex %s not owned at %s:%d",
63771360Sjasone			    (m)->mtx_description, file, line);
63871352Sjasone		if (mtx_recursed((m))) {
63971352Sjasone			if (((what) & MA_NOTRECURSED) != 0)
64071352Sjasone				panic("mutex %s recursed at %s:%d",
64171360Sjasone				    (m)->mtx_description, file, line);
64271352Sjasone		} else if (((what) & MA_RECURSED) != 0) {
64371352Sjasone			panic("mutex %s unrecursed at %s:%d",
64471360Sjasone			    (m)->mtx_description, file, line);
64571352Sjasone		}
64671352Sjasone		break;
64771352Sjasone	case MA_NOTOWNED:
64871352Sjasone		if (mtx_owned((m)))
64971352Sjasone			panic("mutex %s owned at %s:%d",
65071360Sjasone			    (m)->mtx_description, file, line);
65171352Sjasone		break;
65271352Sjasone	default:
65371360Sjasone		panic("unknown mtx_assert at %s:%d", file, line);
65471352Sjasone	}
65571352Sjasone}
65671352Sjasone#endif
65771352Sjasone
65872200Sbmilekic/*
65972200Sbmilekic * The MUTEX_DEBUG-enabled mtx_validate()
66072200Sbmilekic */
66167352Sjhb#define MV_DESTROY	0	/* validate before destory */
66267352Sjhb#define MV_INIT		1	/* validate before init */
66367352Sjhb
66467352Sjhb#ifdef MUTEX_DEBUG
66567352Sjhb
66667352Sjhbint mtx_validate __P((struct mtx *, int));
66767352Sjhb
66867352Sjhbint
66967352Sjhbmtx_validate(struct mtx *m, int when)
67067352Sjhb{
67167352Sjhb	struct mtx *mp;
67267352Sjhb	int i;
67367352Sjhb	int retval = 0;
67467352Sjhb
67571320Sjasone#ifdef WITNESS
67671320Sjasone	if (witness_cold)
67771320Sjasone		return 0;
67871320Sjasone#endif
67967352Sjhb	if (m == &all_mtx || cold)
68067352Sjhb		return 0;
68167352Sjhb
68272200Sbmilekic	mtx_lock(&all_mtx);
68367352Sjhb/*
68467352Sjhb * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
68567352Sjhb * we can re-enable the kernacc() checks.
68667352Sjhb */
68767352Sjhb#ifndef __alpha__
68867352Sjhb	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
68967352Sjhb	    VM_PROT_READ) == 1);
69067352Sjhb#endif
69167352Sjhb	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
69267352Sjhb	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
69367352Sjhb#ifndef __alpha__
69467352Sjhb		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
69567352Sjhb		    VM_PROT_READ) != 1) {
69667352Sjhb			panic("mtx_validate: mp=%p mp->mtx_next=%p",
69767352Sjhb			    mp, mp->mtx_next);
69867352Sjhb		}
69967352Sjhb#endif
70067352Sjhb		i++;
70167352Sjhb		if (i > mtx_cur_cnt) {
70267352Sjhb			panic("mtx_validate: too many in chain, known=%d\n",
70367352Sjhb			    mtx_cur_cnt);
70467352Sjhb		}
70567352Sjhb	}
70667352Sjhb	MPASS(i == mtx_cur_cnt);
70767352Sjhb	switch (when) {
70867352Sjhb	case MV_DESTROY:
70967352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
71067352Sjhb			if (mp == m)
71167352Sjhb				break;
71267352Sjhb		MPASS(mp == m);
71367352Sjhb		break;
71467352Sjhb	case MV_INIT:
71567352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
71667352Sjhb		if (mp == m) {
71767352Sjhb			/*
71867352Sjhb			 * Not good. This mutex already exists.
71967352Sjhb			 */
72067352Sjhb			printf("re-initing existing mutex %s\n",
72167352Sjhb			    m->mtx_description);
72267352Sjhb			MPASS(m->mtx_lock == MTX_UNOWNED);
72367352Sjhb			retval = 1;
72467352Sjhb		}
72567352Sjhb	}
72672200Sbmilekic	mtx_unlock(&all_mtx);
72767352Sjhb	return (retval);
72867352Sjhb}
72967352Sjhb#endif
73067352Sjhb
73172200Sbmilekic/*
73272200Sbmilekic * Mutex initialization routine; initialize lock `m' of type contained in
73372200Sbmilekic * `opts' with options contained in `opts' and description `description.'
73472200Sbmilekic * Place on "all_mtx" queue.
73572200Sbmilekic */
73667352Sjhbvoid
73772200Sbmilekicmtx_init(struct mtx *m, const char *description, int opts)
73867352Sjhb{
73972200Sbmilekic
74072200Sbmilekic	if ((opts & MTX_QUIET) == 0)
74172200Sbmilekic		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
74272200Sbmilekic
74367352Sjhb#ifdef MUTEX_DEBUG
74472200Sbmilekic	/* Diagnostic and error correction */
74572200Sbmilekic	if (mtx_validate(m, MV_INIT))
74667352Sjhb		return;
74769429Sjhb#endif
74867352Sjhb
74967352Sjhb	bzero((void *)m, sizeof *m);
75067352Sjhb	TAILQ_INIT(&m->mtx_blocked);
75172200Sbmilekic
75269429Sjhb#ifdef WITNESS
75371320Sjasone	if (!witness_cold) {
75471560Sjhb		m->mtx_debug = malloc(sizeof(struct mtx_debug),
75572200Sbmilekic		    M_WITNESS, M_NOWAIT | M_ZERO);
75671560Sjhb		MPASS(m->mtx_debug != NULL);
75771320Sjasone	}
75871560Sjhb#endif
75971320Sjasone
76072200Sbmilekic	m->mtx_description = description;
76172200Sbmilekic	m->mtx_flags = opts;
76267352Sjhb	m->mtx_lock = MTX_UNOWNED;
76372200Sbmilekic
76467352Sjhb	/* Put on all mutex queue */
76572200Sbmilekic	mtx_lock(&all_mtx);
76667352Sjhb	m->mtx_next = &all_mtx;
76767352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
76867352Sjhb	m->mtx_prev->mtx_next = m;
76967352Sjhb	all_mtx.mtx_prev = m;
77067352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
77167352Sjhb		mtx_max_cnt = mtx_cur_cnt;
77272200Sbmilekic	mtx_unlock(&all_mtx);
77372200Sbmilekic
77471320Sjasone#ifdef WITNESS
77571320Sjasone	if (!witness_cold)
77672200Sbmilekic		witness_init(m, opts);
77771320Sjasone#endif
77867352Sjhb}
77967352Sjhb
78072200Sbmilekic/*
78172200Sbmilekic * Remove lock `m' from all_mtx queue.
78272200Sbmilekic */
78367352Sjhbvoid
78467352Sjhbmtx_destroy(struct mtx *m)
78567352Sjhb{
78667352Sjhb
78771320Sjasone#ifdef WITNESS
78871320Sjasone	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
78971320Sjasone	    __FUNCTION__));
79071320Sjasone#endif
79172200Sbmilekic
79271560Sjhb	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
79372200Sbmilekic
79467352Sjhb#ifdef MUTEX_DEBUG
79567352Sjhb	if (m->mtx_next == NULL)
79667352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
79767352Sjhb		    m, m->mtx_description);
79867352Sjhb
79967352Sjhb	if (!mtx_owned(m)) {
80067352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
80167352Sjhb	} else {
80271228Sbmilekic		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
80367352Sjhb	}
80472200Sbmilekic
80572200Sbmilekic	/* diagnostic */
80672200Sbmilekic	mtx_validate(m, MV_DESTROY);
80767352Sjhb#endif
80867352Sjhb
80967352Sjhb#ifdef WITNESS
81067352Sjhb	if (m->mtx_witness)
81167352Sjhb		witness_destroy(m);
81267352Sjhb#endif /* WITNESS */
81367352Sjhb
81467352Sjhb	/* Remove from the all mutex queue */
81572200Sbmilekic	mtx_lock(&all_mtx);
81667352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
81767352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
81872200Sbmilekic
81967352Sjhb#ifdef MUTEX_DEBUG
82067352Sjhb	m->mtx_next = m->mtx_prev = NULL;
82169429Sjhb#endif
82272200Sbmilekic
82369429Sjhb#ifdef WITNESS
82472200Sbmilekic	free(m->mtx_debug, M_WITNESS);
82571560Sjhb	m->mtx_debug = NULL;
82667352Sjhb#endif
82772200Sbmilekic
82867352Sjhb	mtx_cur_cnt--;
82972200Sbmilekic	mtx_unlock(&all_mtx);
83067352Sjhb}
83167352Sjhb
83272200Sbmilekic
83371560Sjhb/*
83472200Sbmilekic * The WITNESS-enabled diagnostic code.
83571560Sjhb */
83671560Sjhb#ifdef WITNESS
83771320Sjasonestatic void
83871320Sjasonewitness_fixup(void *dummy __unused)
83971320Sjasone{
84071320Sjasone	struct mtx *mp;
84171320Sjasone
84271560Sjhb	/*
84371560Sjhb	 * We have to release Giant before initializing its witness
84471560Sjhb	 * structure so that WITNESS doesn't get confused.
84571560Sjhb	 */
84672200Sbmilekic	mtx_unlock(&Giant);
84771560Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
84871560Sjhb
84972200Sbmilekic	mtx_lock(&all_mtx);
85072200Sbmilekic
85171320Sjasone	/* Iterate through all mutexes and finish up mutex initialization. */
85271320Sjasone	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
85371320Sjasone
85471560Sjhb		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
85572200Sbmilekic		    M_WITNESS, M_NOWAIT | M_ZERO);
85671560Sjhb		MPASS(mp->mtx_debug != NULL);
85771320Sjasone
85871320Sjasone		witness_init(mp, mp->mtx_flags);
85971320Sjasone	}
86072200Sbmilekic	mtx_unlock(&all_mtx);
86171320Sjasone
86271320Sjasone	/* Mark the witness code as being ready for use. */
86371320Sjasone	atomic_store_rel_int(&witness_cold, 0);
86471560Sjhb
86572200Sbmilekic	mtx_lock(&Giant);
86671320Sjasone}
86771320SjasoneSYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
86871320Sjasone
86965557Sjasone#define WITNESS_COUNT 200
87065557Sjasone#define	WITNESS_NCHILDREN 2
87165557Sjasone
87267401Sjhbint witness_watch = 1;
87365557Sjasone
87465856Sjhbstruct witness {
87565557Sjasone	struct witness	*w_next;
87667404Sjhb	const char	*w_description;
87765624Sjasone	const char	*w_file;
87865557Sjasone	int		 w_line;
87965557Sjasone	struct witness	*w_morechildren;
88065557Sjasone	u_char		 w_childcnt;
88165557Sjasone	u_char		 w_Giant_squawked:1;
88265557Sjasone	u_char		 w_other_squawked:1;
88365557Sjasone	u_char		 w_same_squawked:1;
88471228Sbmilekic	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
88565557Sjasone	u_int		 w_level;
88665557Sjasone	struct witness	*w_children[WITNESS_NCHILDREN];
88765856Sjhb};
88865557Sjasone
88965856Sjhbstruct witness_blessed {
89065557Sjasone	char 	*b_lock1;
89165557Sjasone	char	*b_lock2;
89265856Sjhb};
89365557Sjasone
89467676Sjhb#ifdef DDB
89565557Sjasone/*
89667676Sjhb * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
89765557Sjasone * drop into kdebug() when:
89865557Sjasone *	- a lock heirarchy violation occurs
89965557Sjasone *	- locks are held when going to sleep.
90065557Sjasone */
90171560Sjhbint	witness_ddb;
90267676Sjhb#ifdef WITNESS_DDB
90371560SjhbTUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
90467676Sjhb#else
90571560SjhbTUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
90665557Sjasone#endif
90767676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
90867676Sjhb#endif /* DDB */
90965557Sjasone
91071560Sjhbint	witness_skipspin;
91167676Sjhb#ifdef WITNESS_SKIPSPIN
91271560SjhbTUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
91367676Sjhb#else
91471560SjhbTUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
91565557Sjasone#endif
91667676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
91767676Sjhb    "");
91865557Sjasone
91972200Sbmilekic/*
92072200Sbmilekic * Witness-enabled globals
92172200Sbmilekic */
92271320Sjasonestatic struct mtx	w_mtx;
92365856Sjhbstatic struct witness	*w_free;
92465856Sjhbstatic struct witness	*w_all;
92565856Sjhbstatic int		 w_inited;
92665856Sjhbstatic int		 witness_dead;	/* fatal error, probably no memory */
92765557Sjasone
92865856Sjhbstatic struct witness	 w_data[WITNESS_COUNT];
92965557Sjasone
93072200Sbmilekic/*
93172200Sbmilekic * Internal witness routine prototypes
93272200Sbmilekic */
93372200Sbmilekicstatic struct witness *enroll(const char *description, int flag);
93472200Sbmilekicstatic int itismychild(struct witness *parent, struct witness *child);
93572200Sbmilekicstatic void removechild(struct witness *parent, struct witness *child);
93672200Sbmilekicstatic int isitmychild(struct witness *parent, struct witness *child);
93772200Sbmilekicstatic int isitmydescendant(struct witness *parent, struct witness *child);
93872200Sbmilekicstatic int dup_ok(struct witness *);
93972200Sbmilekicstatic int blessed(struct witness *, struct witness *);
94072200Sbmilekicstatic void
94172200Sbmilekic    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
94272200Sbmilekicstatic void witness_leveldescendents(struct witness *parent, int level);
94372200Sbmilekicstatic void witness_levelall(void);
94472200Sbmilekicstatic struct witness * witness_get(void);
94572200Sbmilekicstatic void witness_free(struct witness *m);
94665557Sjasone
94765557Sjasonestatic char *ignore_list[] = {
94865557Sjasone	"witness lock",
94965557Sjasone	NULL
95065557Sjasone};
95165557Sjasone
95265557Sjasonestatic char *spin_order_list[] = {
95372224Sjhb#if defined(__i386__) && defined (SMP)
95472224Sjhb	"com",
95572224Sjhb#endif
95669362Sjhb	"sio",
95772224Sjhb#ifdef __i386__
95872224Sjhb	"cy",
95972224Sjhb#endif
96065557Sjasone	"sched lock",
96168808Sjhb#ifdef __i386__
96267676Sjhb	"clk",
96368808Sjhb#endif
96468889Sjake	"callout",
96565557Sjasone	/*
96665557Sjasone	 * leaf locks
96765557Sjasone	 */
96872224Sjhb	"ithread table lock",
96972224Sjhb	"ithread list lock",
97072224Sjhb#ifdef SMP
97171576Sjasone#ifdef __i386__
97271576Sjasone	"ap boot",
97371576Sjasone	"imen",
97471576Sjasone#endif
97571576Sjasone	"smp rendezvous",
97672224Sjhb#endif
97765557Sjasone	NULL
97865557Sjasone};
97965557Sjasone
98065557Sjasonestatic char *order_list[] = {
98172256Sjhb	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
98272256Sjhb	    "uidinfo struct", NULL,
98365557Sjasone	NULL
98465557Sjasone};
98565557Sjasone
98665557Sjasonestatic char *dup_list[] = {
98765557Sjasone	NULL
98865557Sjasone};
98965557Sjasone
99065557Sjasonestatic char *sleep_list[] = {
99168862Sjake	"Giant",
99265557Sjasone	NULL
99365557Sjasone};
99465557Sjasone
99565557Sjasone/*
99665557Sjasone * Pairs of locks which have been blessed
99765557Sjasone * Don't complain about order problems with blessed locks
99865557Sjasone */
99965856Sjhbstatic struct witness_blessed blessed_list[] = {
100065557Sjasone};
100172200Sbmilekicstatic int blessed_count =
100272200Sbmilekic	sizeof(blessed_list) / sizeof(struct witness_blessed);
100365557Sjasone
100471352Sjasonestatic void
100565856Sjhbwitness_init(struct mtx *m, int flag)
100665557Sjasone{
100765557Sjasone	m->mtx_witness = enroll(m->mtx_description, flag);
100865557Sjasone}
100965557Sjasone
101071352Sjasonestatic void
101165856Sjhbwitness_destroy(struct mtx *m)
101265557Sjasone{
101365856Sjhb	struct mtx *m1;
101465557Sjasone	struct proc *p;
101565557Sjasone	p = CURPROC;
101672224Sjhb	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
101765557Sjasone		if (m1 == m) {
101865557Sjasone			LIST_REMOVE(m, mtx_held);
101965557Sjasone			break;
102065557Sjasone		}
102165557Sjasone	}
102265557Sjasone	return;
102365557Sjasone
102465557Sjasone}
102565557Sjasone
102671352Sjasonestatic void
102771352Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
102871352Sjasone{
102971352Sjasone	struct witness *w, *w1;
103072224Sjhb	int level, found;
103171352Sjasone
103271352Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
103371352Sjasone	witness_levelall();
103471352Sjasone
103572224Sjhb	/*
103672224Sjhb	 * First, handle sleep mutexes which have been acquired at least
103772224Sjhb	 * once.
103872224Sjhb	 */
103972224Sjhb	prnt("Sleep mutexes:\n");
104071352Sjasone	for (w = w_all; w; w = w->w_next) {
104172224Sjhb		if (w->w_file == NULL || w->w_spin)
104271352Sjasone			continue;
104371352Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
104471352Sjasone			if (isitmychild(w1, w))
104571352Sjasone				break;
104671352Sjasone		}
104771352Sjasone		if (w1 != NULL)
104871352Sjasone			continue;
104971352Sjasone		/*
105071352Sjasone		 * This lock has no anscestors, display its descendants.
105171352Sjasone		 */
105271352Sjasone		witness_displaydescendants(prnt, w);
105371352Sjasone	}
105472224Sjhb
105572224Sjhb	/*
105672224Sjhb	 * Now do spin mutexes which have been acquired at least once.
105772224Sjhb	 */
105872224Sjhb	prnt("\nSpin mutexes:\n");
105972224Sjhb	level = 0;
106072224Sjhb	while (level < sizeof(spin_order_list) / sizeof(char *)) {
106172224Sjhb		found = 0;
106272224Sjhb		for (w = w_all; w; w = w->w_next) {
106372224Sjhb			if (w->w_file == NULL || !w->w_spin)
106472224Sjhb				continue;
106572224Sjhb			if (w->w_level == 1 << level) {
106672224Sjhb				witness_displaydescendants(prnt, w);
106772224Sjhb				level++;
106872224Sjhb				found = 1;
106972224Sjhb			}
107072224Sjhb		}
107172224Sjhb		if (found == 0)
107272224Sjhb			level++;
107372224Sjhb	}
107472224Sjhb
107572224Sjhb	/*
107672224Sjhb	 * Finally, any mutexes which have not been acquired yet.
107772224Sjhb	 */
107872224Sjhb	prnt("\nMutexes which were never acquired:\n");
107971352Sjasone	for (w = w_all; w; w = w->w_next) {
108071352Sjasone		if (w->w_file != NULL)
108171352Sjasone			continue;
108271352Sjasone		prnt("%s\n", w->w_description);
108371352Sjasone	}
108471352Sjasone}
108571352Sjasone
108665557Sjasonevoid
108765856Sjhbwitness_enter(struct mtx *m, int flags, const char *file, int line)
108865557Sjasone{
108965856Sjhb	struct witness *w, *w1;
109065856Sjhb	struct mtx *m1;
109165557Sjasone	struct proc *p;
109265557Sjasone	int i;
109367676Sjhb#ifdef DDB
109467676Sjhb	int go_into_ddb = 0;
109567676Sjhb#endif /* DDB */
109665557Sjasone
109771352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
109871320Sjasone		return;
109965557Sjasone	w = m->mtx_witness;
110065557Sjasone	p = CURPROC;
110165557Sjasone
110265557Sjasone	if (flags & MTX_SPIN) {
110371560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
110465651Sjasone			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
110565651Sjasone			    " %s:%d", m->mtx_description, file, line);
110671228Sbmilekic		if (mtx_recursed(m)) {
110771560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
110871228Sbmilekic				panic("mutex_enter: recursion on non-recursive"
110971228Sbmilekic				    " mutex %s @ %s:%d", m->mtx_description,
111071228Sbmilekic				    file, line);
111165557Sjasone			return;
111271228Sbmilekic		}
111372200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
111470861Sjake		i = PCPU_GET(witness_spin_check);
111565557Sjasone		if (i != 0 && w->w_level < i) {
111672200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
111765651Sjasone			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
111865651Sjasone			    " %s:%d already holding %s:%x",
111965557Sjasone			    m->mtx_description, w->w_level, file, line,
112065557Sjasone			    spin_order_list[ffs(i)-1], i);
112165557Sjasone		}
112265557Sjasone		PCPU_SET(witness_spin_check, i | w->w_level);
112372200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
112469361Sjhb		w->w_file = file;
112569361Sjhb		w->w_line = line;
112669361Sjhb		m->mtx_line = line;
112769361Sjhb		m->mtx_file = file;
112865557Sjasone		return;
112965557Sjasone	}
113071560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
113165557Sjasone		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
113265557Sjasone		    m->mtx_description, file, line);
113365557Sjasone
113471228Sbmilekic	if (mtx_recursed(m)) {
113571560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
113671228Sbmilekic			panic("mutex_enter: recursion on non-recursive"
113771228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description,
113871228Sbmilekic			    file, line);
113965557Sjasone		return;
114071228Sbmilekic	}
114165557Sjasone	if (witness_dead)
114265557Sjasone		goto out;
114369998Sjhb	if (cold)
114465557Sjasone		goto out;
114565557Sjasone
114665557Sjasone	if (!mtx_legal2block())
114772200Sbmilekic		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
114865557Sjasone			    m->mtx_description, file, line);
114965557Sjasone	/*
115065557Sjasone	 * Is this the first mutex acquired
115165557Sjasone	 */
115265557Sjasone	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
115365557Sjasone		goto out;
115465557Sjasone
115565557Sjasone	if ((w1 = m1->mtx_witness) == w) {
115665557Sjasone		if (w->w_same_squawked || dup_ok(w))
115765557Sjasone			goto out;
115865557Sjasone		w->w_same_squawked = 1;
115965557Sjasone		printf("acquring duplicate lock of same type: \"%s\"\n",
116065557Sjasone			m->mtx_description);
116165557Sjasone		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
116265557Sjasone		printf(" 2nd @ %s:%d\n", file, line);
116367676Sjhb#ifdef DDB
116467676Sjhb		go_into_ddb = 1;
116567676Sjhb#endif /* DDB */
116665557Sjasone		goto out;
116765557Sjasone	}
116865557Sjasone	MPASS(!mtx_owned(&w_mtx));
116972200Sbmilekic	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
117065557Sjasone	/*
117165557Sjasone	 * If we have a known higher number just say ok
117265557Sjasone	 */
117365557Sjasone	if (witness_watch > 1 && w->w_level > w1->w_level) {
117472200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
117565557Sjasone		goto out;
117665557Sjasone	}
117765557Sjasone	if (isitmydescendant(m1->mtx_witness, w)) {
117872200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
117965557Sjasone		goto out;
118065557Sjasone	}
118165557Sjasone	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
118265557Sjasone
118367352Sjhb		MPASS(i < 200);
118465557Sjasone		w1 = m1->mtx_witness;
118565557Sjasone		if (isitmydescendant(w, w1)) {
118672200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
118765557Sjasone			if (blessed(w, w1))
118865557Sjasone				goto out;
118965557Sjasone			if (m1 == &Giant) {
119065557Sjasone				if (w1->w_Giant_squawked)
119165557Sjasone					goto out;
119265557Sjasone				else
119365557Sjasone					w1->w_Giant_squawked = 1;
119465557Sjasone			} else {
119565557Sjasone				if (w1->w_other_squawked)
119665557Sjasone					goto out;
119765557Sjasone				else
119865557Sjasone					w1->w_other_squawked = 1;
119965557Sjasone			}
120065557Sjasone			printf("lock order reversal\n");
120165557Sjasone			printf(" 1st %s last acquired @ %s:%d\n",
120265557Sjasone			    w->w_description, w->w_file, w->w_line);
120365557Sjasone			printf(" 2nd %p %s @ %s:%d\n",
120465557Sjasone			    m1, w1->w_description, w1->w_file, w1->w_line);
120565557Sjasone			printf(" 3rd %p %s @ %s:%d\n",
120665557Sjasone			    m, w->w_description, file, line);
120767676Sjhb#ifdef DDB
120867676Sjhb			go_into_ddb = 1;
120967676Sjhb#endif /* DDB */
121065557Sjasone			goto out;
121165557Sjasone		}
121265557Sjasone	}
121365557Sjasone	m1 = LIST_FIRST(&p->p_heldmtx);
121465557Sjasone	if (!itismychild(m1->mtx_witness, w))
121572200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
121665557Sjasone
121765557Sjasoneout:
121867676Sjhb#ifdef DDB
121967676Sjhb	if (witness_ddb && go_into_ddb)
122067676Sjhb		Debugger("witness_enter");
122167676Sjhb#endif /* DDB */
122265557Sjasone	w->w_file = file;
122365557Sjasone	w->w_line = line;
122465557Sjasone	m->mtx_line = line;
122565557Sjasone	m->mtx_file = file;
122665557Sjasone
122765557Sjasone	/*
122868582Sjhb	 * If this pays off it likely means that a mutex being witnessed
122965557Sjasone	 * is acquired in hardclock. Put it in the ignore list. It is
123065557Sjasone	 * likely not the mutex this assert fails on.
123165557Sjasone	 */
123267352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
123365557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
123465557Sjasone}
123565557Sjasone
123665557Sjasonevoid
123765856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
123865557Sjasone{
123965557Sjasone	struct proc *p;
124065856Sjhb	struct witness *w = m->mtx_witness;
124165557Sjasone
124271320Sjasone	if (witness_cold)
124371320Sjasone		return;
124469998Sjhb	if (panicstr)
124569998Sjhb		return;
124665557Sjasone	if (flags & MTX_SPIN) {
124771560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
124865557Sjasone			panic("mutex_try_enter: "
124965557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
125065557Sjasone			    m->mtx_description, file, line);
125171228Sbmilekic		if (mtx_recursed(m)) {
125271560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
125371228Sbmilekic				panic("mutex_try_enter: recursion on"
125471228Sbmilekic				    " non-recursive mutex %s @ %s:%d",
125571228Sbmilekic				    m->mtx_description, file, line);
125665557Sjasone			return;
125771228Sbmilekic		}
125872200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
125970861Sjake		PCPU_SET(witness_spin_check,
126070861Sjake		    PCPU_GET(witness_spin_check) | w->w_level);
126172200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
126269361Sjhb		w->w_file = file;
126369361Sjhb		w->w_line = line;
126469361Sjhb		m->mtx_line = line;
126569361Sjhb		m->mtx_file = file;
126665557Sjasone		return;
126765557Sjasone	}
126865557Sjasone
126971560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
127065557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
127165557Sjasone		    m->mtx_description, file, line);
127265557Sjasone
127371228Sbmilekic	if (mtx_recursed(m)) {
127471560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
127571228Sbmilekic			panic("mutex_try_enter: recursion on non-recursive"
127671228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description, file,
127771228Sbmilekic			    line);
127865557Sjasone		return;
127971228Sbmilekic	}
128065557Sjasone	w->w_file = file;
128165557Sjasone	w->w_line = line;
128265557Sjasone	m->mtx_line = line;
128365557Sjasone	m->mtx_file = file;
128465557Sjasone	p = CURPROC;
128567352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
128665557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
128765557Sjasone}
128865557Sjasone
128965557Sjasonevoid
129071352Sjasonewitness_exit(struct mtx *m, int flags, const char *file, int line)
129165557Sjasone{
129271352Sjasone	struct witness *w;
129365557Sjasone
129471352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
129571352Sjasone		return;
129671352Sjasone	w = m->mtx_witness;
129765557Sjasone
129871352Sjasone	if (flags & MTX_SPIN) {
129971560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
130071352Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
130171352Sjasone			    " %s:%d", m->mtx_description, file, line);
130271352Sjasone		if (mtx_recursed(m)) {
130371560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
130471352Sjasone				panic("mutex_exit: recursion on non-recursive"
130571352Sjasone				    " mutex %s @ %s:%d", m->mtx_description,
130671352Sjasone				    file, line);
130771352Sjasone			return;
130865557Sjasone		}
130972200Sbmilekic		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
131071352Sjasone		PCPU_SET(witness_spin_check,
131171352Sjasone		    PCPU_GET(witness_spin_check) & ~w->w_level);
131272200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
131371352Sjasone		return;
131465557Sjasone	}
131571560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
131671352Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
131771352Sjasone		    m->mtx_description, file, line);
131871352Sjasone
131971352Sjasone	if (mtx_recursed(m)) {
132071560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
132171352Sjasone			panic("mutex_exit: recursion on non-recursive"
132271352Sjasone			    " mutex %s @ %s:%d", m->mtx_description,
132371352Sjasone			    file, line);
132471352Sjasone		return;
132565557Sjasone	}
132671352Sjasone
132771352Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
132872200Sbmilekic		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
132971352Sjasone			    m->mtx_description, file, line);
133071352Sjasone	LIST_REMOVE(m, mtx_held);
133171352Sjasone	m->mtx_held.le_prev = NULL;
133265557Sjasone}
133365557Sjasone
133465557Sjasoneint
133565856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
133665557Sjasone{
133765856Sjhb	struct mtx *m;
133865557Sjasone	struct proc *p;
133965557Sjasone	char **sleep;
134065557Sjasone	int n = 0;
134165557Sjasone
134271320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
134365557Sjasone	p = CURPROC;
134472224Sjhb	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
134565557Sjasone		if (m == mtx)
134665557Sjasone			continue;
134765557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
134865557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
134965557Sjasone				goto next;
135072224Sjhb		if (n == 0)
135172224Sjhb			printf("Whee!\n");
135265557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
135365557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
135465557Sjasone			m->mtx_description,
135565557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
135665557Sjasone		n++;
135765557Sjasone	next:
135865557Sjasone	}
135967676Sjhb#ifdef DDB
136067676Sjhb	if (witness_ddb && n)
136167676Sjhb		Debugger("witness_sleep");
136267676Sjhb#endif /* DDB */
136365557Sjasone	return (n);
136465557Sjasone}
136565557Sjasone
136665856Sjhbstatic struct witness *
136767404Sjhbenroll(const char *description, int flag)
136865557Sjasone{
136965557Sjasone	int i;
137065856Sjhb	struct witness *w, *w1;
137165557Sjasone	char **ignore;
137265557Sjasone	char **order;
137365557Sjasone
137465557Sjasone	if (!witness_watch)
137565557Sjasone		return (NULL);
137665557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
137765557Sjasone		if (strcmp(description, *ignore) == 0)
137865557Sjasone			return (NULL);
137965557Sjasone
138065557Sjasone	if (w_inited == 0) {
138171320Sjasone		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
138265557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
138365557Sjasone			w = &w_data[i];
138465557Sjasone			witness_free(w);
138565557Sjasone		}
138665557Sjasone		w_inited = 1;
138765557Sjasone		for (order = order_list; *order != NULL; order++) {
138865557Sjasone			w = enroll(*order, MTX_DEF);
138965557Sjasone			w->w_file = "order list";
139065557Sjasone			for (order++; *order != NULL; order++) {
139165557Sjasone				w1 = enroll(*order, MTX_DEF);
139265557Sjasone				w1->w_file = "order list";
139365557Sjasone				itismychild(w, w1);
139465557Sjasone				w = w1;
139565557Sjasone    	    	    	}
139665557Sjasone		}
139765557Sjasone	}
139865557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
139965557Sjasone		return (NULL);
140072200Sbmilekic	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
140165557Sjasone	for (w = w_all; w; w = w->w_next) {
140265557Sjasone		if (strcmp(description, w->w_description) == 0) {
140372200Sbmilekic			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
140465557Sjasone			return (w);
140565557Sjasone		}
140665557Sjasone	}
140765557Sjasone	if ((w = witness_get()) == NULL)
140865557Sjasone		return (NULL);
140965557Sjasone	w->w_next = w_all;
141065557Sjasone	w_all = w;
141165557Sjasone	w->w_description = description;
141272200Sbmilekic	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
141365557Sjasone	if (flag & MTX_SPIN) {
141465557Sjasone		w->w_spin = 1;
141565557Sjasone
141665557Sjasone		i = 1;
141765557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
141865557Sjasone			if (strcmp(description, *order) == 0)
141965557Sjasone				break;
142065557Sjasone			i <<= 1;
142165557Sjasone		}
142265557Sjasone		if (*order == NULL)
142365557Sjasone			panic("spin lock %s not in order list", description);
142465557Sjasone		w->w_level = i;
142571560Sjhb	}
142671228Sbmilekic
142765557Sjasone	return (w);
142865557Sjasone}
142965557Sjasone
143065557Sjasonestatic int
143165856Sjhbitismychild(struct witness *parent, struct witness *child)
143265557Sjasone{
143365557Sjasone	static int recursed;
143465557Sjasone
143565557Sjasone	/*
143665557Sjasone	 * Insert "child" after "parent"
143765557Sjasone	 */
143865557Sjasone	while (parent->w_morechildren)
143965557Sjasone		parent = parent->w_morechildren;
144065557Sjasone
144165557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
144265557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
144365557Sjasone			return (1);
144465557Sjasone		parent = parent->w_morechildren;
144565557Sjasone	}
144667352Sjhb	MPASS(child != NULL);
144765557Sjasone	parent->w_children[parent->w_childcnt++] = child;
144865557Sjasone	/*
144965557Sjasone	 * now prune whole tree
145065557Sjasone	 */
145165557Sjasone	if (recursed)
145265557Sjasone		return (0);
145365557Sjasone	recursed = 1;
145465557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
145565557Sjasone		for (parent = w_all; parent != NULL;
145665557Sjasone		    parent = parent->w_next) {
145765557Sjasone			if (!isitmychild(parent, child))
145865557Sjasone				continue;
145965557Sjasone			removechild(parent, child);
146065557Sjasone			if (isitmydescendant(parent, child))
146165557Sjasone				continue;
146265557Sjasone			itismychild(parent, child);
146365557Sjasone		}
146465557Sjasone	}
146565557Sjasone	recursed = 0;
146665557Sjasone	witness_levelall();
146765557Sjasone	return (0);
146865557Sjasone}
146965557Sjasone
147065557Sjasonestatic void
147165856Sjhbremovechild(struct witness *parent, struct witness *child)
147265557Sjasone{
147365856Sjhb	struct witness *w, *w1;
147465557Sjasone	int i;
147565557Sjasone
147665557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
147765557Sjasone		for (i = 0; i < w->w_childcnt; i++)
147865557Sjasone			if (w->w_children[i] == child)
147965557Sjasone				goto found;
148065557Sjasone	return;
148165557Sjasonefound:
148265557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
148365557Sjasone		continue;
148465557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
148567352Sjhb	MPASS(w->w_children[i] != NULL);
148665557Sjasone
148765557Sjasone	if (w1->w_childcnt != 0)
148865557Sjasone		return;
148965557Sjasone
149065557Sjasone	if (w1 == parent)
149165557Sjasone		return;
149265557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
149365557Sjasone		continue;
149465557Sjasone	w->w_morechildren = 0;
149565557Sjasone	witness_free(w1);
149665557Sjasone}
149765557Sjasone
149865557Sjasonestatic int
149965856Sjhbisitmychild(struct witness *parent, struct witness *child)
150065557Sjasone{
150165856Sjhb	struct witness *w;
150265557Sjasone	int i;
150365557Sjasone
150465557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
150565557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
150665557Sjasone			if (w->w_children[i] == child)
150765557Sjasone				return (1);
150865557Sjasone		}
150965557Sjasone	}
151065557Sjasone	return (0);
151165557Sjasone}
151265557Sjasone
151365557Sjasonestatic int
151465856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
151565557Sjasone{
151665856Sjhb	struct witness *w;
151765557Sjasone	int i;
151865557Sjasone	int j;
151965557Sjasone
152065557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
152167352Sjhb		MPASS(j < 1000);
152265557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
152365557Sjasone			if (w->w_children[i] == child)
152465557Sjasone				return (1);
152565557Sjasone		}
152665557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
152765557Sjasone			if (isitmydescendant(w->w_children[i], child))
152865557Sjasone				return (1);
152965557Sjasone		}
153065557Sjasone	}
153165557Sjasone	return (0);
153265557Sjasone}
153365557Sjasone
153465557Sjasonevoid
153565557Sjasonewitness_levelall (void)
153665557Sjasone{
153765856Sjhb	struct witness *w, *w1;
153865557Sjasone
153965557Sjasone	for (w = w_all; w; w = w->w_next)
154071228Sbmilekic		if (!(w->w_spin))
154165557Sjasone			w->w_level = 0;
154265557Sjasone	for (w = w_all; w; w = w->w_next) {
154365557Sjasone		if (w->w_spin)
154465557Sjasone			continue;
154565557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
154665557Sjasone			if (isitmychild(w1, w))
154765557Sjasone				break;
154865557Sjasone		}
154965557Sjasone		if (w1 != NULL)
155065557Sjasone			continue;
155165557Sjasone		witness_leveldescendents(w, 0);
155265557Sjasone	}
155365557Sjasone}
155465557Sjasone
155565557Sjasonestatic void
155665856Sjhbwitness_leveldescendents(struct witness *parent, int level)
155765557Sjasone{
155865557Sjasone	int i;
155965856Sjhb	struct witness *w;
156065557Sjasone
156165557Sjasone	if (parent->w_level < level)
156265557Sjasone		parent->w_level = level;
156365557Sjasone	level++;
156465557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
156565557Sjasone		for (i = 0; i < w->w_childcnt; i++)
156665557Sjasone			witness_leveldescendents(w->w_children[i], level);
156765557Sjasone}
156865557Sjasone
156965557Sjasonestatic void
157065856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
157165856Sjhb			   struct witness *parent)
157265557Sjasone{
157365856Sjhb	struct witness *w;
157465557Sjasone	int i;
157572224Sjhb	int level;
157665557Sjasone
157772224Sjhb	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
157872224Sjhb
157965557Sjasone	prnt("%d", level);
158065557Sjasone	if (level < 10)
158165557Sjasone		prnt(" ");
158265557Sjasone	for (i = 0; i < level; i++)
158365557Sjasone		prnt(" ");
158465557Sjasone	prnt("%s", parent->w_description);
158572224Sjhb	if (parent->w_file != NULL)
158672224Sjhb		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
158772224Sjhb		    parent->w_line);
158865557Sjasone
158965557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
159065557Sjasone		for (i = 0; i < w->w_childcnt; i++)
159165557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
159265557Sjasone    }
159365557Sjasone
159465557Sjasonestatic int
159565856Sjhbdup_ok(struct witness *w)
159665557Sjasone{
159765557Sjasone	char **dup;
159865557Sjasone
159965557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
160065557Sjasone		if (strcmp(w->w_description, *dup) == 0)
160165557Sjasone			return (1);
160265557Sjasone	return (0);
160365557Sjasone}
160465557Sjasone
160565557Sjasonestatic int
160665856Sjhbblessed(struct witness *w1, struct witness *w2)
160765557Sjasone{
160865557Sjasone	int i;
160965856Sjhb	struct witness_blessed *b;
161065557Sjasone
161165557Sjasone	for (i = 0; i < blessed_count; i++) {
161265557Sjasone		b = &blessed_list[i];
161365557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
161465557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
161565557Sjasone				return (1);
161665557Sjasone			continue;
161765557Sjasone		}
161865557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
161965557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
162065557Sjasone				return (1);
162165557Sjasone	}
162265557Sjasone	return (0);
162365557Sjasone}
162465557Sjasone
162565856Sjhbstatic struct witness *
162665557Sjasonewitness_get()
162765557Sjasone{
162865856Sjhb	struct witness *w;
162965557Sjasone
163065557Sjasone	if ((w = w_free) == NULL) {
163165557Sjasone		witness_dead = 1;
163272200Sbmilekic		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
163365557Sjasone		printf("witness exhausted\n");
163465557Sjasone		return (NULL);
163565557Sjasone	}
163665557Sjasone	w_free = w->w_next;
163765856Sjhb	bzero(w, sizeof(*w));
163865557Sjasone	return (w);
163965557Sjasone}
164065557Sjasone
164165557Sjasonestatic void
164265856Sjhbwitness_free(struct witness *w)
164365557Sjasone{
164465557Sjasone	w->w_next = w_free;
164565557Sjasone	w_free = w;
164665557Sjasone}
164765557Sjasone
164869881Sjakeint
164965557Sjasonewitness_list(struct proc *p)
165065557Sjasone{
165165856Sjhb	struct mtx *m;
165269881Sjake	int nheld;
165365557Sjasone
165471320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
165569881Sjake	nheld = 0;
165672224Sjhb	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
165765557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
165865557Sjasone		    m->mtx_description, m,
165965557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
166069881Sjake		nheld++;
166165557Sjasone	}
166269881Sjake
166369881Sjake	return (nheld);
166465557Sjasone}
166565557Sjasone
166671709Sjhb#ifdef DDB
166771709Sjhb
166872224SjhbDB_SHOW_COMMAND(mutexes, db_witness_list)
166971709Sjhb{
167071709Sjhb
167171709Sjhb	witness_list(CURPROC);
167271709Sjhb}
167371709Sjhb
167472224SjhbDB_SHOW_COMMAND(witness, db_witness_display)
167572224Sjhb{
167672224Sjhb
167772224Sjhb	witness_display(db_printf);
167872224Sjhb}
167971709Sjhb#endif
168071709Sjhb
168165557Sjasonevoid
168265856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
168365557Sjasone{
168471320Sjasone
168571320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
168671352Sjasone	if (m->mtx_witness == NULL)
168771352Sjasone		return;
168871352Sjasone
168965557Sjasone	*filep = m->mtx_witness->w_file;
169065557Sjasone	*linep = m->mtx_witness->w_line;
169165557Sjasone}
169265557Sjasone
169365557Sjasonevoid
169465856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
169565557Sjasone{
169671320Sjasone
169771320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
169871352Sjasone	if (m->mtx_witness == NULL)
169971352Sjasone		return;
170071352Sjasone
170165557Sjasone	m->mtx_witness->w_file = file;
170265557Sjasone	m->mtx_witness->w_line = line;
170365557Sjasone}
170465557Sjasone
170569429Sjhb#endif	/* WITNESS */
1706