subr_turnstile.c revision 71560
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_turnstile.c 71560 2001-01-24 10:57:01Z jhb $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3465557Sjasone *	Main Entry: witness
3565557Sjasone *	Pronunciation: 'wit-n&s
3665557Sjasone *	Function: noun
3765557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
3865557Sjasone *	    testimony, witness, from 2wit
3965557Sjasone *	Date: before 12th century
4065557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4165557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4265557Sjasone *	    a cause or before a judicial tribunal
4365557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4465557Sjasone *	    testify to its having taken place
4565557Sjasone *	4 : one who has personal knowledge of something
4665557Sjasone *	5 a : something serving as evidence or proof : SIGN
4765557Sjasone *	  b : public affirmation by word or example of usually
4865557Sjasone *	      religious faith or conviction <the heroic witness to divine
4965557Sjasone *	      life -- Pilot>
5065557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5165557Sjasone */
5265557Sjasone
5368790Sjhb#include "opt_ddb.h"
5467676Sjhb#include "opt_witness.h"
5567676Sjhb
5669215Salfred/*
5769215Salfred * Cause non-inlined mtx_*() to be compiled.
5869215Salfred * Must be defined early because other system headers may include mutex.h.
5969215Salfred */
6069215Salfred#define _KERN_MUTEX_C_
6169215Salfred
6265557Sjasone#include <sys/param.h>
6367352Sjhb#include <sys/bus.h>
6467352Sjhb#include <sys/kernel.h>
6567352Sjhb#include <sys/malloc.h>
6665557Sjasone#include <sys/proc.h>
6767676Sjhb#include <sys/sysctl.h>
6865557Sjasone#include <sys/systm.h>
6967352Sjhb#include <sys/vmmeter.h>
7065557Sjasone#include <sys/ktr.h>
7165557Sjasone
7267352Sjhb#include <machine/atomic.h>
7367352Sjhb#include <machine/bus.h>
7467352Sjhb#include <machine/clock.h>
7565557Sjasone#include <machine/cpu.h>
7667352Sjhb
7768790Sjhb#include <ddb/ddb.h>
7868790Sjhb
7967352Sjhb#include <vm/vm.h>
8067352Sjhb#include <vm/vm_extern.h>
8167352Sjhb
8267352Sjhb#include <sys/mutex.h>
8365557Sjasone
8465557Sjasone/*
8567352Sjhb * Machine independent bits of the mutex implementation
8667352Sjhb */
8771352Sjasone
#ifdef WITNESS
/*
 * Per-mutex debugging record, only compiled in for WITNESS kernels.
 * The member order is relied upon by positional initializers, so do not
 * rearrange it.
 */
struct mtx_debug {
	struct witness	*mtxd_witness;	/* witness bound to this mutex */
	LIST_ENTRY(mtx)	mtxd_held;	/* linkage on a list of mutexes */
	const char	*mtxd_file;	/* source file name (presumably of
					   the last acquisition -- confirm) */
	int		mtxd_line;	/* source line paired with mtxd_file */
};

/*
 * Shorthand so the debug members can be named as if they were members of
 * struct mtx itself: "m->mtx_file" expands to "m->mtx_debug->mtxd_file".
 */
#define	mtx_file	mtx_debug->mtxd_file
#define	mtx_held	mtx_debug->mtxd_held
#define	mtx_line	mtx_debug->mtxd_line
#define	mtx_witness	mtx_debug->mtxd_witness
#endif	/* WITNESS */
10171352Sjasone
10271352Sjasone/*
10371352Sjasone * Assembly macros
10471352Sjasone *------------------------------------------------------------------------------
10571352Sjasone */
10671352Sjasone
/* Hand the argument to __STRING(). */
#define	_V(x)	__STRING(x)
10871352Sjasone
10971352Sjasone/*
11071352Sjasone * Default, unoptimized mutex micro-operations
11171352Sjasone */
11271352Sjasone
#ifndef _obtain_lock
/*
 * Try to take ownership of the mutex: compare-and-set mtx_lock (acquire
 * variant) from MTX_UNOWNED to tid.  The result is zero when the lock could
 * not be taken.  Only provided when nothing earlier defined _obtain_lock.
 */
#define	_obtain_lock(mp, tid)						\
	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED, (tid))
#endif
11871352Sjasone
#ifndef _release_lock
/*
 * Try to give the mutex up: compare-and-set mtx_lock (release variant) from
 * tid back to MTX_UNOWNED.  The result is zero when mtx_lock was not exactly
 * tid, in which case the caller has to take a slower path.
 */
#define	_release_lock(mp, tid)						\
	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), (void *)MTX_UNOWNED)
#endif
12471352Sjasone
#ifndef _release_lock_quick
/*
 * Unconditionally store MTX_UNOWNED into mtx_lock (release variant).  No
 * ownership check is made, so the caller must already know it owns the lock.
 */
#define	_release_lock_quick(mp)						\
	atomic_store_rel_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED)
#endif
13071352Sjasone
#ifndef _getlock_sleep
/*
 * Acquire a sleep lock.  When the quick attempt fails and the owner bits of
 * mtx_lock already name tid, the recursion is recorded right here; any other
 * failure is handed to mtx_enter_hard().
 */
#define	_getlock_sleep(mp, tid, type) do {				\
	if (!_obtain_lock(mp, tid)) {					\
		if (((mp)->mtx_lock & MTX_FLAGMASK) == ((uintptr_t)(tid))) { \
			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSED);	\
			(mp)->mtx_recurse++;				\
		} else {						\
			mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0);	\
		}							\
	}								\
} while (0)
#endif
14471352Sjasone
#ifndef _getlock_spin_block
/*
 * Acquire a spin lock with interrupts blocked.  The interrupt state is
 * sampled with save_intr() before disable_intr() is called.  On a quick
 * acquisition it is stored in mtx_saveintr here; otherwise it is passed to
 * mtx_enter_hard(), which also deals with recursion (the less common case).
 */
#define	_getlock_spin_block(mp, tid, type) do {				\
	u_int _mtx_intr = save_intr();					\
	disable_intr();							\
	if (_obtain_lock(mp, tid))					\
		(mp)->mtx_saveintr = _mtx_intr;				\
	else								\
		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_intr);	\
} while (0)
#endif
15671352Sjasone
#ifndef _getlock_norecurse
/*
 * Acquire a lock with no inline recursion handling: if the quick attempt
 * does not succeed, everything is left to mtx_enter_hard().
 */
#define	_getlock_norecurse(mp, tid, type) do {				\
	if (!_obtain_lock(mp, tid)) {					\
		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);		\
	}								\
} while (0)
#endif
16771352Sjasone
#ifndef _exitlock_norecurse
/*
 * Release a sleep lock on the assumption that it has not been recursed on.
 * If the quick release fails, mtx_exit_hard() sorts it out (including any
 * recursion).
 */
#define	_exitlock_norecurse(mp, tid, type) do {				\
	if (!_release_lock(mp, tid)) {					\
		mtx_exit_hard((mp), (type) & MTX_HARDOPTS);		\
	}								\
} while (0)
#endif
17871352Sjasone
#ifndef _exitlock
/*
 * Release a sleep lock when it's likely that we recursed on it.  Simple
 * recursion is unwound inline: the count is dropped and MTX_RECURSED is
 * cleared once it reaches zero.  A failed release without MTX_RECURSED set
 * goes to mtx_exit_hard().
 */
#define	_exitlock(mp, tid, type) do {					\
	if (!_release_lock(mp, tid)) {					\
		if (((mp)->mtx_lock & MTX_RECURSED) == 0) {		\
			mtx_exit_hard((mp), (type) & MTX_HARDOPTS);	\
		} else if (--((mp)->mtx_recurse) == 0) {		\
			atomic_clear_ptr(&(mp)->mtx_lock,		\
			    MTX_RECURSED);				\
		}							\
	}								\
} while (0)
#endif
19671352Sjasone
#ifndef _exitlock_spin
/*
 * Release a spin lock that may have been recursed on.  A recursed lock just
 * has its count dropped.  Otherwise the saved interrupt state is copied out
 * of the mutex before the lock is released, then restored.
 */
#define	_exitlock_spin(mp) do {						\
	if (mtx_recursed((mp))) {					\
		(mp)->mtx_recurse--;					\
	} else {							\
		int _mtx_intr = (mp)->mtx_saveintr;			\
									\
		_release_lock_quick(mp);				\
		restore_intr(_mtx_intr);				\
	}								\
} while (0)
#endif
21071352Sjasone
#ifdef WITNESS
/* Witness entry points that are private to this file. */
static void	witness_init(struct mtx *, int flag);
static void	witness_destroy(struct mtx *);
static void	witness_display(void(*)(const char *fmt, ...));

/* Debug record that the all_mtx list head points at. */
static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };

/*
 * Starts out as 1 and is set to 0 once mutexes have been fully initialized,
 * at which point the witness code can be executed safely.
 */
static int witness_cold = 1;
#else	/* WITNESS */

/*
 * Without WITNESS the hooks compile away to nothing.  The flag++ in
 * witness_init() is a sleazoid way of shutting up the unused parameter
 * warning in mtx_init().
 */
#define witness_init(m, flag) flag++
#define witness_destroy(m)
#define witness_try_enter(m, t, f, l)
#endif	/* WITNESS */
23367352Sjhb
23471560Sjhb/* All mutexes in system (used for debug/panic) */
23571560Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
23671560Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
23771560Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx,
23871560Sjhb#ifdef WITNESS
23971560Sjhb	&all_mtx_debug
24071560Sjhb#else
24171560Sjhb	NULL
24271560Sjhb#endif
24371560Sjhb	 };
24471560Sjhb
/* Mutex counters; maintained by code outside this excerpt. */
static int	mtx_cur_cnt;
static int	mtx_max_cnt;

/* Slow-path helpers private to this file. */
static void	propagate_priority(struct proc *);
static void	mtx_enter_hard(struct mtx *, int type, int saveintr);
static void	mtx_exit_hard(struct mtx *, int type);

/* True when nobody owns mutex m. */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
/* Owning process of mutex m (flag bits masked off), or NULL if unowned. */
#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * RETIP reads the word stored just below the address of x; it is used for
 * trace output (presumably the caller's return address when x is the first
 * argument -- machine dependent, confirm).  SET_PRIO assigns a priority to
 * a process.  Both the arguments and the whole expansions are parenthesized
 * so the macros are safe inside larger expressions.
 */
#define	RETIP(x)		(*(((uintptr_t *)(&(x))) - 1))
#define	SET_PRIO(p, pri)	((p)->p_priority = (pri))
25867352Sjhb
25967352Sjhbstatic void
26067352Sjhbpropagate_priority(struct proc *p)
26167352Sjhb{
26267352Sjhb	int pri = p->p_priority;
26367352Sjhb	struct mtx *m = p->p_blocked;
26467352Sjhb
26569376Sjhb	mtx_assert(&sched_lock, MA_OWNED);
26667352Sjhb	for (;;) {
26767352Sjhb		struct proc *p1;
26867352Sjhb
26967352Sjhb		p = mtx_owner(m);
27067352Sjhb
27167352Sjhb		if (p == NULL) {
27267352Sjhb			/*
27367352Sjhb			 * This really isn't quite right. Really
27467352Sjhb			 * ought to bump priority of process that
27567352Sjhb			 * next acquires the mutex.
27667352Sjhb			 */
27767352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
27867352Sjhb			return;
27967352Sjhb		}
28067352Sjhb		MPASS(p->p_magic == P_MAGIC);
28169376Sjhb		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
28267352Sjhb		if (p->p_priority <= pri)
28367352Sjhb			return;
28469376Sjhb
28567352Sjhb		/*
28669376Sjhb		 * Bump this process' priority.
28769376Sjhb		 */
28869376Sjhb		SET_PRIO(p, pri);
28969376Sjhb
29069376Sjhb		/*
29167352Sjhb		 * If lock holder is actually running, just bump priority.
29267352Sjhb		 */
29369376Sjhb#ifdef SMP
29469376Sjhb		/*
29569376Sjhb		 * For SMP, we can check the p_oncpu field to see if we are
29669376Sjhb		 * running.
29769376Sjhb		 */
29869376Sjhb		if (p->p_oncpu != 0xff) {
29967352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
30067352Sjhb			return;
30167352Sjhb		}
30269376Sjhb#else
30367352Sjhb		/*
30469376Sjhb		 * For UP, we check to see if p is curproc (this shouldn't
30569376Sjhb		 * ever happen however as it would mean we are in a deadlock.)
30669376Sjhb		 */
30769376Sjhb		if (p == curproc) {
30869376Sjhb			panic("Deadlock detected");
30969376Sjhb			return;
31069376Sjhb		}
31169376Sjhb#endif
31269376Sjhb		/*
31367352Sjhb		 * If on run queue move to new run queue, and
31467352Sjhb		 * quit.
31567352Sjhb		 */
31667352Sjhb		if (p->p_stat == SRUN) {
31769376Sjhb			printf("XXX: moving process %d(%s) to a new run queue\n",
31869376Sjhb			       p->p_pid, p->p_comm);
31967352Sjhb			MPASS(p->p_blocked == NULL);
32067352Sjhb			remrunqueue(p);
32167352Sjhb			setrunqueue(p);
32267352Sjhb			return;
32367352Sjhb		}
32467352Sjhb
32567352Sjhb		/*
32669376Sjhb		 * If we aren't blocked on a mutex, we should be.
32767352Sjhb		 */
32869376Sjhb		KASSERT(p->p_stat == SMTX, (
32969376Sjhb		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
33069376Sjhb		    p->p_pid, p->p_comm, p->p_stat,
33169376Sjhb		    m->mtx_description));
33267352Sjhb
33367352Sjhb		/*
33467352Sjhb		 * Pick up the mutex that p is blocked on.
33567352Sjhb		 */
33667352Sjhb		m = p->p_blocked;
33767352Sjhb		MPASS(m != NULL);
33867352Sjhb
33967352Sjhb		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
34067352Sjhb		    p->p_comm, m->mtx_description);
34167352Sjhb		/*
34267352Sjhb		 * Check if the proc needs to be moved up on
34367352Sjhb		 * the blocked chain
34467352Sjhb		 */
34569376Sjhb		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
34669376Sjhb			printf("XXX: process at head of run queue\n");
34769376Sjhb			continue;
34869376Sjhb		}
34969376Sjhb		p1 = TAILQ_PREV(p, rq, p_procq);
35069376Sjhb		if (p1->p_priority <= pri) {
35169376Sjhb			printf(
35267352Sjhb	"XXX: previous process %d(%s) has higher priority\n",
35369376Sjhb	                    p->p_pid, p->p_comm);
35467352Sjhb			continue;
35567352Sjhb		}
35667352Sjhb
35767352Sjhb		/*
35869376Sjhb		 * Remove proc from blocked chain and determine where
35969376Sjhb		 * it should be moved up to.  Since we know that p1 has
36069376Sjhb		 * a lower priority than p, we know that at least one
36169376Sjhb		 * process in the chain has a lower priority and that
36269376Sjhb		 * p1 will thus not be NULL after the loop.
36367352Sjhb		 */
36467352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
36567352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
36667352Sjhb			MPASS(p1->p_magic == P_MAGIC);
36767352Sjhb			if (p1->p_priority > pri)
36867352Sjhb				break;
36967352Sjhb		}
37069376Sjhb		MPASS(p1 != NULL);
37169376Sjhb		TAILQ_INSERT_BEFORE(p1, p, p_procq);
37267352Sjhb		CTR4(KTR_LOCK,
37371560Sjhb		    "propagate_priority: p %p moved before %p on [%p] %s",
37467352Sjhb		    p, p1, m, m->mtx_description);
37567352Sjhb	}
37667352Sjhb}
37767352Sjhb
37871352Sjasone/*
37971352Sjasone * Get lock 'm', the macro handles the easy (and most common cases) and leaves
38071352Sjasone * the slow stuff to the mtx_enter_hard() function.
38171352Sjasone *
38271352Sjasone * Note: since type is usually a constant much of this code is optimized out.
38371352Sjasone */
38467352Sjhbvoid
38571352Sjasone_mtx_enter(struct mtx *mtxp, int type, const char *file, int line)
38671352Sjasone{
38771352Sjasone	struct mtx	*mpp = mtxp;
38871352Sjasone
38971352Sjasone	/* bits only valid on mtx_exit() */
39071352Sjasone	MPASS4(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
39171352Sjasone	    STR_mtx_bad_type, file, line);
39271352Sjasone
39371352Sjasone	if ((type) & MTX_SPIN) {
39471352Sjasone		/*
39571352Sjasone		 * Easy cases of spin locks:
39671352Sjasone		 *
39771352Sjasone		 * 1) We already own the lock and will simply recurse on it (if
39871352Sjasone		 *    RLIKELY)
39971352Sjasone		 *
40071352Sjasone		 * 2) The lock is free, we just get it
40171352Sjasone		 */
40271352Sjasone		if ((type) & MTX_RLIKELY) {
40371352Sjasone			/*
40471352Sjasone			 * Check for recursion, if we already have this
40571352Sjasone			 * lock we just bump the recursion count.
40671352Sjasone			 */
40771352Sjasone			if (mpp->mtx_lock == (uintptr_t)CURTHD) {
40871352Sjasone				mpp->mtx_recurse++;
40971352Sjasone				goto done;
41071352Sjasone			}
41171352Sjasone		}
41271352Sjasone
41371352Sjasone		if (((type) & MTX_TOPHALF) == 0) {
41471352Sjasone			/*
41571352Sjasone			 * If an interrupt thread uses this we must block
41671352Sjasone			 * interrupts here.
41771352Sjasone			 */
41871352Sjasone			if ((type) & MTX_FIRST) {
41971352Sjasone				ASS_IEN;
42071352Sjasone				disable_intr();
42171352Sjasone				_getlock_norecurse(mpp, CURTHD,
42271352Sjasone				    (type) & MTX_HARDOPTS);
42371352Sjasone			} else {
42471352Sjasone				_getlock_spin_block(mpp, CURTHD,
42571352Sjasone				    (type) & MTX_HARDOPTS);
42671352Sjasone			}
42771352Sjasone		} else
42871352Sjasone			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
42971352Sjasone	} else {
43071352Sjasone		/* Sleep locks */
43171352Sjasone		if ((type) & MTX_RLIKELY)
43271352Sjasone			_getlock_sleep(mpp, CURTHD, (type) & MTX_HARDOPTS);
43371352Sjasone		else
43471352Sjasone			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
43571352Sjasone	}
43671352Sjasonedone:
43771352Sjasone	WITNESS_ENTER(mpp, type, file, line);
43871352Sjasone	if (((type) & MTX_QUIET) == 0)
43971352Sjasone		CTR5(KTR_LOCK, STR_mtx_enter_fmt,
44071352Sjasone		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
44171352Sjasone
44271352Sjasone}
44371352Sjasone
44471352Sjasone/*
44571352Sjasone * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
44671352Sjasone *
44771352Sjasone * XXX DOES NOT HANDLE RECURSION
44871352Sjasone */
44971352Sjasoneint
45071352Sjasone_mtx_try_enter(struct mtx *mtxp, int type, const char *file, int line)
45171352Sjasone{
45271352Sjasone	struct mtx	*const mpp = mtxp;
45371352Sjasone	int	rval;
45471352Sjasone
45571352Sjasone	rval = _obtain_lock(mpp, CURTHD);
45671352Sjasone#ifdef WITNESS
45771352Sjasone	if (rval && mpp->mtx_witness != NULL) {
45871352Sjasone		MPASS(mpp->mtx_recurse == 0);
45971352Sjasone		witness_try_enter(mpp, type, file, line);
46071352Sjasone	}
46171352Sjasone#endif	/* WITNESS */
46271352Sjasone	if (((type) & MTX_QUIET) == 0)
46371352Sjasone		CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
46471352Sjasone		    mpp->mtx_description, mpp, rval, file, line);
46571352Sjasone
46671352Sjasone	return rval;
46771352Sjasone}
46871352Sjasone
46971352Sjasone/*
47071352Sjasone * Release lock m.
47171352Sjasone */
47271352Sjasonevoid
47371352Sjasone_mtx_exit(struct mtx *mtxp, int type, const char *file, int line)
47471352Sjasone{
47571352Sjasone	struct mtx	*const mpp = mtxp;
47671352Sjasone
47771352Sjasone	MPASS4(mtx_owned(mpp), STR_mtx_owned, file, line);
47871352Sjasone	WITNESS_EXIT(mpp, type, file, line);
47971352Sjasone	if (((type) & MTX_QUIET) == 0)
48071352Sjasone		CTR5(KTR_LOCK, STR_mtx_exit_fmt,
48171352Sjasone		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
48271352Sjasone	if ((type) & MTX_SPIN) {
48371352Sjasone		if ((type) & MTX_NORECURSE) {
48471352Sjasone			int mtx_intr = mpp->mtx_saveintr;
48571352Sjasone
48671352Sjasone			MPASS4(mpp->mtx_recurse == 0, STR_mtx_recurse,
48771352Sjasone			    file, line);
48871352Sjasone			_release_lock_quick(mpp);
48971352Sjasone			if (((type) & MTX_TOPHALF) == 0) {
49071352Sjasone				if ((type) & MTX_FIRST) {
49171352Sjasone					ASS_IDIS;
49271352Sjasone					enable_intr();
49371352Sjasone				} else
49471352Sjasone					restore_intr(mtx_intr);
49571352Sjasone			}
49671352Sjasone		} else {
49771352Sjasone			if (((type & MTX_TOPHALF) == 0) &&
49871352Sjasone			    (type & MTX_FIRST)) {
49971352Sjasone				ASS_IDIS;
50071352Sjasone				ASS_SIEN(mpp);
50171352Sjasone			}
50271352Sjasone			_exitlock_spin(mpp);
50371352Sjasone		}
50471352Sjasone	} else {
50571352Sjasone		/* Handle sleep locks */
50671352Sjasone		if ((type) & MTX_RLIKELY)
50771352Sjasone			_exitlock(mpp, CURTHD, (type) & MTX_HARDOPTS);
50871352Sjasone		else {
50971352Sjasone			_exitlock_norecurse(mpp, CURTHD,
51071352Sjasone			    (type) & MTX_HARDOPTS);
51171352Sjasone		}
51271352Sjasone	}
51371352Sjasone}
51471352Sjasone
51571352Sjasonevoid
51667352Sjhbmtx_enter_hard(struct mtx *m, int type, int saveintr)
51767352Sjhb{
51867352Sjhb	struct proc *p = CURPROC;
51967352Sjhb
52067352Sjhb	KASSERT(p != NULL, ("curproc is NULL in mutex"));
52167352Sjhb
52267352Sjhb	switch (type) {
52367352Sjhb	case MTX_DEF:
52467352Sjhb		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
52567352Sjhb			m->mtx_recurse++;
52671228Sbmilekic			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
52769998Sjhb			if ((type & MTX_QUIET) == 0)
52871560Sjhb				CTR1(KTR_LOCK, "mtx_enter: %p recurse", m);
52967352Sjhb			return;
53067352Sjhb		}
53169998Sjhb		if ((type & MTX_QUIET) == 0)
53269998Sjhb			CTR3(KTR_LOCK,
53371560Sjhb			    "mtx_enter: %p contested (lock=%p) [%p]",
53469998Sjhb			    m, (void *)m->mtx_lock, (void *)RETIP(m));
53569376Sjhb
53669376Sjhb		/*
53769376Sjhb		 * Save our priority.  Even though p_nativepri is protected
53869376Sjhb		 * by sched_lock, we don't obtain it here as it can be
53969376Sjhb		 * expensive.  Since this is the only place p_nativepri is
54069376Sjhb		 * set, and since two CPUs will not be executing the same
54169376Sjhb		 * process concurrently, we know that no other CPU is going
54269376Sjhb		 * to be messing with this.  Also, p_nativepri is only read
54369376Sjhb		 * when we are blocked on a mutex, so that can't be happening
54469376Sjhb		 * right now either.
54569376Sjhb		 */
54669376Sjhb		p->p_nativepri = p->p_priority;
54767352Sjhb		while (!_obtain_lock(m, p)) {
54867396Sjhb			uintptr_t v;
54967352Sjhb			struct proc *p1;
55067352Sjhb
55167352Sjhb			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
55267352Sjhb			/*
55367352Sjhb			 * check if the lock has been released while
55467352Sjhb			 * waiting for the schedlock.
55567352Sjhb			 */
55667352Sjhb			if ((v = m->mtx_lock) == MTX_UNOWNED) {
55767352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
55867352Sjhb				continue;
55967352Sjhb			}
56067352Sjhb			/*
56167352Sjhb			 * The mutex was marked contested on release. This
56267352Sjhb			 * means that there are processes blocked on it.
56367352Sjhb			 */
56467352Sjhb			if (v == MTX_CONTESTED) {
56567352Sjhb				p1 = TAILQ_FIRST(&m->mtx_blocked);
56667352Sjhb				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
56767352Sjhb				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
56867352Sjhb				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
56967352Sjhb				if (p1->p_priority < p->p_priority) {
57067352Sjhb					SET_PRIO(p, p1->p_priority);
57167352Sjhb				}
57267352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
57367352Sjhb				return;
57467352Sjhb			}
57567352Sjhb			/*
57667352Sjhb			 * If the mutex isn't already contested and
57767352Sjhb			 * a failure occurs setting the contested bit the
57867352Sjhb			 * mutex was either release or the
57967352Sjhb			 * state of the RECURSION bit changed.
58067352Sjhb			 */
58167352Sjhb			if ((v & MTX_CONTESTED) == 0 &&
58267352Sjhb			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
58367352Sjhb				               (void *)(v | MTX_CONTESTED))) {
58467352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
58567352Sjhb				continue;
58667352Sjhb			}
58767352Sjhb
58867352Sjhb			/* We definitely have to sleep for this lock */
58967352Sjhb			mtx_assert(m, MA_NOTOWNED);
59067352Sjhb
59167352Sjhb#ifdef notyet
59267352Sjhb			/*
59367352Sjhb			 * If we're borrowing an interrupted thread's VM
59467352Sjhb			 * context must clean up before going to sleep.
59567352Sjhb			 */
59667352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
59767352Sjhb				ithd_t *it = (ithd_t *)p;
59867352Sjhb
59967352Sjhb				if (it->it_interrupted) {
60069998Sjhb					if ((type & MTX_QUIET) == 0)
60169998Sjhb						CTR2(KTR_LOCK,
60267352Sjhb					    "mtx_enter: 0x%x interrupted 0x%x",
60369998Sjhb						    it, it->it_interrupted);
60467352Sjhb					intr_thd_fixup(it);
60567352Sjhb				}
60667352Sjhb			}
60767352Sjhb#endif
60867352Sjhb
60967352Sjhb			/* Put us on the list of procs blocked on this mutex */
61067352Sjhb			if (TAILQ_EMPTY(&m->mtx_blocked)) {
61167352Sjhb				p1 = (struct proc *)(m->mtx_lock &
61267352Sjhb						     MTX_FLAGMASK);
61367352Sjhb				LIST_INSERT_HEAD(&p1->p_contested, m,
61467352Sjhb						 mtx_contested);
61567352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
61667352Sjhb			} else {
61767352Sjhb				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
61867352Sjhb					if (p1->p_priority > p->p_priority)
61967352Sjhb						break;
62067352Sjhb				if (p1)
62167352Sjhb					TAILQ_INSERT_BEFORE(p1, p, p_procq);
62267352Sjhb				else
62367352Sjhb					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
62467352Sjhb							  p_procq);
62567352Sjhb			}
62667352Sjhb
62767352Sjhb			p->p_blocked = m;	/* Who we're blocked on */
62869369Sjhb			p->p_mtxname = m->mtx_description;
62967352Sjhb			p->p_stat = SMTX;
63067352Sjhb#if 0
63167352Sjhb			propagate_priority(p);
63267352Sjhb#endif
63369998Sjhb			if ((type & MTX_QUIET) == 0)
63469998Sjhb				CTR3(KTR_LOCK,
63571560Sjhb				    "mtx_enter: p %p blocked on [%p] %s",
63669998Sjhb				    p, m, m->mtx_description);
63768808Sjhb			mi_switch();
63869998Sjhb			if ((type & MTX_QUIET) == 0)
63969998Sjhb				CTR3(KTR_LOCK,
64071560Sjhb			    "mtx_enter: p %p free from blocked on [%p] %s",
64169998Sjhb				    p, m, m->mtx_description);
64267352Sjhb			mtx_exit(&sched_lock, MTX_SPIN);
64367352Sjhb		}
64467352Sjhb		return;
64567352Sjhb	case MTX_SPIN:
64667352Sjhb	case MTX_SPIN | MTX_FIRST:
64767352Sjhb	case MTX_SPIN | MTX_TOPHALF:
64867352Sjhb	    {
64967352Sjhb		int i = 0;
65067352Sjhb
65167352Sjhb		if (m->mtx_lock == (uintptr_t)p) {
65267352Sjhb			m->mtx_recurse++;
65367352Sjhb			return;
65467352Sjhb		}
65569998Sjhb		if ((type & MTX_QUIET) == 0)
65669998Sjhb			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
65767352Sjhb		for (;;) {
65867352Sjhb			if (_obtain_lock(m, p))
65967352Sjhb				break;
66067352Sjhb			while (m->mtx_lock != MTX_UNOWNED) {
66167352Sjhb				if (i++ < 1000000)
66267352Sjhb					continue;
66367352Sjhb				if (i++ < 6000000)
66467352Sjhb					DELAY (1);
66567352Sjhb#ifdef DDB
66667352Sjhb				else if (!db_active)
66767352Sjhb#else
66867352Sjhb				else
66967352Sjhb#endif
67067352Sjhb					panic(
67171560Sjhb				"spin lock %s held by %p for > 5 seconds",
67267352Sjhb					    m->mtx_description,
67367352Sjhb					    (void *)m->mtx_lock);
67467352Sjhb			}
67567352Sjhb		}
67667352Sjhb
67767352Sjhb#ifdef MUTEX_DEBUG
67867352Sjhb		if (type != MTX_SPIN)
67967352Sjhb			m->mtx_saveintr = 0xbeefface;
68067352Sjhb		else
68167352Sjhb#endif
68267352Sjhb			m->mtx_saveintr = saveintr;
68369998Sjhb		if ((type & MTX_QUIET) == 0)
68471560Sjhb			CTR1(KTR_LOCK, "mtx_enter: %p spin done", m);
68567352Sjhb		return;
68667352Sjhb	    }
68767352Sjhb	}
68867352Sjhb}
68967352Sjhb
69067352Sjhbvoid
69167352Sjhbmtx_exit_hard(struct mtx *m, int type)
69267352Sjhb{
69367352Sjhb	struct proc *p, *p1;
69467352Sjhb	struct mtx *m1;
69567352Sjhb	int pri;
69667352Sjhb
69767352Sjhb	p = CURPROC;
69867352Sjhb	switch (type) {
69967352Sjhb	case MTX_DEF:
70067352Sjhb	case MTX_DEF | MTX_NOSWITCH:
70171228Sbmilekic		if (mtx_recursed(m)) {
70267352Sjhb			if (--(m->mtx_recurse) == 0)
70371228Sbmilekic				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
70469998Sjhb			if ((type & MTX_QUIET) == 0)
70571560Sjhb				CTR1(KTR_LOCK, "mtx_exit: %p unrecurse", m);
70667352Sjhb			return;
70767352Sjhb		}
70867352Sjhb		mtx_enter(&sched_lock, MTX_SPIN);
70969998Sjhb		if ((type & MTX_QUIET) == 0)
71071560Sjhb			CTR1(KTR_LOCK, "mtx_exit: %p contested", m);
71167352Sjhb		p1 = TAILQ_FIRST(&m->mtx_blocked);
71267352Sjhb		MPASS(p->p_magic == P_MAGIC);
71367352Sjhb		MPASS(p1->p_magic == P_MAGIC);
71467352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
71567352Sjhb		if (TAILQ_EMPTY(&m->mtx_blocked)) {
71667352Sjhb			LIST_REMOVE(m, mtx_contested);
71767352Sjhb			_release_lock_quick(m);
71869998Sjhb			if ((type & MTX_QUIET) == 0)
71971560Sjhb				CTR1(KTR_LOCK, "mtx_exit: %p not held", m);
72067352Sjhb		} else
72169363Sjhb			atomic_store_rel_ptr(&m->mtx_lock,
72269363Sjhb			    (void *)MTX_CONTESTED);
72367352Sjhb		pri = MAXPRI;
72467352Sjhb		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
72567352Sjhb			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
72667352Sjhb			if (cp < pri)
72767352Sjhb				pri = cp;
72867352Sjhb		}
72967352Sjhb		if (pri > p->p_nativepri)
73067352Sjhb			pri = p->p_nativepri;
73167352Sjhb		SET_PRIO(p, pri);
73269998Sjhb		if ((type & MTX_QUIET) == 0)
73369998Sjhb			CTR2(KTR_LOCK,
73471560Sjhb			    "mtx_exit: %p contested setrunqueue %p", m, p1);
73567352Sjhb		p1->p_blocked = NULL;
73669369Sjhb		p1->p_mtxname = NULL;
73767352Sjhb		p1->p_stat = SRUN;
73867352Sjhb		setrunqueue(p1);
73967352Sjhb		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
74067352Sjhb#ifdef notyet
74167352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
74267352Sjhb				ithd_t *it = (ithd_t *)p;
74367352Sjhb
74467352Sjhb				if (it->it_interrupted) {
74569998Sjhb					if ((type & MTX_QUIET) == 0)
74669998Sjhb						CTR2(KTR_LOCK,
74767352Sjhb					    "mtx_exit: 0x%x interruped 0x%x",
74869998Sjhb						    it, it->it_interrupted);
74967352Sjhb					intr_thd_fixup(it);
75067352Sjhb				}
75167352Sjhb			}
75267352Sjhb#endif
75367352Sjhb			setrunqueue(p);
75469998Sjhb			if ((type & MTX_QUIET) == 0)
75569998Sjhb				CTR2(KTR_LOCK,
75671560Sjhb				    "mtx_exit: %p switching out lock=%p",
75769998Sjhb				    m, (void *)m->mtx_lock);
75867352Sjhb			mi_switch();
75969998Sjhb			if ((type & MTX_QUIET) == 0)
76069998Sjhb				CTR2(KTR_LOCK,
76171560Sjhb				    "mtx_exit: %p resuming lock=%p",
76269998Sjhb				    m, (void *)m->mtx_lock);
76367352Sjhb		}
76467352Sjhb		mtx_exit(&sched_lock, MTX_SPIN);
76567352Sjhb		break;
76667352Sjhb	case MTX_SPIN:
76767352Sjhb	case MTX_SPIN | MTX_FIRST:
76871228Sbmilekic		if (mtx_recursed(m)) {
76967352Sjhb			m->mtx_recurse--;
77067352Sjhb			return;
77167352Sjhb		}
77267352Sjhb		MPASS(mtx_owned(m));
77367352Sjhb		_release_lock_quick(m);
77467352Sjhb		if (type & MTX_FIRST)
77567352Sjhb			enable_intr();	/* XXX is this kosher? */
77667352Sjhb		else {
77767352Sjhb			MPASS(m->mtx_saveintr != 0xbeefface);
77867352Sjhb			restore_intr(m->mtx_saveintr);
77967352Sjhb		}
78067352Sjhb		break;
78167352Sjhb	case MTX_SPIN | MTX_TOPHALF:
78271228Sbmilekic		if (mtx_recursed(m)) {
78367352Sjhb			m->mtx_recurse--;
78467352Sjhb			return;
78567352Sjhb		}
78667352Sjhb		MPASS(mtx_owned(m));
78767352Sjhb		_release_lock_quick(m);
78867352Sjhb		break;
78967352Sjhb	default:
79067352Sjhb		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
79167352Sjhb	}
79267352Sjhb}
79367352Sjhb
#ifdef INVARIANTS
/*
 * Back end of mtx_assert(): panic unless mutex 'm' is in the state that
 * 'what' describes.
 *
 *	MA_OWNED			owned by the caller
 *	MA_OWNED | MA_RECURSED		owned and recursed
 *	MA_OWNED | MA_NOTRECURSED	owned and not recursed
 *	MA_NOTOWNED			not owned by the caller
 *
 * file and line name the call site and are included in the panic message.
 * Any other value of 'what' panics as well.
 */
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{
	if (what == MA_NOTOWNED) {
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_description, file, line);
	} else if (what == MA_OWNED ||
	    what == (MA_OWNED | MA_RECURSED) ||
	    what == (MA_OWNED | MA_NOTRECURSED)) {
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_description, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_description, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_description, file, line);
		}
	} else {
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
82471352Sjasone
/* Values for the `when' argument of mtx_validate(). */
#define MV_DESTROY	0	/* validate before destroy */
#define MV_INIT		1	/* validate before init */
82767352Sjhb
#ifdef MUTEX_DEBUG

int mtx_validate __P((struct mtx *, int));

/*
 * Sanity-check the global list of mutexes headed by all_mtx and then check
 * `m' against it.
 *
 * With MV_DESTROY, `m' is asserted to be on the list.  With MV_INIT, finding
 * `m' already on the list is reported and makes the function return 1 so the
 * caller can skip re-initialization.  Returns 0 in every other case,
 * including while the system (or witness) is still cold and for all_mtx
 * itself, where no checking is done.
 */
int
mtx_validate(struct mtx *m, int when)
{
	struct mtx *cur;
	int seen;
	int already_there = 0;

#ifdef WITNESS
	if (witness_cold)
		return 0;
#endif
	if (m == &all_mtx || cold)
		return 0;

	mtx_enter(&all_mtx, MTX_DEF);
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
	    VM_PROT_READ) == 1);
#endif
	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);

	/* Walk the whole ring, checking each link and counting entries. */
	seen = 0;
	cur = all_mtx.mtx_next;
	while (cur != &all_mtx) {
#ifndef __alpha__
		if (kernacc((caddr_t)cur->mtx_next, sizeof(uintptr_t),
		    VM_PROT_READ) != 1) {
			panic("mtx_validate: mp=%p mp->mtx_next=%p",
			    cur, cur->mtx_next);
		}
#endif
		seen++;
		if (seen > mtx_cur_cnt) {
			panic("mtx_validate: too many in chain, known=%d\n",
			    mtx_cur_cnt);
		}
		cur = cur->mtx_next;
	}
	MPASS(seen == mtx_cur_cnt);

	if (when == MV_DESTROY) {
		/* A mutex being destroyed must be on the list. */
		cur = all_mtx.mtx_next;
		while (cur != &all_mtx && cur != m)
			cur = cur->mtx_next;
		MPASS(cur == m);
	} else if (when == MV_INIT) {
		/* A mutex being initialized should not be on the list yet. */
		for (cur = all_mtx.mtx_next; cur != &all_mtx;
		    cur = cur->mtx_next) {
			if (cur != m)
				continue;
			/*
			 * Not good. This mutex already exists.
			 */
			printf("re-initing existing mutex %s\n",
			    m->mtx_description);
			MPASS(m->mtx_lock == MTX_UNOWNED);
			already_there = 1;
		}
	}
	mtx_exit(&all_mtx, MTX_DEF);
	return (already_there);
}
#endif
89467352Sjhb
89567352Sjhbvoid
89667352Sjhbmtx_init(struct mtx *m, const char *t, int flag)
89767352Sjhb{
89869998Sjhb	if ((flag & MTX_QUIET) == 0)
89971560Sjhb		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, t);
90067352Sjhb#ifdef MUTEX_DEBUG
90167352Sjhb	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
90267352Sjhb		return;
90369429Sjhb#endif
90467352Sjhb
90567352Sjhb	bzero((void *)m, sizeof *m);
90667352Sjhb	TAILQ_INIT(&m->mtx_blocked);
90769429Sjhb#ifdef WITNESS
90871320Sjasone	if (!witness_cold) {
90971320Sjasone		/* XXX - should not use DEVBUF */
91071560Sjhb		m->mtx_debug = malloc(sizeof(struct mtx_debug),
91171320Sjasone		    M_DEVBUF, M_NOWAIT | M_ZERO);
91271560Sjhb		MPASS(m->mtx_debug != NULL);
91371320Sjasone	}
91471560Sjhb#endif
91571320Sjasone	m->mtx_description = t;
91671320Sjasone
91771320Sjasone	m->mtx_flags = flag;
91867352Sjhb	m->mtx_lock = MTX_UNOWNED;
91967352Sjhb	/* Put on all mutex queue */
92067352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
92167352Sjhb	m->mtx_next = &all_mtx;
92267352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
92367352Sjhb	m->mtx_prev->mtx_next = m;
92467352Sjhb	all_mtx.mtx_prev = m;
92567352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
92667352Sjhb		mtx_max_cnt = mtx_cur_cnt;
92767352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
92871320Sjasone#ifdef WITNESS
92971320Sjasone	if (!witness_cold)
93071320Sjasone		witness_init(m, flag);
93171320Sjasone#endif
93267352Sjhb}
93367352Sjhb
93467352Sjhbvoid
93567352Sjhbmtx_destroy(struct mtx *m)
93667352Sjhb{
93767352Sjhb
93871320Sjasone#ifdef WITNESS
93971320Sjasone	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
94071320Sjasone	    __FUNCTION__));
94171320Sjasone#endif
94271560Sjhb	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
94367352Sjhb#ifdef MUTEX_DEBUG
94467352Sjhb	if (m->mtx_next == NULL)
94567352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
94667352Sjhb		    m, m->mtx_description);
94767352Sjhb
94867352Sjhb	if (!mtx_owned(m)) {
94967352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
95067352Sjhb	} else {
95171228Sbmilekic		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
95267352Sjhb	}
95367352Sjhb	mtx_validate(m, MV_DESTROY);		/* diagnostic */
95467352Sjhb#endif
95567352Sjhb
95667352Sjhb#ifdef WITNESS
95767352Sjhb	if (m->mtx_witness)
95867352Sjhb		witness_destroy(m);
95967352Sjhb#endif /* WITNESS */
96067352Sjhb
96167352Sjhb	/* Remove from the all mutex queue */
96267352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
96367352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
96467352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
96567352Sjhb#ifdef MUTEX_DEBUG
96667352Sjhb	m->mtx_next = m->mtx_prev = NULL;
96769429Sjhb#endif
96869429Sjhb#ifdef WITNESS
96971560Sjhb	free(m->mtx_debug, M_DEVBUF);
97071560Sjhb	m->mtx_debug = NULL;
97167352Sjhb#endif
97267352Sjhb	mtx_cur_cnt--;
97367352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
97467352Sjhb}
97567352Sjhb
97671560Sjhb/*
97771560Sjhb * The non-inlined versions of the mtx_*() functions are always built (above),
97871560Sjhb * but the witness code depends on the WITNESS kernel option being specified.
97971560Sjhb */
98071560Sjhb
98171560Sjhb#ifdef WITNESS
98271320Sjasonestatic void
98371320Sjasonewitness_fixup(void *dummy __unused)
98471320Sjasone{
98571320Sjasone	struct mtx *mp;
98671320Sjasone
98771560Sjhb	/*
98871560Sjhb	 * We have to release Giant before initializing its witness
98971560Sjhb	 * structure so that WITNESS doesn't get confused.
99071560Sjhb	 */
99171560Sjhb	mtx_exit(&Giant, MTX_DEF);
99271560Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
99371560Sjhb	mtx_enter(&all_mtx, MTX_DEF);
99471560Sjhb
99571320Sjasone	/* Iterate through all mutexes and finish up mutex initialization. */
99671320Sjasone	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
99771320Sjasone
99871320Sjasone		/* XXX - should not use DEVBUF */
99971560Sjhb		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
100071320Sjasone		    M_DEVBUF, M_NOWAIT | M_ZERO);
100171560Sjhb		MPASS(mp->mtx_debug != NULL);
100271320Sjasone
100371320Sjasone		witness_init(mp, mp->mtx_flags);
100471320Sjasone	}
100571560Sjhb	mtx_exit(&all_mtx, MTX_DEF);
100671320Sjasone
100771320Sjasone	/* Mark the witness code as being ready for use. */
100871320Sjasone	atomic_store_rel_int(&witness_cold, 0);
100971560Sjhb
101071560Sjhb	mtx_enter(&Giant, MTX_DEF);
101171320Sjasone}
101271320SjasoneSYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
101371320Sjasone
/* Number of entries in the static w_data[] pool of witness structures. */
#define WITNESS_COUNT 200
/* Child slots per struct witness; overflow chains via w_morechildren. */
#define	WITNESS_NCHILDREN 2

/*
 * Master switch: when zero, enroll() returns NULL and mutexes get no
 * witness.  Values greater than 1 additionally let witness_enter() accept an
 * acquisition outright when the new lock's level is higher than that of the
 * most recently acquired one.
 */
int witness_watch = 1;
101865557Sjasone
/*
 * One node of the lock-order graph.  Witnesses are looked up by description
 * in enroll(), so mutexes sharing a description share a witness.  The same
 * structure is also used as an overflow node holding extra child slots for
 * another witness (see w_morechildren).
 */
struct witness {
	struct witness	*w_next;	/* next entry on the w_all list */
	const char	*w_description;	/* name matched by enroll() */
	const char	*w_file;	/* file of last acquisition */
	int		 w_line;	/* line of last acquisition */
	struct witness	*w_morechildren; /* overflow node with more children */
	u_char		 w_childcnt;	/* used slots in w_children[] */
	u_char		 w_Giant_squawked:1; /* reversal vs. Giant reported */
	u_char		 w_other_squawked:1; /* reversal vs. other lock reported */
	u_char		 w_same_squawked:1; /* duplicate acquire reported */
	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
	u_int		 w_level;	/* spin: order bit; else graph depth */
	struct witness	*w_children[WITNESS_NCHILDREN];
};
103365557Sjasone
/*
 * A pair of lock names for blessed_list[].
 * NOTE(review): presumably matched against witness descriptions by
 * blessed(), which is not visible here -- confirm.
 */
struct witness_blessed {
	char 	*b_lock1;
	char	*b_lock2;
};
103865557Sjasone
#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into the debugger (via Debugger()) when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 *
 * The default is 1 when the kernel is built with WITNESS_DDB and 0
 * otherwise; it is exposed as the debug.witness_ddb tunable and as a
 * read-write sysctl.
 */
int	witness_ddb;
#ifdef WITNESS_DDB
TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
#else
TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
#endif /* DDB */
105465557Sjasone
/*
 * When non-zero, enroll() returns NULL for MTX_SPIN mutexes, so spin locks
 * are not tracked by witness.  The default is 1 when the kernel is built
 * with WITNESS_SKIPSPIN and 0 otherwise; it is exposed as the
 * debug.witness_skipspin tunable and as a read-only sysctl.
 */
int	witness_skipspin;
#ifdef WITNESS_SKIPSPIN
TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
#else
TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
    "");
106365557Sjasone
/* Spin mutex ("witness lock") taken around witness list/graph updates. */
static struct mtx	w_mtx;
/* Free list of witness structures (NOTE(review): managed by witness_get()/
 * witness_free(), not visible here -- confirm). */
static struct witness	*w_free;
/* List of all enrolled witnesses, linked through w_next. */
static struct witness	*w_all;
/* Set to 1 by enroll() once w_mtx, the pool and the order list are set up. */
static int		 w_inited;
static int		 witness_dead;	/* fatal error, probably no memory */

/* Static backing store from which all witness structures come. */
static struct witness	 w_data[WITNESS_COUNT];
107165557Sjasone
/* Forward declarations of the file-local witness helpers. */
static struct witness	 *enroll __P((const char *description, int flag));
static int itismychild __P((struct witness *parent, struct witness *child));
static void removechild __P((struct witness *parent, struct witness *child));
static int isitmychild __P((struct witness *parent, struct witness *child));
static int isitmydescendant __P((struct witness *parent, struct witness *child));
static int dup_ok __P((struct witness *));
static int blessed __P((struct witness *, struct witness *));
static void witness_displaydescendants
    __P((void(*)(const char *fmt, ...), struct witness *));
static void witness_leveldescendents __P((struct witness *parent, int level));
static void witness_levelall __P((void));
static struct witness * witness_get __P((void));
static void witness_free __P((struct witness *m));
108565557Sjasone
108665557Sjasone
/*
 * Mutex descriptions that enroll() refuses to track; it returns NULL for
 * any name listed here.
 */
static char *ignore_list[] = {
	"witness lock",
	NULL
};

/*
 * Required acquisition order for spin mutexes.  enroll() assigns a spin
 * witness the level (1 << index) of its name in this list and panics if the
 * name is missing.
 */
static char *spin_order_list[] = {
	"sio",
	"sched lock",
#ifdef __i386__
	"clk",
#endif
	"callout",
	/*
	 * leaf locks
	 */
	NULL
};

/*
 * Pre-seeded lock orders for non-spin mutexes.  Each NULL-terminated group
 * is a chain in which every name is made a child of the one before it; an
 * additional NULL ends the whole list.
 */
static char *order_list[] = {
	"Giant", "uidinfo hash", "uidinfo struct", NULL,
	"Giant", "proctree", "allproc", "process lock", NULL,
	NULL
};

/*
 * NOTE(review): presumably the descriptions for which dup_ok() permits
 * acquiring two locks of the same type; dup_ok() is not visible here --
 * confirm.
 */
static char *dup_list[] = {
	NULL
};

/*
 * Mutexes that witness_sleep() does not complain about when they are held
 * across a sleep.
 */
static char *sleep_list[] = {
	"Giant",
	NULL
};

/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
/* Number of entries in blessed_list[]. */
static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
112765557Sjasone
112871352Sjasonestatic void
112965856Sjhbwitness_init(struct mtx *m, int flag)
113065557Sjasone{
113165557Sjasone	m->mtx_witness = enroll(m->mtx_description, flag);
113265557Sjasone}
113365557Sjasone
113471352Sjasonestatic void
113565856Sjhbwitness_destroy(struct mtx *m)
113665557Sjasone{
113765856Sjhb	struct mtx *m1;
113865557Sjasone	struct proc *p;
113965557Sjasone	p = CURPROC;
114065557Sjasone	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
114165557Sjasone		m1 = LIST_NEXT(m1, mtx_held)) {
114265557Sjasone		if (m1 == m) {
114365557Sjasone			LIST_REMOVE(m, mtx_held);
114465557Sjasone			break;
114565557Sjasone		}
114665557Sjasone	}
114765557Sjasone	return;
114865557Sjasone
114965557Sjasone}
115065557Sjasone
115171352Sjasonestatic void
115271352Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
115371352Sjasone{
115471352Sjasone	struct witness *w, *w1;
115571352Sjasone
115671352Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
115771352Sjasone	witness_levelall();
115871352Sjasone
115971352Sjasone	for (w = w_all; w; w = w->w_next) {
116071352Sjasone		if (w->w_file == NULL)
116171352Sjasone			continue;
116271352Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
116371352Sjasone			if (isitmychild(w1, w))
116471352Sjasone				break;
116571352Sjasone		}
116671352Sjasone		if (w1 != NULL)
116771352Sjasone			continue;
116871352Sjasone		/*
116971352Sjasone		 * This lock has no anscestors, display its descendants.
117071352Sjasone		 */
117171352Sjasone		witness_displaydescendants(prnt, w);
117271352Sjasone	}
117371352Sjasone	prnt("\nMutex which were never acquired\n");
117471352Sjasone	for (w = w_all; w; w = w->w_next) {
117571352Sjasone		if (w->w_file != NULL)
117671352Sjasone			continue;
117771352Sjasone		prnt("%s\n", w->w_description);
117871352Sjasone	}
117971352Sjasone}
118071352Sjasone
/*
 * Record and check the acquisition of mutex `m' at file:line.
 *
 * `flags' carries the type the caller used (MTX_SPIN or not); using the
 * wrong type for the mutex panics, as does recursing on a mutex that was
 * not initialized with MTX_RECURSE.  Nothing is done while witness is cold,
 * when the mutex has no witness, or after a panic.
 *
 * Spin mutexes are checked against the per-CPU witness_spin_check bitmask.
 * Other mutexes are checked against the mutexes the current process already
 * holds: duplicate acquisitions and lock order reversals are printed (and
 * optionally enter DDB), and a previously unseen order is added to the
 * graph.  On the non-spin path the mutex is finally pushed onto the
 * process's held list.
 */
void
witness_enter(struct mtx *m, int flags, const char *file, int line)
{
	struct witness *w, *w1;
	struct mtx *m1;
	struct proc *p;
	int i;
#ifdef DDB
	int go_into_ddb = 0;
#endif /* DDB */

	if (witness_cold || m->mtx_witness == NULL || panicstr)
		return;
	w = m->mtx_witness;
	p = CURPROC;

	if (flags & MTX_SPIN) {
		if ((m->mtx_flags & MTX_SPIN) == 0)
			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
			    " %s:%d", m->mtx_description, file, line);
		if (mtx_recursed(m)) {
			if ((m->mtx_flags & MTX_RECURSE) == 0)
				panic("mutex_enter: recursion on non-recursive"
				    " mutex %s @ %s:%d", m->mtx_description,
				    file, line);
			return;
		}
		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
		/*
		 * i is the mask of spin-lock level bits already held on this
		 * CPU.  Panic if this lock's level is numerically below it.
		 */
		i = PCPU_GET(witness_spin_check);
		if (i != 0 && w->w_level < i) {
			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
			    " %s:%d already holding %s:%x",
			    m->mtx_description, w->w_level, file, line,
			    spin_order_list[ffs(i)-1], i);
		}
		PCPU_SET(witness_spin_check, i | w->w_level);
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		w->w_file = file;
		w->w_line = line;
		m->mtx_line = line;
		m->mtx_file = file;
		return;
	}
	if ((m->mtx_flags & MTX_SPIN) != 0)
		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
		    m->mtx_description, file, line);

	if (mtx_recursed(m)) {
		if ((m->mtx_flags & MTX_RECURSE) == 0)
			panic("mutex_enter: recursion on non-recursive"
			    " mutex %s @ %s:%d", m->mtx_description,
			    file, line);
		return;
	}
	/* Skip the order checks, but still record the acquisition at out:. */
	if (witness_dead)
		goto out;
	if (cold)
		goto out;

	if (!mtx_legal2block())
		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
			    m->mtx_description, file, line);
	/*
	 * Is this the first mutex acquired
	 */
	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
		goto out;

	/* Same witness as the most recently acquired mutex: a duplicate. */
	if ((w1 = m1->mtx_witness) == w) {
		if (w->w_same_squawked || dup_ok(w))
			goto out;
		w->w_same_squawked = 1;
		printf("acquring duplicate lock of same type: \"%s\"\n",
			m->mtx_description);
		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
		printf(" 2nd @ %s:%d\n", file, line);
#ifdef DDB
		go_into_ddb = 1;
#endif /* DDB */
		goto out;
	}
	MPASS(!mtx_owned(&w_mtx));
	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
	/*
	 * If we have a known higher number just say ok
	 */
	if (witness_watch > 1 && w->w_level > w1->w_level) {
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		goto out;
	}
	/* Already known to come after the most recently acquired lock. */
	if (isitmydescendant(m1->mtx_witness, w)) {
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		goto out;
	}
	/*
	 * Check every held mutex: if any of them is already recorded as a
	 * descendant of the new lock, this acquisition reverses a known
	 * order.
	 */
	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {

		MPASS(i < 200);
		w1 = m1->mtx_witness;
		if (isitmydescendant(w, w1)) {
			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
			if (blessed(w, w1))
				goto out;
			/* Report each offending witness only once. */
			if (m1 == &Giant) {
				if (w1->w_Giant_squawked)
					goto out;
				else
					w1->w_Giant_squawked = 1;
			} else {
				if (w1->w_other_squawked)
					goto out;
				else
					w1->w_other_squawked = 1;
			}
			printf("lock order reversal\n");
			printf(" 1st %s last acquired @ %s:%d\n",
			    w->w_description, w->w_file, w->w_line);
			printf(" 2nd %p %s @ %s:%d\n",
			    m1, w1->w_description, w1->w_file, w1->w_line);
			printf(" 3rd %p %s @ %s:%d\n",
			    m, w->w_description, file, line);
#ifdef DDB
			go_into_ddb = 1;
#endif /* DDB */
			goto out;
		}
	}
	/*
	 * No conflict: record the new lock as a child of the most recently
	 * acquired one.  w_mtx is only released here when itismychild()
	 * returns 0.
	 * NOTE(review): presumably witness_get() has already dropped w_mtx
	 * when itismychild() fails and returns 1 -- confirm.
	 */
	m1 = LIST_FIRST(&p->p_heldmtx);
	if (!itismychild(m1->mtx_witness, w))
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);

out:
#ifdef DDB
	if (witness_ddb && go_into_ddb)
		Debugger("witness_enter");
#endif /* DDB */
	w->w_file = file;
	w->w_line = line;
	m->mtx_line = line;
	m->mtx_file = file;

	/*
	 * If this pays off it likely means that a mutex being witnessed
	 * is acquired in hardclock. Put it in the ignore list. It is
	 * likely not the mutex this assert fails on.
	 */
	MPASS(m->mtx_held.le_prev == NULL);
	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
}
133065557Sjasone
133165557Sjasonevoid
133265856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
133365557Sjasone{
133465557Sjasone	struct proc *p;
133565856Sjhb	struct witness *w = m->mtx_witness;
133665557Sjasone
133771320Sjasone	if (witness_cold)
133871320Sjasone		return;
133969998Sjhb	if (panicstr)
134069998Sjhb		return;
134165557Sjasone	if (flags & MTX_SPIN) {
134271560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
134365557Sjasone			panic("mutex_try_enter: "
134465557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
134565557Sjasone			    m->mtx_description, file, line);
134671228Sbmilekic		if (mtx_recursed(m)) {
134771560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
134871228Sbmilekic				panic("mutex_try_enter: recursion on"
134971228Sbmilekic				    " non-recursive mutex %s @ %s:%d",
135071228Sbmilekic				    m->mtx_description, file, line);
135165557Sjasone			return;
135271228Sbmilekic		}
135369998Sjhb		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
135470861Sjake		PCPU_SET(witness_spin_check,
135570861Sjake		    PCPU_GET(witness_spin_check) | w->w_level);
135669998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
135769361Sjhb		w->w_file = file;
135869361Sjhb		w->w_line = line;
135969361Sjhb		m->mtx_line = line;
136069361Sjhb		m->mtx_file = file;
136165557Sjasone		return;
136265557Sjasone	}
136365557Sjasone
136471560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
136565557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
136665557Sjasone		    m->mtx_description, file, line);
136765557Sjasone
136871228Sbmilekic	if (mtx_recursed(m)) {
136971560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
137071228Sbmilekic			panic("mutex_try_enter: recursion on non-recursive"
137171228Sbmilekic			    " mutex %s @ %s:%d", m->mtx_description, file,
137271228Sbmilekic			    line);
137365557Sjasone		return;
137471228Sbmilekic	}
137565557Sjasone	w->w_file = file;
137665557Sjasone	w->w_line = line;
137765557Sjasone	m->mtx_line = line;
137865557Sjasone	m->mtx_file = file;
137965557Sjasone	p = CURPROC;
138067352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
138165557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
138265557Sjasone}
138365557Sjasone
138465557Sjasonevoid
138571352Sjasonewitness_exit(struct mtx *m, int flags, const char *file, int line)
138665557Sjasone{
138771352Sjasone	struct witness *w;
138865557Sjasone
138971352Sjasone	if (witness_cold || m->mtx_witness == NULL || panicstr)
139071352Sjasone		return;
139171352Sjasone	w = m->mtx_witness;
139265557Sjasone
139371352Sjasone	if (flags & MTX_SPIN) {
139471560Sjhb		if ((m->mtx_flags & MTX_SPIN) == 0)
139571352Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
139671352Sjasone			    " %s:%d", m->mtx_description, file, line);
139771352Sjasone		if (mtx_recursed(m)) {
139871560Sjhb			if ((m->mtx_flags & MTX_RECURSE) == 0)
139971352Sjasone				panic("mutex_exit: recursion on non-recursive"
140071352Sjasone				    " mutex %s @ %s:%d", m->mtx_description,
140171352Sjasone				    file, line);
140271352Sjasone			return;
140365557Sjasone		}
140471352Sjasone		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
140571352Sjasone		PCPU_SET(witness_spin_check,
140671352Sjasone		    PCPU_GET(witness_spin_check) & ~w->w_level);
140771352Sjasone		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
140871352Sjasone		return;
140965557Sjasone	}
141071560Sjhb	if ((m->mtx_flags & MTX_SPIN) != 0)
141171352Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
141271352Sjasone		    m->mtx_description, file, line);
141371352Sjasone
141471352Sjasone	if (mtx_recursed(m)) {
141571560Sjhb		if ((m->mtx_flags & MTX_RECURSE) == 0)
141671352Sjasone			panic("mutex_exit: recursion on non-recursive"
141771352Sjasone			    " mutex %s @ %s:%d", m->mtx_description,
141871352Sjasone			    file, line);
141971352Sjasone		return;
142065557Sjasone	}
142171352Sjasone
142271352Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
142371352Sjasone		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
142471352Sjasone			    m->mtx_description, file, line);
142571352Sjasone	LIST_REMOVE(m, mtx_held);
142671352Sjasone	m->mtx_held.le_prev = NULL;
142765557Sjasone}
142865557Sjasone
142965557Sjasoneint
143065856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
143165557Sjasone{
143265856Sjhb	struct mtx *m;
143365557Sjasone	struct proc *p;
143465557Sjasone	char **sleep;
143565557Sjasone	int n = 0;
143665557Sjasone
143771320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
143865557Sjasone	p = CURPROC;
143965557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
144065557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
144165557Sjasone		if (m == mtx)
144265557Sjasone			continue;
144365557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
144465557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
144565557Sjasone				goto next;
144665557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
144765557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
144865557Sjasone			m->mtx_description,
144965557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
145065557Sjasone		n++;
145165557Sjasone	next:
145265557Sjasone	}
145367676Sjhb#ifdef DDB
145467676Sjhb	if (witness_ddb && n)
145567676Sjhb		Debugger("witness_sleep");
145667676Sjhb#endif /* DDB */
145765557Sjasone	return (n);
145865557Sjasone}
145965557Sjasone
146065856Sjhbstatic struct witness *
146167404Sjhbenroll(const char *description, int flag)
146265557Sjasone{
146365557Sjasone	int i;
146465856Sjhb	struct witness *w, *w1;
146565557Sjasone	char **ignore;
146665557Sjasone	char **order;
146765557Sjasone
146865557Sjasone	if (!witness_watch)
146965557Sjasone		return (NULL);
147065557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
147165557Sjasone		if (strcmp(description, *ignore) == 0)
147265557Sjasone			return (NULL);
147365557Sjasone
147465557Sjasone	if (w_inited == 0) {
147571320Sjasone		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
147665557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
147765557Sjasone			w = &w_data[i];
147865557Sjasone			witness_free(w);
147965557Sjasone		}
148065557Sjasone		w_inited = 1;
148165557Sjasone		for (order = order_list; *order != NULL; order++) {
148265557Sjasone			w = enroll(*order, MTX_DEF);
148365557Sjasone			w->w_file = "order list";
148465557Sjasone			for (order++; *order != NULL; order++) {
148565557Sjasone				w1 = enroll(*order, MTX_DEF);
148665557Sjasone				w1->w_file = "order list";
148765557Sjasone				itismychild(w, w1);
148865557Sjasone				w = w1;
148965557Sjasone    	    	    	}
149065557Sjasone		}
149165557Sjasone	}
149265557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
149365557Sjasone		return (NULL);
149469998Sjhb	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
149565557Sjasone	for (w = w_all; w; w = w->w_next) {
149665557Sjasone		if (strcmp(description, w->w_description) == 0) {
149769998Sjhb			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
149865557Sjasone			return (w);
149965557Sjasone		}
150065557Sjasone	}
150165557Sjasone	if ((w = witness_get()) == NULL)
150265557Sjasone		return (NULL);
150365557Sjasone	w->w_next = w_all;
150465557Sjasone	w_all = w;
150565557Sjasone	w->w_description = description;
150669998Sjhb	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
150765557Sjasone	if (flag & MTX_SPIN) {
150865557Sjasone		w->w_spin = 1;
150965557Sjasone
151065557Sjasone		i = 1;
151165557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
151265557Sjasone			if (strcmp(description, *order) == 0)
151365557Sjasone				break;
151465557Sjasone			i <<= 1;
151565557Sjasone		}
151665557Sjasone		if (*order == NULL)
151765557Sjasone			panic("spin lock %s not in order list", description);
151865557Sjasone		w->w_level = i;
151971560Sjhb	}
152071228Sbmilekic
152165557Sjasone	return (w);
152265557Sjasone}
152365557Sjasone
152465557Sjasonestatic int
152565856Sjhbitismychild(struct witness *parent, struct witness *child)
152665557Sjasone{
152765557Sjasone	static int recursed;
152865557Sjasone
152965557Sjasone	/*
153065557Sjasone	 * Insert "child" after "parent"
153165557Sjasone	 */
153265557Sjasone	while (parent->w_morechildren)
153365557Sjasone		parent = parent->w_morechildren;
153465557Sjasone
153565557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
153665557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
153765557Sjasone			return (1);
153865557Sjasone		parent = parent->w_morechildren;
153965557Sjasone	}
154067352Sjhb	MPASS(child != NULL);
154165557Sjasone	parent->w_children[parent->w_childcnt++] = child;
154265557Sjasone	/*
154365557Sjasone	 * now prune whole tree
154465557Sjasone	 */
154565557Sjasone	if (recursed)
154665557Sjasone		return (0);
154765557Sjasone	recursed = 1;
154865557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
154965557Sjasone		for (parent = w_all; parent != NULL;
155065557Sjasone		    parent = parent->w_next) {
155165557Sjasone			if (!isitmychild(parent, child))
155265557Sjasone				continue;
155365557Sjasone			removechild(parent, child);
155465557Sjasone			if (isitmydescendant(parent, child))
155565557Sjasone				continue;
155665557Sjasone			itismychild(parent, child);
155765557Sjasone		}
155865557Sjasone	}
155965557Sjasone	recursed = 0;
156065557Sjasone	witness_levelall();
156165557Sjasone	return (0);
156265557Sjasone}
156365557Sjasone
156465557Sjasonestatic void
156565856Sjhbremovechild(struct witness *parent, struct witness *child)
156665557Sjasone{
156765856Sjhb	struct witness *w, *w1;
156865557Sjasone	int i;
156965557Sjasone
157065557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
157165557Sjasone		for (i = 0; i < w->w_childcnt; i++)
157265557Sjasone			if (w->w_children[i] == child)
157365557Sjasone				goto found;
157465557Sjasone	return;
157565557Sjasonefound:
157665557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
157765557Sjasone		continue;
157865557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
157967352Sjhb	MPASS(w->w_children[i] != NULL);
158065557Sjasone
158165557Sjasone	if (w1->w_childcnt != 0)
158265557Sjasone		return;
158365557Sjasone
158465557Sjasone	if (w1 == parent)
158565557Sjasone		return;
158665557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
158765557Sjasone		continue;
158865557Sjasone	w->w_morechildren = 0;
158965557Sjasone	witness_free(w1);
159065557Sjasone}
159165557Sjasone
159265557Sjasonestatic int
159365856Sjhbisitmychild(struct witness *parent, struct witness *child)
159465557Sjasone{
159565856Sjhb	struct witness *w;
159665557Sjasone	int i;
159765557Sjasone
159865557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
159965557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
160065557Sjasone			if (w->w_children[i] == child)
160165557Sjasone				return (1);
160265557Sjasone		}
160365557Sjasone	}
160465557Sjasone	return (0);
160565557Sjasone}
160665557Sjasone
160765557Sjasonestatic int
160865856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
160965557Sjasone{
161065856Sjhb	struct witness *w;
161165557Sjasone	int i;
161265557Sjasone	int j;
161365557Sjasone
161465557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
161567352Sjhb		MPASS(j < 1000);
161665557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
161765557Sjasone			if (w->w_children[i] == child)
161865557Sjasone				return (1);
161965557Sjasone		}
162065557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
162165557Sjasone			if (isitmydescendant(w->w_children[i], child))
162265557Sjasone				return (1);
162365557Sjasone		}
162465557Sjasone	}
162565557Sjasone	return (0);
162665557Sjasone}
162765557Sjasone
162865557Sjasonevoid
162965557Sjasonewitness_levelall (void)
163065557Sjasone{
163165856Sjhb	struct witness *w, *w1;
163265557Sjasone
163365557Sjasone	for (w = w_all; w; w = w->w_next)
163471228Sbmilekic		if (!(w->w_spin))
163565557Sjasone			w->w_level = 0;
163665557Sjasone	for (w = w_all; w; w = w->w_next) {
163765557Sjasone		if (w->w_spin)
163865557Sjasone			continue;
163965557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
164065557Sjasone			if (isitmychild(w1, w))
164165557Sjasone				break;
164265557Sjasone		}
164365557Sjasone		if (w1 != NULL)
164465557Sjasone			continue;
164565557Sjasone		witness_leveldescendents(w, 0);
164665557Sjasone	}
164765557Sjasone}
164865557Sjasone
164965557Sjasonestatic void
165065856Sjhbwitness_leveldescendents(struct witness *parent, int level)
165165557Sjasone{
165265557Sjasone	int i;
165365856Sjhb	struct witness *w;
165465557Sjasone
165565557Sjasone	if (parent->w_level < level)
165665557Sjasone		parent->w_level = level;
165765557Sjasone	level++;
165865557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
165965557Sjasone		for (i = 0; i < w->w_childcnt; i++)
166065557Sjasone			witness_leveldescendents(w->w_children[i], level);
166165557Sjasone}
166265557Sjasone
166365557Sjasonestatic void
166465856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
166565856Sjhb			   struct witness *parent)
166665557Sjasone{
166765856Sjhb	struct witness *w;
166865557Sjasone	int i;
166965557Sjasone	int level = parent->w_level;
167065557Sjasone
167165557Sjasone	prnt("%d", level);
167265557Sjasone	if (level < 10)
167365557Sjasone		prnt(" ");
167465557Sjasone	for (i = 0; i < level; i++)
167565557Sjasone		prnt(" ");
167665557Sjasone	prnt("%s", parent->w_description);
167765557Sjasone	if (parent->w_file != NULL) {
167865557Sjasone		prnt(" -- last acquired @ %s", parent->w_file);
167965557Sjasone#ifndef W_USE_WHERE
168065557Sjasone		prnt(":%d", parent->w_line);
168165557Sjasone#endif
168265557Sjasone		prnt("\n");
168365557Sjasone	}
168465557Sjasone
168565557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
168665557Sjasone		for (i = 0; i < w->w_childcnt; i++)
168765557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
168865557Sjasone    }
168965557Sjasone
169065557Sjasonestatic int
169165856Sjhbdup_ok(struct witness *w)
169265557Sjasone{
169365557Sjasone	char **dup;
169465557Sjasone
169565557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
169665557Sjasone		if (strcmp(w->w_description, *dup) == 0)
169765557Sjasone			return (1);
169865557Sjasone	return (0);
169965557Sjasone}
170065557Sjasone
170165557Sjasonestatic int
170265856Sjhbblessed(struct witness *w1, struct witness *w2)
170365557Sjasone{
170465557Sjasone	int i;
170565856Sjhb	struct witness_blessed *b;
170665557Sjasone
170765557Sjasone	for (i = 0; i < blessed_count; i++) {
170865557Sjasone		b = &blessed_list[i];
170965557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
171065557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
171165557Sjasone				return (1);
171265557Sjasone			continue;
171365557Sjasone		}
171465557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
171565557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
171665557Sjasone				return (1);
171765557Sjasone	}
171865557Sjasone	return (0);
171965557Sjasone}
172065557Sjasone
172165856Sjhbstatic struct witness *
172265557Sjasonewitness_get()
172365557Sjasone{
172465856Sjhb	struct witness *w;
172565557Sjasone
172665557Sjasone	if ((w = w_free) == NULL) {
172765557Sjasone		witness_dead = 1;
172869998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
172965557Sjasone		printf("witness exhausted\n");
173065557Sjasone		return (NULL);
173165557Sjasone	}
173265557Sjasone	w_free = w->w_next;
173365856Sjhb	bzero(w, sizeof(*w));
173465557Sjasone	return (w);
173565557Sjasone}
173665557Sjasone
173765557Sjasonestatic void
173865856Sjhbwitness_free(struct witness *w)
173965557Sjasone{
174065557Sjasone	w->w_next = w_free;
174165557Sjasone	w_free = w;
174265557Sjasone}
174365557Sjasone
174469881Sjakeint
174565557Sjasonewitness_list(struct proc *p)
174665557Sjasone{
174765856Sjhb	struct mtx *m;
174869881Sjake	int nheld;
174965557Sjasone
175071320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
175169881Sjake	nheld = 0;
175265557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
175365557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
175465557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
175565557Sjasone		    m->mtx_description, m,
175665557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
175769881Sjake		nheld++;
175865557Sjasone	}
175969881Sjake
176069881Sjake	return (nheld);
176165557Sjasone}
176265557Sjasone
176365557Sjasonevoid
176465856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
176565557Sjasone{
176671320Sjasone
176771320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
176871352Sjasone	if (m->mtx_witness == NULL)
176971352Sjasone		return;
177071352Sjasone
177165557Sjasone	*filep = m->mtx_witness->w_file;
177265557Sjasone	*linep = m->mtx_witness->w_line;
177365557Sjasone}
177465557Sjasone
177565557Sjasonevoid
177665856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
177765557Sjasone{
177871320Sjasone
177971320Sjasone	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
178071352Sjasone	if (m->mtx_witness == NULL)
178171352Sjasone		return;
178271352Sjasone
178365557Sjasone	m->mtx_witness->w_file = file;
178465557Sjasone	m->mtx_witness->w_line = line;
178565557Sjasone}
178665557Sjasone
178769429Sjhb#endif	/* WITNESS */
1788