subr_witness.c revision 69998
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_witness.c 69998 2000-12-13 21:53:42Z jhb $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3465557Sjasone *	Main Entry: witness
3565557Sjasone *	Pronunciation: 'wit-n&s
3665557Sjasone *	Function: noun
3765557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
3865557Sjasone *	    testimony, witness, from 2wit
3965557Sjasone *	Date: before 12th century
4065557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4165557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4265557Sjasone *	    a cause or before a judicial tribunal
4365557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4465557Sjasone *	    testify to its having taken place
4565557Sjasone *	4 : one who has personal knowledge of something
4665557Sjasone *	5 a : something serving as evidence or proof : SIGN
4765557Sjasone *	  b : public affirmation by word or example of usually
4865557Sjasone *	      religious faith or conviction <the heroic witness to divine
4965557Sjasone *	      life -- Pilot>
5065557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5165557Sjasone */
5265557Sjasone
5368790Sjhb#include "opt_ddb.h"
5467676Sjhb#include "opt_witness.h"
5567676Sjhb
5669215Salfred/*
5769215Salfred * Cause non-inlined mtx_*() to be compiled.
5869215Salfred * Must be defined early because other system headers may include mutex.h.
5969215Salfred */
6069215Salfred#define _KERN_MUTEX_C_
6169215Salfred
6265557Sjasone#include <sys/param.h>
6367352Sjhb#include <sys/bus.h>
6467352Sjhb#include <sys/kernel.h>
6567352Sjhb#include <sys/malloc.h>
6665557Sjasone#include <sys/proc.h>
6767676Sjhb#include <sys/sysctl.h>
6865557Sjasone#include <sys/systm.h>
6967352Sjhb#include <sys/vmmeter.h>
7065557Sjasone#include <sys/ktr.h>
7165557Sjasone
7267352Sjhb#include <machine/atomic.h>
7367352Sjhb#include <machine/bus.h>
7467352Sjhb#include <machine/clock.h>
7565557Sjasone#include <machine/cpu.h>
7667352Sjhb
7768790Sjhb#include <ddb/ddb.h>
7868790Sjhb
7967352Sjhb#include <vm/vm.h>
8067352Sjhb#include <vm/vm_extern.h>
8167352Sjhb
8267352Sjhb#include <sys/mutex.h>
8365557Sjasone
8465557Sjasone/*
8567352Sjhb * Machine independent bits of the mutex implementation
8667352Sjhb */
8767352Sjhb/* All mutexes in system (used for debug/panic) */
8869429Sjhb#ifdef WITNESS
8967352Sjhbstatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
9067352Sjhb	"All mutexes queue head" };
9167352Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
9267352Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
9367352Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx };
9469429Sjhb#else	/* WITNESS */
9567352Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
9667352Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
9767352Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx };
9869429Sjhb#endif	/* WITNESS */
9967352Sjhb
/*
 * Number of mutexes currently linked on the all_mtx list and the largest
 * value that count has reached (maintained by mtx_init()/mtx_destroy()).
 */
static int	mtx_cur_cnt;
static int	mtx_max_cnt;

void	_mtx_enter_giant_def(void);
void	_mtx_exit_giant_def(void);
static void propagate_priority(struct proc *);

/* True when the lock word of `m' is exactly MTX_UNOWNED. */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
/* Owning process taken from the lock word of `m', or NULL if unowned. */
#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * The word stored immediately below the address of `x'.  It is applied to
 * a function's first parameter and logged as the caller's address, which
 * presumably only holds on ABIs that pass arguments on the stack.
 *
 * Both macros are fully parenthesized so that they can be used safely as
 * operands of a larger expression.
 */
#define	RETIP(x)		(*(((uintptr_t *)(&(x))) - 1))
/* Set the scheduling priority of process `p' to `pri'. */
#define	SET_PRIO(p, pri)	((p)->p_priority = (pri))
11367352Sjhb
11467352Sjhb/*
11567352Sjhb * XXX Temporary, for use from assembly language
11667352Sjhb */
11767352Sjhb
11867352Sjhbvoid
11967352Sjhb_mtx_enter_giant_def(void)
12067352Sjhb{
12167352Sjhb
12267352Sjhb	mtx_enter(&Giant, MTX_DEF);
12367352Sjhb}
12467352Sjhb
12567352Sjhbvoid
12667352Sjhb_mtx_exit_giant_def(void)
12767352Sjhb{
12867352Sjhb
12967352Sjhb	mtx_exit(&Giant, MTX_DEF);
13067352Sjhb}
13167352Sjhb
13267352Sjhbstatic void
13367352Sjhbpropagate_priority(struct proc *p)
13467352Sjhb{
13567352Sjhb	int pri = p->p_priority;
13667352Sjhb	struct mtx *m = p->p_blocked;
13767352Sjhb
13869376Sjhb	mtx_assert(&sched_lock, MA_OWNED);
13967352Sjhb	for (;;) {
14067352Sjhb		struct proc *p1;
14167352Sjhb
14267352Sjhb		p = mtx_owner(m);
14367352Sjhb
14467352Sjhb		if (p == NULL) {
14567352Sjhb			/*
14667352Sjhb			 * This really isn't quite right. Really
14767352Sjhb			 * ought to bump priority of process that
14867352Sjhb			 * next acquires the mutex.
14967352Sjhb			 */
15067352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
15167352Sjhb			return;
15267352Sjhb		}
15367352Sjhb		MPASS(p->p_magic == P_MAGIC);
15469376Sjhb		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
15567352Sjhb		if (p->p_priority <= pri)
15667352Sjhb			return;
15769376Sjhb
15867352Sjhb		/*
15969376Sjhb		 * Bump this process' priority.
16069376Sjhb		 */
16169376Sjhb		SET_PRIO(p, pri);
16269376Sjhb
16369376Sjhb		/*
16467352Sjhb		 * If lock holder is actually running, just bump priority.
16567352Sjhb		 */
16669376Sjhb#ifdef SMP
16769376Sjhb		/*
16869376Sjhb		 * For SMP, we can check the p_oncpu field to see if we are
16969376Sjhb		 * running.
17069376Sjhb		 */
17169376Sjhb		if (p->p_oncpu != 0xff) {
17267352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
17367352Sjhb			return;
17467352Sjhb		}
17569376Sjhb#else
17667352Sjhb		/*
17769376Sjhb		 * For UP, we check to see if p is curproc (this shouldn't
17869376Sjhb		 * ever happen however as it would mean we are in a deadlock.)
17969376Sjhb		 */
18069376Sjhb		if (p == curproc) {
18169376Sjhb			panic("Deadlock detected");
18269376Sjhb			return;
18369376Sjhb		}
18469376Sjhb#endif
18569376Sjhb		/*
18667352Sjhb		 * If on run queue move to new run queue, and
18767352Sjhb		 * quit.
18867352Sjhb		 */
18967352Sjhb		if (p->p_stat == SRUN) {
19069376Sjhb			printf("XXX: moving process %d(%s) to a new run queue\n",
19169376Sjhb			       p->p_pid, p->p_comm);
19267352Sjhb			MPASS(p->p_blocked == NULL);
19367352Sjhb			remrunqueue(p);
19467352Sjhb			setrunqueue(p);
19567352Sjhb			return;
19667352Sjhb		}
19767352Sjhb
19867352Sjhb		/*
19969376Sjhb		 * If we aren't blocked on a mutex, we should be.
20067352Sjhb		 */
20169376Sjhb		KASSERT(p->p_stat == SMTX, (
20269376Sjhb		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
20369376Sjhb		    p->p_pid, p->p_comm, p->p_stat,
20469376Sjhb		    m->mtx_description));
20567352Sjhb
20667352Sjhb		/*
20767352Sjhb		 * Pick up the mutex that p is blocked on.
20867352Sjhb		 */
20967352Sjhb		m = p->p_blocked;
21067352Sjhb		MPASS(m != NULL);
21167352Sjhb
21267352Sjhb		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
21367352Sjhb		    p->p_comm, m->mtx_description);
21467352Sjhb		/*
21567352Sjhb		 * Check if the proc needs to be moved up on
21667352Sjhb		 * the blocked chain
21767352Sjhb		 */
21869376Sjhb		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
21969376Sjhb			printf("XXX: process at head of run queue\n");
22069376Sjhb			continue;
22169376Sjhb		}
22269376Sjhb		p1 = TAILQ_PREV(p, rq, p_procq);
22369376Sjhb		if (p1->p_priority <= pri) {
22469376Sjhb			printf(
22567352Sjhb	"XXX: previous process %d(%s) has higher priority\n",
22669376Sjhb	                    p->p_pid, p->p_comm);
22767352Sjhb			continue;
22867352Sjhb		}
22967352Sjhb
23067352Sjhb		/*
23169376Sjhb		 * Remove proc from blocked chain and determine where
23269376Sjhb		 * it should be moved up to.  Since we know that p1 has
23369376Sjhb		 * a lower priority than p, we know that at least one
23469376Sjhb		 * process in the chain has a lower priority and that
23569376Sjhb		 * p1 will thus not be NULL after the loop.
23667352Sjhb		 */
23767352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
23867352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
23967352Sjhb			MPASS(p1->p_magic == P_MAGIC);
24067352Sjhb			if (p1->p_priority > pri)
24167352Sjhb				break;
24267352Sjhb		}
24369376Sjhb		MPASS(p1 != NULL);
24469376Sjhb		TAILQ_INSERT_BEFORE(p1, p, p_procq);
24567352Sjhb		CTR4(KTR_LOCK,
24669376Sjhb		    "propagate_priority: p 0x%p moved before 0x%p on [0x%p] %s",
24767352Sjhb		    p, p1, m, m->mtx_description);
24867352Sjhb	}
24967352Sjhb}
25067352Sjhb
25167352Sjhbvoid
25267352Sjhbmtx_enter_hard(struct mtx *m, int type, int saveintr)
25367352Sjhb{
25467352Sjhb	struct proc *p = CURPROC;
25567352Sjhb
25667352Sjhb	KASSERT(p != NULL, ("curproc is NULL in mutex"));
25767352Sjhb
25867352Sjhb	switch (type) {
25967352Sjhb	case MTX_DEF:
26067352Sjhb		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
26167352Sjhb			m->mtx_recurse++;
26267352Sjhb			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
26369998Sjhb			if ((type & MTX_QUIET) == 0)
26469998Sjhb				CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
26567352Sjhb			return;
26667352Sjhb		}
26769998Sjhb		if ((type & MTX_QUIET) == 0)
26869998Sjhb			CTR3(KTR_LOCK,
26969998Sjhb			    "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
27069998Sjhb			    m, (void *)m->mtx_lock, (void *)RETIP(m));
27169376Sjhb
27269376Sjhb		/*
27369376Sjhb		 * Save our priority.  Even though p_nativepri is protected
27469376Sjhb		 * by sched_lock, we don't obtain it here as it can be
27569376Sjhb		 * expensive.  Since this is the only place p_nativepri is
27669376Sjhb		 * set, and since two CPUs will not be executing the same
27769376Sjhb		 * process concurrently, we know that no other CPU is going
27869376Sjhb		 * to be messing with this.  Also, p_nativepri is only read
27969376Sjhb		 * when we are blocked on a mutex, so that can't be happening
28069376Sjhb		 * right now either.
28169376Sjhb		 */
28269376Sjhb		p->p_nativepri = p->p_priority;
28367352Sjhb		while (!_obtain_lock(m, p)) {
28467396Sjhb			uintptr_t v;
28567352Sjhb			struct proc *p1;
28667352Sjhb
28767352Sjhb			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
28867352Sjhb			/*
28967352Sjhb			 * check if the lock has been released while
29067352Sjhb			 * waiting for the schedlock.
29167352Sjhb			 */
29267352Sjhb			if ((v = m->mtx_lock) == MTX_UNOWNED) {
29367352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
29467352Sjhb				continue;
29567352Sjhb			}
29667352Sjhb			/*
29767352Sjhb			 * The mutex was marked contested on release. This
29867352Sjhb			 * means that there are processes blocked on it.
29967352Sjhb			 */
30067352Sjhb			if (v == MTX_CONTESTED) {
30167352Sjhb				p1 = TAILQ_FIRST(&m->mtx_blocked);
30267352Sjhb				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
30367352Sjhb				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
30467352Sjhb				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
30567352Sjhb				if (p1->p_priority < p->p_priority) {
30667352Sjhb					SET_PRIO(p, p1->p_priority);
30767352Sjhb				}
30867352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
30967352Sjhb				return;
31067352Sjhb			}
31167352Sjhb			/*
31267352Sjhb			 * If the mutex isn't already contested and
31367352Sjhb			 * a failure occurs setting the contested bit the
31467352Sjhb			 * mutex was either release or the
31567352Sjhb			 * state of the RECURSION bit changed.
31667352Sjhb			 */
31767352Sjhb			if ((v & MTX_CONTESTED) == 0 &&
31867352Sjhb			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
31967352Sjhb				               (void *)(v | MTX_CONTESTED))) {
32067352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
32167352Sjhb				continue;
32267352Sjhb			}
32367352Sjhb
32467352Sjhb			/* We definitely have to sleep for this lock */
32567352Sjhb			mtx_assert(m, MA_NOTOWNED);
32667352Sjhb
32767352Sjhb#ifdef notyet
32867352Sjhb			/*
32967352Sjhb			 * If we're borrowing an interrupted thread's VM
33067352Sjhb			 * context must clean up before going to sleep.
33167352Sjhb			 */
33267352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
33367352Sjhb				ithd_t *it = (ithd_t *)p;
33467352Sjhb
33567352Sjhb				if (it->it_interrupted) {
33669998Sjhb					if ((type & MTX_QUIET) == 0)
33769998Sjhb						CTR2(KTR_LOCK,
33867352Sjhb					    "mtx_enter: 0x%x interrupted 0x%x",
33969998Sjhb						    it, it->it_interrupted);
34067352Sjhb					intr_thd_fixup(it);
34167352Sjhb				}
34267352Sjhb			}
34367352Sjhb#endif
34467352Sjhb
34567352Sjhb			/* Put us on the list of procs blocked on this mutex */
34667352Sjhb			if (TAILQ_EMPTY(&m->mtx_blocked)) {
34767352Sjhb				p1 = (struct proc *)(m->mtx_lock &
34867352Sjhb						     MTX_FLAGMASK);
34967352Sjhb				LIST_INSERT_HEAD(&p1->p_contested, m,
35067352Sjhb						 mtx_contested);
35167352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
35267352Sjhb			} else {
35367352Sjhb				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
35467352Sjhb					if (p1->p_priority > p->p_priority)
35567352Sjhb						break;
35667352Sjhb				if (p1)
35767352Sjhb					TAILQ_INSERT_BEFORE(p1, p, p_procq);
35867352Sjhb				else
35967352Sjhb					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
36067352Sjhb							  p_procq);
36167352Sjhb			}
36267352Sjhb
36367352Sjhb			p->p_blocked = m;	/* Who we're blocked on */
36469369Sjhb			p->p_mtxname = m->mtx_description;
36567352Sjhb			p->p_stat = SMTX;
36667352Sjhb#if 0
36767352Sjhb			propagate_priority(p);
36867352Sjhb#endif
36969998Sjhb			if ((type & MTX_QUIET) == 0)
37069998Sjhb				CTR3(KTR_LOCK,
37169998Sjhb				    "mtx_enter: p 0x%p blocked on [0x%p] %s",
37269998Sjhb				    p, m, m->mtx_description);
37368808Sjhb			mi_switch();
37469998Sjhb			if ((type & MTX_QUIET) == 0)
37569998Sjhb				CTR3(KTR_LOCK,
37667352Sjhb			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
37769998Sjhb				    p, m, m->mtx_description);
37867352Sjhb			mtx_exit(&sched_lock, MTX_SPIN);
37967352Sjhb		}
38067352Sjhb		return;
38167352Sjhb	case MTX_SPIN:
38267352Sjhb	case MTX_SPIN | MTX_FIRST:
38367352Sjhb	case MTX_SPIN | MTX_TOPHALF:
38467352Sjhb	    {
38567352Sjhb		int i = 0;
38667352Sjhb
38767352Sjhb		if (m->mtx_lock == (uintptr_t)p) {
38867352Sjhb			m->mtx_recurse++;
38967352Sjhb			return;
39067352Sjhb		}
39169998Sjhb		if ((type & MTX_QUIET) == 0)
39269998Sjhb			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
39367352Sjhb		for (;;) {
39467352Sjhb			if (_obtain_lock(m, p))
39567352Sjhb				break;
39667352Sjhb			while (m->mtx_lock != MTX_UNOWNED) {
39767352Sjhb				if (i++ < 1000000)
39867352Sjhb					continue;
39967352Sjhb				if (i++ < 6000000)
40067352Sjhb					DELAY (1);
40167352Sjhb#ifdef DDB
40267352Sjhb				else if (!db_active)
40367352Sjhb#else
40467352Sjhb				else
40567352Sjhb#endif
40667352Sjhb					panic(
40767352Sjhb				"spin lock %s held by 0x%p for > 5 seconds",
40867352Sjhb					    m->mtx_description,
40967352Sjhb					    (void *)m->mtx_lock);
41067352Sjhb			}
41167352Sjhb		}
41267352Sjhb
41367352Sjhb#ifdef MUTEX_DEBUG
41467352Sjhb		if (type != MTX_SPIN)
41567352Sjhb			m->mtx_saveintr = 0xbeefface;
41667352Sjhb		else
41767352Sjhb#endif
41867352Sjhb			m->mtx_saveintr = saveintr;
41969998Sjhb		if ((type & MTX_QUIET) == 0)
42069998Sjhb			CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
42167352Sjhb		return;
42267352Sjhb	    }
42367352Sjhb	}
42467352Sjhb}
42567352Sjhb
42667352Sjhbvoid
42767352Sjhbmtx_exit_hard(struct mtx *m, int type)
42867352Sjhb{
42967352Sjhb	struct proc *p, *p1;
43067352Sjhb	struct mtx *m1;
43167352Sjhb	int pri;
43267352Sjhb
43367352Sjhb	p = CURPROC;
43467352Sjhb	switch (type) {
43567352Sjhb	case MTX_DEF:
43667352Sjhb	case MTX_DEF | MTX_NOSWITCH:
43767352Sjhb		if (m->mtx_recurse != 0) {
43867352Sjhb			if (--(m->mtx_recurse) == 0)
43967352Sjhb				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
44069998Sjhb			if ((type & MTX_QUIET) == 0)
44169998Sjhb				CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
44267352Sjhb			return;
44367352Sjhb		}
44467352Sjhb		mtx_enter(&sched_lock, MTX_SPIN);
44569998Sjhb		if ((type & MTX_QUIET) == 0)
44669998Sjhb			CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
44767352Sjhb		p1 = TAILQ_FIRST(&m->mtx_blocked);
44867352Sjhb		MPASS(p->p_magic == P_MAGIC);
44967352Sjhb		MPASS(p1->p_magic == P_MAGIC);
45067352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
45167352Sjhb		if (TAILQ_EMPTY(&m->mtx_blocked)) {
45267352Sjhb			LIST_REMOVE(m, mtx_contested);
45367352Sjhb			_release_lock_quick(m);
45469998Sjhb			if ((type & MTX_QUIET) == 0)
45569998Sjhb				CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
45667352Sjhb		} else
45769363Sjhb			atomic_store_rel_ptr(&m->mtx_lock,
45869363Sjhb			    (void *)MTX_CONTESTED);
45967352Sjhb		pri = MAXPRI;
46067352Sjhb		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
46167352Sjhb			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
46267352Sjhb			if (cp < pri)
46367352Sjhb				pri = cp;
46467352Sjhb		}
46567352Sjhb		if (pri > p->p_nativepri)
46667352Sjhb			pri = p->p_nativepri;
46767352Sjhb		SET_PRIO(p, pri);
46869998Sjhb		if ((type & MTX_QUIET) == 0)
46969998Sjhb			CTR2(KTR_LOCK,
47069998Sjhb			    "mtx_exit: 0x%p contested setrunqueue 0x%p", m, p1);
47167352Sjhb		p1->p_blocked = NULL;
47269369Sjhb		p1->p_mtxname = NULL;
47367352Sjhb		p1->p_stat = SRUN;
47467352Sjhb		setrunqueue(p1);
47567352Sjhb		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
47667352Sjhb#ifdef notyet
47767352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
47867352Sjhb				ithd_t *it = (ithd_t *)p;
47967352Sjhb
48067352Sjhb				if (it->it_interrupted) {
48169998Sjhb					if ((type & MTX_QUIET) == 0)
48269998Sjhb						CTR2(KTR_LOCK,
48367352Sjhb					    "mtx_exit: 0x%x interruped 0x%x",
48469998Sjhb						    it, it->it_interrupted);
48567352Sjhb					intr_thd_fixup(it);
48667352Sjhb				}
48767352Sjhb			}
48867352Sjhb#endif
48967352Sjhb			setrunqueue(p);
49069998Sjhb			if ((type & MTX_QUIET) == 0)
49169998Sjhb				CTR2(KTR_LOCK,
49269998Sjhb				    "mtx_exit: 0x%p switching out lock=0x%p",
49369998Sjhb				    m, (void *)m->mtx_lock);
49467352Sjhb			mi_switch();
49569998Sjhb			if ((type & MTX_QUIET) == 0)
49669998Sjhb				CTR2(KTR_LOCK,
49769998Sjhb				    "mtx_exit: 0x%p resuming lock=0x%p",
49869998Sjhb				    m, (void *)m->mtx_lock);
49967352Sjhb		}
50067352Sjhb		mtx_exit(&sched_lock, MTX_SPIN);
50167352Sjhb		break;
50267352Sjhb	case MTX_SPIN:
50367352Sjhb	case MTX_SPIN | MTX_FIRST:
50467352Sjhb		if (m->mtx_recurse != 0) {
50567352Sjhb			m->mtx_recurse--;
50667352Sjhb			return;
50767352Sjhb		}
50867352Sjhb		MPASS(mtx_owned(m));
50967352Sjhb		_release_lock_quick(m);
51067352Sjhb		if (type & MTX_FIRST)
51167352Sjhb			enable_intr();	/* XXX is this kosher? */
51267352Sjhb		else {
51367352Sjhb			MPASS(m->mtx_saveintr != 0xbeefface);
51467352Sjhb			restore_intr(m->mtx_saveintr);
51567352Sjhb		}
51667352Sjhb		break;
51767352Sjhb	case MTX_SPIN | MTX_TOPHALF:
51867352Sjhb		if (m->mtx_recurse != 0) {
51967352Sjhb			m->mtx_recurse--;
52067352Sjhb			return;
52167352Sjhb		}
52267352Sjhb		MPASS(mtx_owned(m));
52367352Sjhb		_release_lock_quick(m);
52467352Sjhb		break;
52567352Sjhb	default:
52667352Sjhb		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
52767352Sjhb	}
52867352Sjhb}
52967352Sjhb
#define MV_DESTROY	0	/* validate before destroy */
#define MV_INIT		1	/* validate before init */

#ifdef MUTEX_DEBUG

int mtx_validate __P((struct mtx *, int));

/*
 * Sanity-check the list of all mutexes and the membership of `m' in it.
 *
 *	m	- the mutex about to be initialized or destroyed.
 *	when	- MV_INIT or MV_DESTROY.
 *
 * The whole list is walked under all_mtx: every link must be readable
 * (except on alpha, where the kernacc() check is disabled) and the number
 * of entries must equal mtx_cur_cnt.  For MV_DESTROY the mutex is
 * asserted to be on the list.  For MV_INIT a mutex that is already on the
 * list is reported and asserted to be unowned.
 *
 * Returns 1 when `m' is being re-initialized while still on the list,
 * otherwise 0.  Nothing is checked (and 0 is returned) while the system
 * is cold or when `m' is all_mtx itself.
 */
int
mtx_validate(struct mtx *m, int when)
{
	struct mtx *mp;
	int i;
	int already_listed = 0;

	if (cold || m == &all_mtx)
		return 0;

	mtx_enter(&all_mtx, MTX_DEF);
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
	    VM_PROT_READ) == 1);
#endif
	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);

	/* Count the entries, refusing to walk further than the known count. */
	i = 0;
	mp = all_mtx.mtx_next;
	while (mp != &all_mtx) {
#ifndef __alpha__
		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
		    VM_PROT_READ) != 1) {
			panic("mtx_validate: mp=%p mp->mtx_next=%p",
			    mp, mp->mtx_next);
		}
#endif
		if (++i > mtx_cur_cnt) {
			panic("mtx_validate: too many in chain, known=%d\n",
			    mtx_cur_cnt);
		}
		mp = mp->mtx_next;
	}
	MPASS(i == mtx_cur_cnt);

	if (when == MV_DESTROY) {
		/* The mutex being destroyed has to be on the list. */
		mp = all_mtx.mtx_next;
		while (mp != &all_mtx && mp != m)
			mp = mp->mtx_next;
		MPASS(mp == m);
	} else if (when == MV_INIT) {
		for (mp = all_mtx.mtx_next; mp != &all_mtx;
		    mp = mp->mtx_next) {
			if (mp != m)
				continue;
			/*
			 * Not good. This mutex already exists.
			 */
			printf("re-initing existing mutex %s\n",
			    m->mtx_description);
			MPASS(m->mtx_lock == MTX_UNOWNED);
			already_listed = 1;
		}
	}
	mtx_exit(&all_mtx, MTX_DEF);
	return (already_listed);
}
#endif
59567352Sjhb
59667352Sjhbvoid
59767352Sjhbmtx_init(struct mtx *m, const char *t, int flag)
59867352Sjhb{
59969429Sjhb#ifdef WITNESS
60067352Sjhb	struct mtx_debug *debug;
60167352Sjhb#endif
60267352Sjhb
60369998Sjhb	if ((flag & MTX_QUIET) == 0)
60469998Sjhb		CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
60567352Sjhb#ifdef MUTEX_DEBUG
60667352Sjhb	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
60767352Sjhb		return;
60869429Sjhb#endif
60969429Sjhb#ifdef WITNESS
61067352Sjhb	if (flag & MTX_COLD)
61167352Sjhb		debug = m->mtx_debug;
61267352Sjhb	else
61367352Sjhb		debug = NULL;
61467352Sjhb	if (debug == NULL) {
61567352Sjhb#ifdef DIAGNOSTIC
61667352Sjhb		if(cold && bootverbose)
61767352Sjhb			printf("malloc'ing mtx_debug while cold for %s\n", t);
61867352Sjhb#endif
61967352Sjhb
62067352Sjhb		/* XXX - should not use DEVBUF */
62169781Sdwmalone		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF,
62269781Sdwmalone		    M_NOWAIT | M_ZERO);
62367352Sjhb		MPASS(debug != NULL);
62467352Sjhb	}
62567352Sjhb#endif
62667352Sjhb	bzero((void *)m, sizeof *m);
62767352Sjhb	TAILQ_INIT(&m->mtx_blocked);
62869429Sjhb#ifdef WITNESS
62967352Sjhb	m->mtx_debug = debug;
63067352Sjhb#endif
63167352Sjhb	m->mtx_description = t;
63267352Sjhb	m->mtx_lock = MTX_UNOWNED;
63367352Sjhb	/* Put on all mutex queue */
63467352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
63567352Sjhb	m->mtx_next = &all_mtx;
63667352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
63767352Sjhb	m->mtx_prev->mtx_next = m;
63867352Sjhb	all_mtx.mtx_prev = m;
63967352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
64067352Sjhb		mtx_max_cnt = mtx_cur_cnt;
64167352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
64267352Sjhb	witness_init(m, flag);
64367352Sjhb}
64467352Sjhb
64567352Sjhbvoid
64667352Sjhbmtx_destroy(struct mtx *m)
64767352Sjhb{
64867352Sjhb
64967352Sjhb	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
65067352Sjhb#ifdef MUTEX_DEBUG
65167352Sjhb	if (m->mtx_next == NULL)
65267352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
65367352Sjhb		    m, m->mtx_description);
65467352Sjhb
65567352Sjhb	if (!mtx_owned(m)) {
65667352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
65767352Sjhb	} else {
65867352Sjhb		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
65967352Sjhb	}
66067352Sjhb	mtx_validate(m, MV_DESTROY);		/* diagnostic */
66167352Sjhb#endif
66267352Sjhb
66367352Sjhb#ifdef WITNESS
66467352Sjhb	if (m->mtx_witness)
66567352Sjhb		witness_destroy(m);
66667352Sjhb#endif /* WITNESS */
66767352Sjhb
66867352Sjhb	/* Remove from the all mutex queue */
66967352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
67067352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
67167352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
67267352Sjhb#ifdef MUTEX_DEBUG
67367352Sjhb	m->mtx_next = m->mtx_prev = NULL;
67469429Sjhb#endif
67569429Sjhb#ifdef WITNESS
67667352Sjhb	free(m->mtx_debug, M_DEVBUF);
67767352Sjhb	m->mtx_debug = NULL;
67867352Sjhb#endif
67967352Sjhb	mtx_cur_cnt--;
68067352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
68167352Sjhb}
68267352Sjhb
68367352Sjhb/*
68465557Sjasone * The non-inlined versions of the mtx_*() functions are always built (above),
68569429Sjhb * but the witness code depends on the WITNESS kernel option being specified.
68665557Sjasone */
68769429Sjhb#ifdef WITNESS
68865557Sjasone
/* Number of entries in the statically allocated w_data[] pool. */
#define	WITNESS_COUNT		200
/* Number of child slots held directly in each struct witness. */
#define	WITNESS_NCHILDREN	2

/* Global witness switch; its consumers are outside this part of the file. */
int	witness_watch = 1;
69365557Sjasone
69465856Sjhbstruct witness {
69565557Sjasone	struct witness	*w_next;
69667404Sjhb	const char	*w_description;
69765624Sjasone	const char	*w_file;
69865557Sjasone	int		 w_line;
69965557Sjasone	struct witness	*w_morechildren;
70065557Sjasone	u_char		 w_childcnt;
70165557Sjasone	u_char		 w_Giant_squawked:1;
70265557Sjasone	u_char		 w_other_squawked:1;
70365557Sjasone	u_char		 w_same_squawked:1;
70465557Sjasone	u_char		 w_sleep:1;
70565557Sjasone	u_char		 w_spin:1;	/* this is a spin mutex */
70665557Sjasone	u_int		 w_level;
70765557Sjasone	struct witness	*w_children[WITNESS_NCHILDREN];
70865856Sjhb};
70965557Sjasone
/*
 * A pair of lock names; used as the element type of blessed_list[], the
 * table of lock pairs whose ordering problems are not reported.
 */
struct witness_blessed {
	char	*b_lock1;
	char	*b_lock2;
};
71465557Sjasone
71567676Sjhb#ifdef DDB
71665557Sjasone/*
71767676Sjhb * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
71865557Sjasone * drop into kdebug() when:
71965557Sjasone *	- a lock heirarchy violation occurs
72065557Sjasone *	- locks are held when going to sleep.
72165557Sjasone */
72267676Sjhb#ifdef WITNESS_DDB
72367676Sjhbint	witness_ddb = 1;
72467676Sjhb#else
72567676Sjhbint	witness_ddb = 0;
72665557Sjasone#endif
72767676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
72867676Sjhb#endif /* DDB */
72965557Sjasone
73067676Sjhb#ifdef WITNESS_SKIPSPIN
73167676Sjhbint	witness_skipspin = 1;
73267676Sjhb#else
73367676Sjhbint	witness_skipspin = 0;
73465557Sjasone#endif
73567676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
73667676Sjhb    "");
73765557Sjasone
73867676SjhbMUTEX_DECLARE(static,w_mtx);
73965856Sjhbstatic struct witness	*w_free;
74065856Sjhbstatic struct witness	*w_all;
74165856Sjhbstatic int		 w_inited;
74265856Sjhbstatic int		 witness_dead;	/* fatal error, probably no memory */
74365557Sjasone
74465856Sjhbstatic struct witness	 w_data[WITNESS_COUNT];
74565557Sjasone
74667404Sjhbstatic struct witness	 *enroll __P((const char *description, int flag));
74765856Sjhbstatic int itismychild __P((struct witness *parent, struct witness *child));
74865856Sjhbstatic void removechild __P((struct witness *parent, struct witness *child));
74965856Sjhbstatic int isitmychild __P((struct witness *parent, struct witness *child));
75065856Sjhbstatic int isitmydescendant __P((struct witness *parent, struct witness *child));
75165856Sjhbstatic int dup_ok __P((struct witness *));
75265856Sjhbstatic int blessed __P((struct witness *, struct witness *));
75365557Sjasonestatic void witness_displaydescendants
75465856Sjhb    __P((void(*)(const char *fmt, ...), struct witness *));
75565856Sjhbstatic void witness_leveldescendents __P((struct witness *parent, int level));
75665557Sjasonestatic void witness_levelall __P((void));
75765856Sjhbstatic struct witness * witness_get __P((void));
75865856Sjhbstatic void witness_free __P((struct witness *m));
75965557Sjasone
76065557Sjasone
76165557Sjasonestatic char *ignore_list[] = {
76265557Sjasone	"witness lock",
76365557Sjasone	NULL
76465557Sjasone};
76565557Sjasone
76665557Sjasonestatic char *spin_order_list[] = {
76769362Sjhb	"sio",
76865557Sjasone	"sched lock",
76968808Sjhb#ifdef __i386__
77067676Sjhb	"clk",
77168808Sjhb#endif
77268889Sjake	"callout",
77365557Sjasone	/*
77465557Sjasone	 * leaf locks
77565557Sjasone	 */
77665557Sjasone	NULL
77765557Sjasone};
77865557Sjasone
77965557Sjasonestatic char *order_list[] = {
78069208Sjake	"uidinfo hash", "uidinfo struct", NULL,
78165557Sjasone	NULL
78265557Sjasone};
78365557Sjasone
78465557Sjasonestatic char *dup_list[] = {
78565557Sjasone	NULL
78665557Sjasone};
78765557Sjasone
78865557Sjasonestatic char *sleep_list[] = {
78968862Sjake	"Giant",
79065557Sjasone	NULL
79165557Sjasone};
79265557Sjasone
79365557Sjasone/*
79465557Sjasone * Pairs of locks which have been blessed
79565557Sjasone * Don't complain about order problems with blessed locks
79665557Sjasone */
79765856Sjhbstatic struct witness_blessed blessed_list[] = {
79865557Sjasone};
79965856Sjhbstatic int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
80065557Sjasone
80165557Sjasonevoid
80265856Sjhbwitness_init(struct mtx *m, int flag)
80365557Sjasone{
80465557Sjasone	m->mtx_witness = enroll(m->mtx_description, flag);
80565557Sjasone}
80665557Sjasone
80765557Sjasonevoid
80865856Sjhbwitness_destroy(struct mtx *m)
80965557Sjasone{
81065856Sjhb	struct mtx *m1;
81165557Sjasone	struct proc *p;
81265557Sjasone	p = CURPROC;
81365557Sjasone	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
81465557Sjasone		m1 = LIST_NEXT(m1, mtx_held)) {
81565557Sjasone		if (m1 == m) {
81665557Sjasone			LIST_REMOVE(m, mtx_held);
81765557Sjasone			break;
81865557Sjasone		}
81965557Sjasone	}
82065557Sjasone	return;
82165557Sjasone
82265557Sjasone}
82365557Sjasone
82465557Sjasonevoid
82565856Sjhbwitness_enter(struct mtx *m, int flags, const char *file, int line)
82665557Sjasone{
82765856Sjhb	struct witness *w, *w1;
82865856Sjhb	struct mtx *m1;
82965557Sjasone	struct proc *p;
83065557Sjasone	int i;
83167676Sjhb#ifdef DDB
83267676Sjhb	int go_into_ddb = 0;
83367676Sjhb#endif /* DDB */
83465557Sjasone
83569998Sjhb	if (panicstr)
83669998Sjhb		return;
83765557Sjasone	w = m->mtx_witness;
83865557Sjasone	p = CURPROC;
83965557Sjasone
84065557Sjasone	if (flags & MTX_SPIN) {
84165557Sjasone		if (!w->w_spin)
84265651Sjasone			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
84365651Sjasone			    " %s:%d", m->mtx_description, file, line);
84465557Sjasone		if (m->mtx_recurse != 0)
84565557Sjasone			return;
84669998Sjhb		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
84765557Sjasone		i = witness_spin_check;
84865557Sjasone		if (i != 0 && w->w_level < i) {
84969998Sjhb			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
85065651Sjasone			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
85165651Sjasone			    " %s:%d already holding %s:%x",
85265557Sjasone			    m->mtx_description, w->w_level, file, line,
85365557Sjasone			    spin_order_list[ffs(i)-1], i);
85465557Sjasone		}
85565557Sjasone		PCPU_SET(witness_spin_check, i | w->w_level);
85669998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
85769361Sjhb		w->w_file = file;
85869361Sjhb		w->w_line = line;
85969361Sjhb		m->mtx_line = line;
86069361Sjhb		m->mtx_file = file;
86165557Sjasone		return;
86265557Sjasone	}
86365557Sjasone	if (w->w_spin)
86465557Sjasone		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
86565557Sjasone		    m->mtx_description, file, line);
86665557Sjasone
86765557Sjasone	if (m->mtx_recurse != 0)
86865557Sjasone		return;
86965557Sjasone	if (witness_dead)
87065557Sjasone		goto out;
87169998Sjhb	if (cold)
87265557Sjasone		goto out;
87365557Sjasone
87465557Sjasone	if (!mtx_legal2block())
87565557Sjasone		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
87665557Sjasone			    m->mtx_description, file, line);
87765557Sjasone	/*
87865557Sjasone	 * Is this the first mutex acquired
87965557Sjasone	 */
88065557Sjasone	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
88165557Sjasone		goto out;
88265557Sjasone
88365557Sjasone	if ((w1 = m1->mtx_witness) == w) {
88465557Sjasone		if (w->w_same_squawked || dup_ok(w))
88565557Sjasone			goto out;
88665557Sjasone		w->w_same_squawked = 1;
88765557Sjasone		printf("acquring duplicate lock of same type: \"%s\"\n",
88865557Sjasone			m->mtx_description);
88965557Sjasone		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
89065557Sjasone		printf(" 2nd @ %s:%d\n", file, line);
89167676Sjhb#ifdef DDB
89267676Sjhb		go_into_ddb = 1;
89367676Sjhb#endif /* DDB */
89465557Sjasone		goto out;
89565557Sjasone	}
89665557Sjasone	MPASS(!mtx_owned(&w_mtx));
89769998Sjhb	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
89865557Sjasone	/*
89965557Sjasone	 * If we have a known higher number just say ok
90065557Sjasone	 */
90165557Sjasone	if (witness_watch > 1 && w->w_level > w1->w_level) {
90269998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
90365557Sjasone		goto out;
90465557Sjasone	}
90565557Sjasone	if (isitmydescendant(m1->mtx_witness, w)) {
90669998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
90765557Sjasone		goto out;
90865557Sjasone	}
90965557Sjasone	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
91065557Sjasone
91167352Sjhb		MPASS(i < 200);
91265557Sjasone		w1 = m1->mtx_witness;
91365557Sjasone		if (isitmydescendant(w, w1)) {
91469998Sjhb			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
91565557Sjasone			if (blessed(w, w1))
91665557Sjasone				goto out;
91765557Sjasone			if (m1 == &Giant) {
91865557Sjasone				if (w1->w_Giant_squawked)
91965557Sjasone					goto out;
92065557Sjasone				else
92165557Sjasone					w1->w_Giant_squawked = 1;
92265557Sjasone			} else {
92365557Sjasone				if (w1->w_other_squawked)
92465557Sjasone					goto out;
92565557Sjasone				else
92665557Sjasone					w1->w_other_squawked = 1;
92765557Sjasone			}
92865557Sjasone			printf("lock order reversal\n");
92965557Sjasone			printf(" 1st %s last acquired @ %s:%d\n",
93065557Sjasone			    w->w_description, w->w_file, w->w_line);
93165557Sjasone			printf(" 2nd %p %s @ %s:%d\n",
93265557Sjasone			    m1, w1->w_description, w1->w_file, w1->w_line);
93365557Sjasone			printf(" 3rd %p %s @ %s:%d\n",
93465557Sjasone			    m, w->w_description, file, line);
93567676Sjhb#ifdef DDB
93667676Sjhb			go_into_ddb = 1;
93767676Sjhb#endif /* DDB */
93865557Sjasone			goto out;
93965557Sjasone		}
94065557Sjasone	}
94165557Sjasone	m1 = LIST_FIRST(&p->p_heldmtx);
94265557Sjasone	if (!itismychild(m1->mtx_witness, w))
94369998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
94465557Sjasone
94565557Sjasoneout:
94667676Sjhb#ifdef DDB
94767676Sjhb	if (witness_ddb && go_into_ddb)
94867676Sjhb		Debugger("witness_enter");
94967676Sjhb#endif /* DDB */
95065557Sjasone	w->w_file = file;
95165557Sjasone	w->w_line = line;
95265557Sjasone	m->mtx_line = line;
95365557Sjasone	m->mtx_file = file;
95465557Sjasone
95565557Sjasone	/*
95668582Sjhb	 * If this pays off it likely means that a mutex being witnessed
95765557Sjasone	 * is acquired in hardclock. Put it in the ignore list. It is
95865557Sjasone	 * likely not the mutex this assert fails on.
95965557Sjasone	 */
96067352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
96165557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
96265557Sjasone}
96365557Sjasone
96465557Sjasonevoid
96565856Sjhbwitness_exit(struct mtx *m, int flags, const char *file, int line)
96665557Sjasone{
96765856Sjhb	struct witness *w;
96865557Sjasone
96969998Sjhb	if (panicstr)
97069998Sjhb		return;
97165557Sjasone	w = m->mtx_witness;
97265557Sjasone
97365557Sjasone	if (flags & MTX_SPIN) {
97465557Sjasone		if (!w->w_spin)
97565651Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
97665651Sjasone			    " %s:%d", m->mtx_description, file, line);
97765557Sjasone		if (m->mtx_recurse != 0)
97865557Sjasone			return;
97969998Sjhb		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
98065557Sjasone		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
98169998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
98265557Sjasone		return;
98365557Sjasone	}
98465557Sjasone	if (w->w_spin)
98565557Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
98665557Sjasone		    m->mtx_description, file, line);
98765557Sjasone
98865557Sjasone	if (m->mtx_recurse != 0)
98965557Sjasone		return;
99065557Sjasone
99165557Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
99265557Sjasone		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
99365557Sjasone			    m->mtx_description, file, line);
99465557Sjasone	LIST_REMOVE(m, mtx_held);
99565557Sjasone	m->mtx_held.le_prev = NULL;
99665557Sjasone}
99765557Sjasone
99865557Sjasonevoid
99965856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
100065557Sjasone{
100165557Sjasone	struct proc *p;
100265856Sjhb	struct witness *w = m->mtx_witness;
100365557Sjasone
100469998Sjhb	if (panicstr)
100569998Sjhb		return;
100665557Sjasone	if (flags & MTX_SPIN) {
100765557Sjasone		if (!w->w_spin)
100865557Sjasone			panic("mutex_try_enter: "
100965557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
101065557Sjasone			    m->mtx_description, file, line);
101165557Sjasone		if (m->mtx_recurse != 0)
101265557Sjasone			return;
101369998Sjhb		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
101465557Sjasone		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
101569998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
101669361Sjhb		w->w_file = file;
101769361Sjhb		w->w_line = line;
101869361Sjhb		m->mtx_line = line;
101969361Sjhb		m->mtx_file = file;
102065557Sjasone		return;
102165557Sjasone	}
102265557Sjasone
102365557Sjasone	if (w->w_spin)
102465557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
102565557Sjasone		    m->mtx_description, file, line);
102665557Sjasone
102765557Sjasone	if (m->mtx_recurse != 0)
102865557Sjasone		return;
102965557Sjasone
103065557Sjasone	w->w_file = file;
103165557Sjasone	w->w_line = line;
103265557Sjasone	m->mtx_line = line;
103365557Sjasone	m->mtx_file = file;
103465557Sjasone	p = CURPROC;
103567352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
103665557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
103765557Sjasone}
103865557Sjasone
103965557Sjasonevoid
104065557Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
104165557Sjasone{
104265856Sjhb	struct witness *w, *w1;
104365557Sjasone
104465557Sjasone	witness_levelall();
104565557Sjasone
104665557Sjasone	for (w = w_all; w; w = w->w_next) {
104765557Sjasone		if (w->w_file == NULL)
104865557Sjasone			continue;
104965557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
105065557Sjasone			if (isitmychild(w1, w))
105165557Sjasone				break;
105265557Sjasone		}
105365557Sjasone		if (w1 != NULL)
105465557Sjasone			continue;
105565557Sjasone		/*
105665557Sjasone		 * This lock has no anscestors, display its descendants.
105765557Sjasone		 */
105865557Sjasone		witness_displaydescendants(prnt, w);
105965557Sjasone	}
106065557Sjasone	prnt("\nMutex which were never acquired\n");
106165557Sjasone	for (w = w_all; w; w = w->w_next) {
106265557Sjasone		if (w->w_file != NULL)
106365557Sjasone			continue;
106465557Sjasone		prnt("%s\n", w->w_description);
106565557Sjasone	}
106665557Sjasone}
106765557Sjasone
106865557Sjasoneint
106965856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
107065557Sjasone{
107165856Sjhb	struct mtx *m;
107265557Sjasone	struct proc *p;
107365557Sjasone	char **sleep;
107465557Sjasone	int n = 0;
107565557Sjasone
107665557Sjasone	p = CURPROC;
107765557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
107865557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
107965557Sjasone		if (m == mtx)
108065557Sjasone			continue;
108165557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
108265557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
108365557Sjasone				goto next;
108465557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
108565557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
108665557Sjasone			m->mtx_description,
108765557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
108865557Sjasone		n++;
108965557Sjasone	next:
109065557Sjasone	}
109167676Sjhb#ifdef DDB
109267676Sjhb	if (witness_ddb && n)
109367676Sjhb		Debugger("witness_sleep");
109467676Sjhb#endif /* DDB */
109565557Sjasone	return (n);
109665557Sjasone}
109765557Sjasone
109865856Sjhbstatic struct witness *
109967404Sjhbenroll(const char *description, int flag)
110065557Sjasone{
110165557Sjasone	int i;
110265856Sjhb	struct witness *w, *w1;
110365557Sjasone	char **ignore;
110465557Sjasone	char **order;
110565557Sjasone
110665557Sjasone	if (!witness_watch)
110765557Sjasone		return (NULL);
110865557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
110965557Sjasone		if (strcmp(description, *ignore) == 0)
111065557Sjasone			return (NULL);
111165557Sjasone
111265557Sjasone	if (w_inited == 0) {
111369879Sjhb		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_SPIN);
111465557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
111565557Sjasone			w = &w_data[i];
111665557Sjasone			witness_free(w);
111765557Sjasone		}
111865557Sjasone		w_inited = 1;
111965557Sjasone		for (order = order_list; *order != NULL; order++) {
112065557Sjasone			w = enroll(*order, MTX_DEF);
112165557Sjasone			w->w_file = "order list";
112265557Sjasone			for (order++; *order != NULL; order++) {
112365557Sjasone				w1 = enroll(*order, MTX_DEF);
112465557Sjasone				w1->w_file = "order list";
112565557Sjasone				itismychild(w, w1);
112665557Sjasone				w = w1;
112765557Sjasone    	    	    	}
112865557Sjasone		}
112965557Sjasone	}
113065557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
113165557Sjasone		return (NULL);
113269998Sjhb	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
113365557Sjasone	for (w = w_all; w; w = w->w_next) {
113465557Sjasone		if (strcmp(description, w->w_description) == 0) {
113569998Sjhb			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
113665557Sjasone			return (w);
113765557Sjasone		}
113865557Sjasone	}
113965557Sjasone	if ((w = witness_get()) == NULL)
114065557Sjasone		return (NULL);
114165557Sjasone	w->w_next = w_all;
114265557Sjasone	w_all = w;
114365557Sjasone	w->w_description = description;
114469998Sjhb	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
114565557Sjasone	if (flag & MTX_SPIN) {
114665557Sjasone		w->w_spin = 1;
114765557Sjasone
114865557Sjasone		i = 1;
114965557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
115065557Sjasone			if (strcmp(description, *order) == 0)
115165557Sjasone				break;
115265557Sjasone			i <<= 1;
115365557Sjasone		}
115465557Sjasone		if (*order == NULL)
115565557Sjasone			panic("spin lock %s not in order list", description);
115665557Sjasone		w->w_level = i;
115765557Sjasone	}
115865557Sjasone	return (w);
115965557Sjasone}
116065557Sjasone
116165557Sjasonestatic int
116265856Sjhbitismychild(struct witness *parent, struct witness *child)
116365557Sjasone{
116465557Sjasone	static int recursed;
116565557Sjasone
116665557Sjasone	/*
116765557Sjasone	 * Insert "child" after "parent"
116865557Sjasone	 */
116965557Sjasone	while (parent->w_morechildren)
117065557Sjasone		parent = parent->w_morechildren;
117165557Sjasone
117265557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
117365557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
117465557Sjasone			return (1);
117565557Sjasone		parent = parent->w_morechildren;
117665557Sjasone	}
117767352Sjhb	MPASS(child != NULL);
117865557Sjasone	parent->w_children[parent->w_childcnt++] = child;
117965557Sjasone	/*
118065557Sjasone	 * now prune whole tree
118165557Sjasone	 */
118265557Sjasone	if (recursed)
118365557Sjasone		return (0);
118465557Sjasone	recursed = 1;
118565557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
118665557Sjasone		for (parent = w_all; parent != NULL;
118765557Sjasone		    parent = parent->w_next) {
118865557Sjasone			if (!isitmychild(parent, child))
118965557Sjasone				continue;
119065557Sjasone			removechild(parent, child);
119165557Sjasone			if (isitmydescendant(parent, child))
119265557Sjasone				continue;
119365557Sjasone			itismychild(parent, child);
119465557Sjasone		}
119565557Sjasone	}
119665557Sjasone	recursed = 0;
119765557Sjasone	witness_levelall();
119865557Sjasone	return (0);
119965557Sjasone}
120065557Sjasone
120165557Sjasonestatic void
120265856Sjhbremovechild(struct witness *parent, struct witness *child)
120365557Sjasone{
120465856Sjhb	struct witness *w, *w1;
120565557Sjasone	int i;
120665557Sjasone
120765557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
120865557Sjasone		for (i = 0; i < w->w_childcnt; i++)
120965557Sjasone			if (w->w_children[i] == child)
121065557Sjasone				goto found;
121165557Sjasone	return;
121265557Sjasonefound:
121365557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
121465557Sjasone		continue;
121565557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
121667352Sjhb	MPASS(w->w_children[i] != NULL);
121765557Sjasone
121865557Sjasone	if (w1->w_childcnt != 0)
121965557Sjasone		return;
122065557Sjasone
122165557Sjasone	if (w1 == parent)
122265557Sjasone		return;
122365557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
122465557Sjasone		continue;
122565557Sjasone	w->w_morechildren = 0;
122665557Sjasone	witness_free(w1);
122765557Sjasone}
122865557Sjasone
122965557Sjasonestatic int
123065856Sjhbisitmychild(struct witness *parent, struct witness *child)
123165557Sjasone{
123265856Sjhb	struct witness *w;
123365557Sjasone	int i;
123465557Sjasone
123565557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
123665557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
123765557Sjasone			if (w->w_children[i] == child)
123865557Sjasone				return (1);
123965557Sjasone		}
124065557Sjasone	}
124165557Sjasone	return (0);
124265557Sjasone}
124365557Sjasone
124465557Sjasonestatic int
124565856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
124665557Sjasone{
124765856Sjhb	struct witness *w;
124865557Sjasone	int i;
124965557Sjasone	int j;
125065557Sjasone
125165557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
125267352Sjhb		MPASS(j < 1000);
125365557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
125465557Sjasone			if (w->w_children[i] == child)
125565557Sjasone				return (1);
125665557Sjasone		}
125765557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
125865557Sjasone			if (isitmydescendant(w->w_children[i], child))
125965557Sjasone				return (1);
126065557Sjasone		}
126165557Sjasone	}
126265557Sjasone	return (0);
126365557Sjasone}
126465557Sjasone
126565557Sjasonevoid
126665557Sjasonewitness_levelall (void)
126765557Sjasone{
126865856Sjhb	struct witness *w, *w1;
126965557Sjasone
127065557Sjasone	for (w = w_all; w; w = w->w_next)
127165557Sjasone		if (!w->w_spin)
127265557Sjasone			w->w_level = 0;
127365557Sjasone	for (w = w_all; w; w = w->w_next) {
127465557Sjasone		if (w->w_spin)
127565557Sjasone			continue;
127665557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
127765557Sjasone			if (isitmychild(w1, w))
127865557Sjasone				break;
127965557Sjasone		}
128065557Sjasone		if (w1 != NULL)
128165557Sjasone			continue;
128265557Sjasone		witness_leveldescendents(w, 0);
128365557Sjasone	}
128465557Sjasone}
128565557Sjasone
128665557Sjasonestatic void
128765856Sjhbwitness_leveldescendents(struct witness *parent, int level)
128865557Sjasone{
128965557Sjasone	int i;
129065856Sjhb	struct witness *w;
129165557Sjasone
129265557Sjasone	if (parent->w_level < level)
129365557Sjasone		parent->w_level = level;
129465557Sjasone	level++;
129565557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
129665557Sjasone		for (i = 0; i < w->w_childcnt; i++)
129765557Sjasone			witness_leveldescendents(w->w_children[i], level);
129865557Sjasone}
129965557Sjasone
130065557Sjasonestatic void
130165856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
130265856Sjhb			   struct witness *parent)
130365557Sjasone{
130465856Sjhb	struct witness *w;
130565557Sjasone	int i;
130665557Sjasone	int level = parent->w_level;
130765557Sjasone
130865557Sjasone	prnt("%d", level);
130965557Sjasone	if (level < 10)
131065557Sjasone		prnt(" ");
131165557Sjasone	for (i = 0; i < level; i++)
131265557Sjasone		prnt(" ");
131365557Sjasone	prnt("%s", parent->w_description);
131465557Sjasone	if (parent->w_file != NULL) {
131565557Sjasone		prnt(" -- last acquired @ %s", parent->w_file);
131665557Sjasone#ifndef W_USE_WHERE
131765557Sjasone		prnt(":%d", parent->w_line);
131865557Sjasone#endif
131965557Sjasone		prnt("\n");
132065557Sjasone	}
132165557Sjasone
132265557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
132365557Sjasone		for (i = 0; i < w->w_childcnt; i++)
132465557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
132565557Sjasone    }
132665557Sjasone
132765557Sjasonestatic int
132865856Sjhbdup_ok(struct witness *w)
132965557Sjasone{
133065557Sjasone	char **dup;
133165557Sjasone
133265557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
133365557Sjasone		if (strcmp(w->w_description, *dup) == 0)
133465557Sjasone			return (1);
133565557Sjasone	return (0);
133665557Sjasone}
133765557Sjasone
133865557Sjasonestatic int
133965856Sjhbblessed(struct witness *w1, struct witness *w2)
134065557Sjasone{
134165557Sjasone	int i;
134265856Sjhb	struct witness_blessed *b;
134365557Sjasone
134465557Sjasone	for (i = 0; i < blessed_count; i++) {
134565557Sjasone		b = &blessed_list[i];
134665557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
134765557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
134865557Sjasone				return (1);
134965557Sjasone			continue;
135065557Sjasone		}
135165557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
135265557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
135365557Sjasone				return (1);
135465557Sjasone	}
135565557Sjasone	return (0);
135665557Sjasone}
135765557Sjasone
135865856Sjhbstatic struct witness *
135965557Sjasonewitness_get()
136065557Sjasone{
136165856Sjhb	struct witness *w;
136265557Sjasone
136365557Sjasone	if ((w = w_free) == NULL) {
136465557Sjasone		witness_dead = 1;
136569998Sjhb		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
136665557Sjasone		printf("witness exhausted\n");
136765557Sjasone		return (NULL);
136865557Sjasone	}
136965557Sjasone	w_free = w->w_next;
137065856Sjhb	bzero(w, sizeof(*w));
137165557Sjasone	return (w);
137265557Sjasone}
137365557Sjasone
137465557Sjasonestatic void
137565856Sjhbwitness_free(struct witness *w)
137665557Sjasone{
137765557Sjasone	w->w_next = w_free;
137865557Sjasone	w_free = w;
137965557Sjasone}
138065557Sjasone
138169881Sjakeint
138265557Sjasonewitness_list(struct proc *p)
138365557Sjasone{
138465856Sjhb	struct mtx *m;
138569881Sjake	int nheld;
138665557Sjasone
138769881Sjake	nheld = 0;
138865557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
138965557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
139065557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
139165557Sjasone		    m->mtx_description, m,
139265557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
139369881Sjake		nheld++;
139465557Sjasone	}
139569881Sjake
139669881Sjake	return (nheld);
139765557Sjasone}
139865557Sjasone
139965557Sjasonevoid
140065856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
140165557Sjasone{
140265557Sjasone	*filep = m->mtx_witness->w_file;
140365557Sjasone	*linep = m->mtx_witness->w_line;
140465557Sjasone}
140565557Sjasone
140665557Sjasonevoid
140765856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
140865557Sjasone{
140965557Sjasone	m->mtx_witness->w_file = file;
141065557Sjasone	m->mtx_witness->w_line = line;
141165557Sjasone}
141265557Sjasone
141369429Sjhb#endif	/* WITNESS */
1414