subr_turnstile.c revision 68790
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_turnstile.c 68790 2000-11-15 22:08:16Z jhb $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3465557Sjasone *	Main Entry: witness
3565557Sjasone *	Pronunciation: 'wit-n&s
3665557Sjasone *	Function: noun
3765557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
3865557Sjasone *	    testimony, witness, from 2wit
3965557Sjasone *	Date: before 12th century
4065557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4165557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4265557Sjasone *	    a cause or before a judicial tribunal
4365557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4465557Sjasone *	    testify to its having taken place
4565557Sjasone *	4 : one who has personal knowledge of something
4665557Sjasone *	5 a : something serving as evidence or proof : SIGN
4765557Sjasone *	  b : public affirmation by word or example of usually
4865557Sjasone *	      religious faith or conviction <the heroic witness to divine
4965557Sjasone *	      life -- Pilot>
5065557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5165557Sjasone */
5265557Sjasone
5368790Sjhb#include "opt_ddb.h"
5467676Sjhb#include "opt_witness.h"
5567676Sjhb
5665557Sjasone#include <sys/param.h>
5767352Sjhb#include <sys/bus.h>
5867352Sjhb#include <sys/kernel.h>
5967352Sjhb#include <sys/malloc.h>
6065557Sjasone#include <sys/proc.h>
6167676Sjhb#include <sys/sysctl.h>
6265557Sjasone#include <sys/systm.h>
6367352Sjhb#include <sys/vmmeter.h>
6465557Sjasone#include <sys/ktr.h>
6565557Sjasone
6667352Sjhb#include <machine/atomic.h>
6767352Sjhb#include <machine/bus.h>
6867352Sjhb#include <machine/clock.h>
6965557Sjasone#include <machine/cpu.h>
7067352Sjhb
7168790Sjhb#include <ddb/ddb.h>
7268790Sjhb
7367352Sjhb#include <vm/vm.h>
7467352Sjhb#include <vm/vm_extern.h>
7567352Sjhb
7665557Sjasone#define _KERN_MUTEX_C_		/* Cause non-inlined mtx_*() to be compiled. */
7767352Sjhb#include <sys/mutex.h>
7865557Sjasone
7965557Sjasone/*
8067352Sjhb * Machine independent bits of the mutex implementation
8167352Sjhb */
8267352Sjhb/* All mutexes in system (used for debug/panic) */
#ifdef MUTEX_DEBUG
/*
 * Statically initialized debug record referenced by all_mtx below; the
 * string at the end of the initializer is the description text.
 */
static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
	"All mutexes queue head" };
/*
 * Head of the circular list of every initialized mutex.  mtx_init() links
 * new mutexes in through mtx_next/mtx_prev and mtx_destroy() unlinks them.
 * The trailing "&all_mtx, &all_mtx" initializers presumably fill
 * mtx_next/mtx_prev so that the empty list points at itself -- confirm
 * against the struct mtx layout.
 */
static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
	{ NULL, NULL }, &all_mtx, &all_mtx };
#else	/* MUTEX_DEBUG */
/* Same list head; without MUTEX_DEBUG the description string is inline. */
static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
	{ NULL, NULL }, &all_mtx, &all_mtx };
#endif	/* MUTEX_DEBUG */

/* Number of mutexes currently linked on the all_mtx list. */
static int	mtx_cur_cnt;
/* Largest value mtx_cur_cnt has reached (updated in mtx_init()). */
static int	mtx_max_cnt;

/* Argument-less Giant wrappers, defined below. */
void	_mtx_enter_giant_def(void);
void	_mtx_exit_giant_def(void);
/* Marked __unused because its only call site is compiled out with #if 0. */
static void propagate_priority(struct proc *) __unused;
10167352Sjhb
/* True when mutex m has no owner, i.e. its lock word equals MTX_UNOWNED. */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
/*
 * Owning process of mutex m, or NULL when it is unowned.  The owner is
 * recovered from the lock word by masking it with MTX_FLAGMASK.
 */
#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * The uintptr_t stored one word below the address of x.  It is used on a
 * function's first argument when tracing; presumably that slot holds the
 * caller's return address on the supported ABI -- machine dependent,
 * confirm before relying on it.
 */
#define RETIP(x)		(*(((uintptr_t *)(&(x))) - 1))
/*
 * Set the scheduling priority of process p.  The expansion is fully
 * parenthesized so the macro behaves as a single expression wherever it
 * is used.
 */
#define	SET_PRIO(p, pri)	((p)->p_priority = (pri))
10867352Sjhb
10967352Sjhb/*
11067352Sjhb * XXX Temporary, for use from assembly language
11167352Sjhb */
11267352Sjhb
/*
 * Acquire Giant as an MTX_DEF mutex.  Takes no arguments so that it can be
 * called from assembly language.
 */
void
_mtx_enter_giant_def(void)
{

	mtx_enter(&Giant, MTX_DEF);
}
11967352Sjhb
/*
 * Release Giant as an MTX_DEF mutex.  Counterpart of
 * _mtx_enter_giant_def(); takes no arguments so that it can be called from
 * assembly language.
 */
void
_mtx_exit_giant_def(void)
{

	mtx_exit(&Giant, MTX_DEF);
}
12667352Sjhb
12767352Sjhbstatic void
12867352Sjhbpropagate_priority(struct proc *p)
12967352Sjhb{
13067352Sjhb	int pri = p->p_priority;
13167352Sjhb	struct mtx *m = p->p_blocked;
13267352Sjhb
13367352Sjhb	for (;;) {
13467352Sjhb		struct proc *p1;
13567352Sjhb
13667352Sjhb		p = mtx_owner(m);
13767352Sjhb
13867352Sjhb		if (p == NULL) {
13967352Sjhb			/*
14067352Sjhb			 * This really isn't quite right. Really
14167352Sjhb			 * ought to bump priority of process that
14267352Sjhb			 * next acquires the mutex.
14367352Sjhb			 */
14467352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
14567352Sjhb			return;
14667352Sjhb		}
14767352Sjhb		MPASS(p->p_magic == P_MAGIC);
14867352Sjhb		if (p->p_priority <= pri)
14967352Sjhb			return;
15067352Sjhb		/*
15167352Sjhb		 * If lock holder is actually running, just bump priority.
15267352Sjhb		 */
15367352Sjhb		if (TAILQ_NEXT(p, p_procq) == NULL) {
15467352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
15567352Sjhb			SET_PRIO(p, pri);
15667352Sjhb			return;
15767352Sjhb		}
15867352Sjhb		/*
15967352Sjhb		 * If on run queue move to new run queue, and
16067352Sjhb		 * quit.
16167352Sjhb		 */
16267352Sjhb		if (p->p_stat == SRUN) {
16367352Sjhb			MPASS(p->p_blocked == NULL);
16467352Sjhb			remrunqueue(p);
16567352Sjhb			SET_PRIO(p, pri);
16667352Sjhb			setrunqueue(p);
16767352Sjhb			return;
16867352Sjhb		}
16967352Sjhb
17067352Sjhb		/*
17167352Sjhb		 * If we aren't blocked on a mutex, give up and quit.
17267352Sjhb		 */
17367352Sjhb		if (p->p_stat != SMTX) {
17467352Sjhb			printf(
17567352Sjhb	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
17667352Sjhb			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
17767352Sjhb			return;
17867352Sjhb		}
17967352Sjhb
18067352Sjhb		/*
18167352Sjhb		 * Pick up the mutex that p is blocked on.
18267352Sjhb		 */
18367352Sjhb		m = p->p_blocked;
18467352Sjhb		MPASS(m != NULL);
18567352Sjhb
18667352Sjhb		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
18767352Sjhb		    p->p_comm, m->mtx_description);
18867352Sjhb		/*
18967352Sjhb		 * Check if the proc needs to be moved up on
19067352Sjhb		 * the blocked chain
19167352Sjhb		 */
19267352Sjhb		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
19367352Sjhb		    p1->p_priority <= pri) {
19467352Sjhb			if (p1)
19567352Sjhb				printf(
19667352Sjhb	"XXX: previous process %d(%s) has higher priority\n",
19767352Sjhb				    p->p_pid, p->p_comm);
19867352Sjhb			else
19967352Sjhb				printf("XXX: process at head of run queue\n");
20067352Sjhb			continue;
20167352Sjhb		}
20267352Sjhb
20367352Sjhb		/*
20467352Sjhb		 * Remove proc from blocked chain
20567352Sjhb		 */
20667352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
20767352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
20867352Sjhb			MPASS(p1->p_magic == P_MAGIC);
20967352Sjhb			if (p1->p_priority > pri)
21067352Sjhb				break;
21167352Sjhb		}
21267352Sjhb		if (p1)
21367352Sjhb			TAILQ_INSERT_BEFORE(p1, p, p_procq);
21467352Sjhb		else
21567352Sjhb			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
21667352Sjhb		CTR4(KTR_LOCK,
21767352Sjhb		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
21867352Sjhb		    p, p1, m, m->mtx_description);
21967352Sjhb	}
22067352Sjhb}
22167352Sjhb
22267352Sjhbvoid
22367352Sjhbmtx_enter_hard(struct mtx *m, int type, int saveintr)
22467352Sjhb{
22567352Sjhb	struct proc *p = CURPROC;
22667352Sjhb	struct timeval new_switchtime;
22767352Sjhb
22867352Sjhb	KASSERT(p != NULL, ("curproc is NULL in mutex"));
22967352Sjhb
23067352Sjhb	switch (type) {
23167352Sjhb	case MTX_DEF:
23267352Sjhb		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
23367352Sjhb			m->mtx_recurse++;
23467352Sjhb			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
23567352Sjhb			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
23667352Sjhb			return;
23767352Sjhb		}
23867352Sjhb		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
23967548Sjhb		    m, (void *)m->mtx_lock, (void *)RETIP(m));
24067352Sjhb		while (!_obtain_lock(m, p)) {
24167396Sjhb			uintptr_t v;
24267352Sjhb			struct proc *p1;
24367352Sjhb
24467352Sjhb			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
24567352Sjhb			/*
24667352Sjhb			 * check if the lock has been released while
24767352Sjhb			 * waiting for the schedlock.
24867352Sjhb			 */
24967352Sjhb			if ((v = m->mtx_lock) == MTX_UNOWNED) {
25067352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
25167352Sjhb				continue;
25267352Sjhb			}
25367352Sjhb			/*
25467352Sjhb			 * The mutex was marked contested on release. This
25567352Sjhb			 * means that there are processes blocked on it.
25667352Sjhb			 */
25767352Sjhb			if (v == MTX_CONTESTED) {
25867352Sjhb				p1 = TAILQ_FIRST(&m->mtx_blocked);
25967352Sjhb				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
26067352Sjhb				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
26167352Sjhb				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
26267352Sjhb				if (p1->p_priority < p->p_priority) {
26367352Sjhb					SET_PRIO(p, p1->p_priority);
26467352Sjhb				}
26567352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
26667352Sjhb				return;
26767352Sjhb			}
26867352Sjhb			/*
26967352Sjhb			 * If the mutex isn't already contested and
27067352Sjhb			 * a failure occurs setting the contested bit the
27167352Sjhb			 * mutex was either release or the
27267352Sjhb			 * state of the RECURSION bit changed.
27367352Sjhb			 */
27467352Sjhb			if ((v & MTX_CONTESTED) == 0 &&
27567352Sjhb			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
27667352Sjhb				               (void *)(v | MTX_CONTESTED))) {
27767352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
27867352Sjhb				continue;
27967352Sjhb			}
28067352Sjhb
28167352Sjhb			/* We definitely have to sleep for this lock */
28267352Sjhb			mtx_assert(m, MA_NOTOWNED);
28367352Sjhb
28467352Sjhb#ifdef notyet
28567352Sjhb			/*
28667352Sjhb			 * If we're borrowing an interrupted thread's VM
28767352Sjhb			 * context must clean up before going to sleep.
28867352Sjhb			 */
28967352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
29067352Sjhb				ithd_t *it = (ithd_t *)p;
29167352Sjhb
29267352Sjhb				if (it->it_interrupted) {
29367352Sjhb					CTR2(KTR_LOCK,
29467352Sjhb					    "mtx_enter: 0x%x interrupted 0x%x",
29567352Sjhb					    it, it->it_interrupted);
29667352Sjhb					intr_thd_fixup(it);
29767352Sjhb				}
29867352Sjhb			}
29967352Sjhb#endif
30067352Sjhb
30167352Sjhb			/* Put us on the list of procs blocked on this mutex */
30267352Sjhb			if (TAILQ_EMPTY(&m->mtx_blocked)) {
30367352Sjhb				p1 = (struct proc *)(m->mtx_lock &
30467352Sjhb						     MTX_FLAGMASK);
30567352Sjhb				LIST_INSERT_HEAD(&p1->p_contested, m,
30667352Sjhb						 mtx_contested);
30767352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
30867352Sjhb			} else {
30967352Sjhb				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
31067352Sjhb					if (p1->p_priority > p->p_priority)
31167352Sjhb						break;
31267352Sjhb				if (p1)
31367352Sjhb					TAILQ_INSERT_BEFORE(p1, p, p_procq);
31467352Sjhb				else
31567352Sjhb					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
31667352Sjhb							  p_procq);
31767352Sjhb			}
31867352Sjhb
31967352Sjhb			p->p_blocked = m;	/* Who we're blocked on */
32067352Sjhb			p->p_stat = SMTX;
32167352Sjhb#if 0
32267352Sjhb			propagate_priority(p);
32367352Sjhb#endif
32467352Sjhb			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
32567352Sjhb			    p, m, m->mtx_description);
32667352Sjhb			/*
32767352Sjhb			 * Blatantly copied from mi_switch nearly verbatim.
32867352Sjhb			 * When Giant goes away and we stop dinking with it
32967352Sjhb			 * in mi_switch, we can go back to calling mi_switch
33067352Sjhb			 * directly here.
33167352Sjhb			 */
33267352Sjhb
33367352Sjhb			/*
33467352Sjhb			 * Compute the amount of time during which the current
33567352Sjhb			 * process was running, and add that to its total so
33667352Sjhb			 * far.
33767352Sjhb			 */
33867352Sjhb			microuptime(&new_switchtime);
33967352Sjhb			if (timevalcmp(&new_switchtime, &switchtime, <)) {
34067352Sjhb				printf(
34167352Sjhb		    "microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
34267352Sjhb		    		    switchtime.tv_sec, switchtime.tv_usec,
34367352Sjhb		    		    new_switchtime.tv_sec,
34467352Sjhb		    		    new_switchtime.tv_usec);
34567352Sjhb				new_switchtime = switchtime;
34667352Sjhb			} else {
34767352Sjhb				p->p_runtime += (new_switchtime.tv_usec -
34867352Sjhb				    switchtime.tv_usec) +
34967352Sjhb				    (new_switchtime.tv_sec - switchtime.tv_sec) *
35067352Sjhb				    (int64_t)1000000;
35167352Sjhb			}
35267352Sjhb
35367352Sjhb			/*
35467352Sjhb			 * Pick a new current process and record its start time.
35567352Sjhb			 */
35667352Sjhb			cnt.v_swtch++;
35767352Sjhb			switchtime = new_switchtime;
35867352Sjhb			cpu_switch();
35967352Sjhb			if (switchtime.tv_sec == 0)
36067352Sjhb				microuptime(&switchtime);
36167352Sjhb			switchticks = ticks;
36267352Sjhb			CTR3(KTR_LOCK,
36367352Sjhb			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
36467352Sjhb			    p, m, m->mtx_description);
36567352Sjhb			mtx_exit(&sched_lock, MTX_SPIN);
36667352Sjhb		}
36767352Sjhb		return;
36867352Sjhb	case MTX_SPIN:
36967352Sjhb	case MTX_SPIN | MTX_FIRST:
37067352Sjhb	case MTX_SPIN | MTX_TOPHALF:
37167352Sjhb	    {
37267352Sjhb		int i = 0;
37367352Sjhb
37467352Sjhb		if (m->mtx_lock == (uintptr_t)p) {
37567352Sjhb			m->mtx_recurse++;
37667352Sjhb			return;
37767352Sjhb		}
37867352Sjhb		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
37967352Sjhb		for (;;) {
38067352Sjhb			if (_obtain_lock(m, p))
38167352Sjhb				break;
38267352Sjhb			while (m->mtx_lock != MTX_UNOWNED) {
38367352Sjhb				if (i++ < 1000000)
38467352Sjhb					continue;
38567352Sjhb				if (i++ < 6000000)
38667352Sjhb					DELAY (1);
38767352Sjhb#ifdef DDB
38867352Sjhb				else if (!db_active)
38967352Sjhb#else
39067352Sjhb				else
39167352Sjhb#endif
39267352Sjhb					panic(
39367352Sjhb				"spin lock %s held by 0x%p for > 5 seconds",
39467352Sjhb					    m->mtx_description,
39567352Sjhb					    (void *)m->mtx_lock);
39667352Sjhb			}
39767352Sjhb		}
39867352Sjhb
39967352Sjhb#ifdef MUTEX_DEBUG
40067352Sjhb		if (type != MTX_SPIN)
40167352Sjhb			m->mtx_saveintr = 0xbeefface;
40267352Sjhb		else
40367352Sjhb#endif
40467352Sjhb			m->mtx_saveintr = saveintr;
40567352Sjhb		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
40667352Sjhb		return;
40767352Sjhb	    }
40867352Sjhb	}
40967352Sjhb}
41067352Sjhb
41167352Sjhbvoid
41267352Sjhbmtx_exit_hard(struct mtx *m, int type)
41367352Sjhb{
41467352Sjhb	struct proc *p, *p1;
41567352Sjhb	struct mtx *m1;
41667352Sjhb	int pri;
41767352Sjhb
41867352Sjhb	p = CURPROC;
41967352Sjhb	switch (type) {
42067352Sjhb	case MTX_DEF:
42167352Sjhb	case MTX_DEF | MTX_NOSWITCH:
42267352Sjhb		if (m->mtx_recurse != 0) {
42367352Sjhb			if (--(m->mtx_recurse) == 0)
42467352Sjhb				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
42567352Sjhb			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
42667352Sjhb			return;
42767352Sjhb		}
42867352Sjhb		mtx_enter(&sched_lock, MTX_SPIN);
42967352Sjhb		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
43067352Sjhb		p1 = TAILQ_FIRST(&m->mtx_blocked);
43167352Sjhb		MPASS(p->p_magic == P_MAGIC);
43267352Sjhb		MPASS(p1->p_magic == P_MAGIC);
43367352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
43467352Sjhb		if (TAILQ_EMPTY(&m->mtx_blocked)) {
43567352Sjhb			LIST_REMOVE(m, mtx_contested);
43667352Sjhb			_release_lock_quick(m);
43767352Sjhb			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
43867352Sjhb		} else
43967352Sjhb			m->mtx_lock = MTX_CONTESTED;
44067352Sjhb		pri = MAXPRI;
44167352Sjhb		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
44267352Sjhb			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
44367352Sjhb			if (cp < pri)
44467352Sjhb				pri = cp;
44567352Sjhb		}
44667352Sjhb		if (pri > p->p_nativepri)
44767352Sjhb			pri = p->p_nativepri;
44867352Sjhb		SET_PRIO(p, pri);
44967352Sjhb		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
45067352Sjhb		    m, p1);
45167352Sjhb		p1->p_blocked = NULL;
45267352Sjhb		p1->p_stat = SRUN;
45367352Sjhb		setrunqueue(p1);
45467352Sjhb		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
45567352Sjhb#ifdef notyet
45667352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
45767352Sjhb				ithd_t *it = (ithd_t *)p;
45867352Sjhb
45967352Sjhb				if (it->it_interrupted) {
46067352Sjhb					CTR2(KTR_LOCK,
46167352Sjhb					    "mtx_exit: 0x%x interruped 0x%x",
46267352Sjhb					    it, it->it_interrupted);
46367352Sjhb					intr_thd_fixup(it);
46467352Sjhb				}
46567352Sjhb			}
46667352Sjhb#endif
46767352Sjhb			setrunqueue(p);
46867352Sjhb			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
46967548Sjhb			    m, (void *)m->mtx_lock);
47067352Sjhb			mi_switch();
47167352Sjhb			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
47267548Sjhb			    m, (void *)m->mtx_lock);
47367352Sjhb		}
47467352Sjhb		mtx_exit(&sched_lock, MTX_SPIN);
47567352Sjhb		break;
47667352Sjhb	case MTX_SPIN:
47767352Sjhb	case MTX_SPIN | MTX_FIRST:
47867352Sjhb		if (m->mtx_recurse != 0) {
47967352Sjhb			m->mtx_recurse--;
48067352Sjhb			return;
48167352Sjhb		}
48267352Sjhb		MPASS(mtx_owned(m));
48367352Sjhb		_release_lock_quick(m);
48467352Sjhb		if (type & MTX_FIRST)
48567352Sjhb			enable_intr();	/* XXX is this kosher? */
48667352Sjhb		else {
48767352Sjhb			MPASS(m->mtx_saveintr != 0xbeefface);
48867352Sjhb			restore_intr(m->mtx_saveintr);
48967352Sjhb		}
49067352Sjhb		break;
49167352Sjhb	case MTX_SPIN | MTX_TOPHALF:
49267352Sjhb		if (m->mtx_recurse != 0) {
49367352Sjhb			m->mtx_recurse--;
49467352Sjhb			return;
49567352Sjhb		}
49667352Sjhb		MPASS(mtx_owned(m));
49767352Sjhb		_release_lock_quick(m);
49867352Sjhb		break;
49967352Sjhb	default:
50067352Sjhb		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
50167352Sjhb	}
50267352Sjhb}
50367352Sjhb
/* Values for the "when" argument of mtx_validate(). */
#define MV_DESTROY	0	/* validate before destroy */
#define MV_INIT		1	/* validate before init */
50667352Sjhb
50767352Sjhb#ifdef MUTEX_DEBUG
50867352Sjhb
50967352Sjhbint mtx_validate __P((struct mtx *, int));
51067352Sjhb
51167352Sjhbint
51267352Sjhbmtx_validate(struct mtx *m, int when)
51367352Sjhb{
51467352Sjhb	struct mtx *mp;
51567352Sjhb	int i;
51667352Sjhb	int retval = 0;
51767352Sjhb
51867352Sjhb	if (m == &all_mtx || cold)
51967352Sjhb		return 0;
52067352Sjhb
52167352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
52267352Sjhb/*
52367352Sjhb * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
52467352Sjhb * we can re-enable the kernacc() checks.
52567352Sjhb */
52667352Sjhb#ifndef __alpha__
52767352Sjhb	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
52867352Sjhb	    VM_PROT_READ) == 1);
52967352Sjhb#endif
53067352Sjhb	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
53167352Sjhb	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
53267352Sjhb#ifndef __alpha__
53367352Sjhb		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
53467352Sjhb		    VM_PROT_READ) != 1) {
53567352Sjhb			panic("mtx_validate: mp=%p mp->mtx_next=%p",
53667352Sjhb			    mp, mp->mtx_next);
53767352Sjhb		}
53867352Sjhb#endif
53967352Sjhb		i++;
54067352Sjhb		if (i > mtx_cur_cnt) {
54167352Sjhb			panic("mtx_validate: too many in chain, known=%d\n",
54267352Sjhb			    mtx_cur_cnt);
54367352Sjhb		}
54467352Sjhb	}
54567352Sjhb	MPASS(i == mtx_cur_cnt);
54667352Sjhb	switch (when) {
54767352Sjhb	case MV_DESTROY:
54867352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
54967352Sjhb			if (mp == m)
55067352Sjhb				break;
55167352Sjhb		MPASS(mp == m);
55267352Sjhb		break;
55367352Sjhb	case MV_INIT:
55467352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
55567352Sjhb		if (mp == m) {
55667352Sjhb			/*
55767352Sjhb			 * Not good. This mutex already exists.
55867352Sjhb			 */
55967352Sjhb			printf("re-initing existing mutex %s\n",
56067352Sjhb			    m->mtx_description);
56167352Sjhb			MPASS(m->mtx_lock == MTX_UNOWNED);
56267352Sjhb			retval = 1;
56367352Sjhb		}
56467352Sjhb	}
56567352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
56667352Sjhb	return (retval);
56767352Sjhb}
56867352Sjhb#endif
56967352Sjhb
57067352Sjhbvoid
57167352Sjhbmtx_init(struct mtx *m, const char *t, int flag)
57267352Sjhb{
57367352Sjhb#ifdef MUTEX_DEBUG
57467352Sjhb	struct mtx_debug *debug;
57567352Sjhb#endif
57667352Sjhb
57767352Sjhb	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
57867352Sjhb#ifdef MUTEX_DEBUG
57967352Sjhb	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
58067352Sjhb		return;
58167352Sjhb	if (flag & MTX_COLD)
58267352Sjhb		debug = m->mtx_debug;
58367352Sjhb	else
58467352Sjhb		debug = NULL;
58567352Sjhb	if (debug == NULL) {
58667352Sjhb#ifdef DIAGNOSTIC
58767352Sjhb		if(cold && bootverbose)
58867352Sjhb			printf("malloc'ing mtx_debug while cold for %s\n", t);
58967352Sjhb#endif
59067352Sjhb
59167352Sjhb		/* XXX - should not use DEVBUF */
59267352Sjhb		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
59367352Sjhb		MPASS(debug != NULL);
59467352Sjhb		bzero(debug, sizeof(struct mtx_debug));
59567352Sjhb	}
59667352Sjhb#endif
59767352Sjhb	bzero((void *)m, sizeof *m);
59867352Sjhb	TAILQ_INIT(&m->mtx_blocked);
59967352Sjhb#ifdef MUTEX_DEBUG
60067352Sjhb	m->mtx_debug = debug;
60167352Sjhb#endif
60267352Sjhb	m->mtx_description = t;
60367352Sjhb	m->mtx_lock = MTX_UNOWNED;
60467352Sjhb	/* Put on all mutex queue */
60567352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
60667352Sjhb	m->mtx_next = &all_mtx;
60767352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
60867352Sjhb	m->mtx_prev->mtx_next = m;
60967352Sjhb	all_mtx.mtx_prev = m;
61067352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
61167352Sjhb		mtx_max_cnt = mtx_cur_cnt;
61267352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
61367352Sjhb	witness_init(m, flag);
61467352Sjhb}
61567352Sjhb
61667352Sjhbvoid
61767352Sjhbmtx_destroy(struct mtx *m)
61867352Sjhb{
61967352Sjhb
62067352Sjhb	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
62167352Sjhb#ifdef MUTEX_DEBUG
62267352Sjhb	if (m->mtx_next == NULL)
62367352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
62467352Sjhb		    m, m->mtx_description);
62567352Sjhb
62667352Sjhb	if (!mtx_owned(m)) {
62767352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
62867352Sjhb	} else {
62967352Sjhb		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
63067352Sjhb	}
63167352Sjhb	mtx_validate(m, MV_DESTROY);		/* diagnostic */
63267352Sjhb#endif
63367352Sjhb
63467352Sjhb#ifdef WITNESS
63567352Sjhb	if (m->mtx_witness)
63667352Sjhb		witness_destroy(m);
63767352Sjhb#endif /* WITNESS */
63867352Sjhb
63967352Sjhb	/* Remove from the all mutex queue */
64067352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
64167352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
64267352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
64367352Sjhb#ifdef MUTEX_DEBUG
64467352Sjhb	m->mtx_next = m->mtx_prev = NULL;
64567352Sjhb	free(m->mtx_debug, M_DEVBUF);
64667352Sjhb	m->mtx_debug = NULL;
64767352Sjhb#endif
64867352Sjhb	mtx_cur_cnt--;
64967352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
65067352Sjhb}
65167352Sjhb
65267352Sjhb/*
65365557Sjasone * The non-inlined versions of the mtx_*() functions are always built (above),
65467352Sjhb * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
65565557Sjasone * being specified.
65665557Sjasone */
65767352Sjhb#if (defined(MUTEX_DEBUG) && defined(WITNESS))
65865557Sjasone
/* Number of entries in the static w_data[] array of witness structures. */
#define WITNESS_COUNT 200
/* Number of child pointers stored directly in each struct witness. */
#define	WITNESS_NCHILDREN 2

/*
 * Witness checking level.  witness_enter() accepts an acquisition without
 * further checks when this is greater than 1 and the new lock's w_level is
 * higher than that of the most recently held lock.
 */
int witness_watch = 1;

/*
 * Per-lock record used for lock order checking; a mutex points at its
 * record through mtx_witness.
 */
struct witness {
	struct witness	*w_next;	/* list linkage -- presumably for w_free/w_all, confirm */
	const char	*w_description;	/* name printed in order-reversal reports */
	const char	*w_file;	/* file printed as the acquisition site */
	int		 w_line;	/* line printed as the acquisition site */
	struct witness	*w_morechildren; /* presumably overflow beyond w_children[] -- confirm */
	u_char		 w_childcnt;
	u_char		 w_Giant_squawked:1; /* reversal against Giant already reported */
	u_char		 w_other_squawked:1; /* reversal against another lock already reported */
	u_char		 w_same_squawked:1; /* duplicate acquisition already reported */
	u_char		 w_sleep:1;
	u_char		 w_spin:1;	/* this is a spin mutex */
	u_int		 w_level;	/* compared and OR-ed as a mask in witness_enter() */
	struct witness	*w_children[WITNESS_NCHILDREN];
};

/* A pair of lock names; see blessed_list[] for how pairs are used. */
struct witness_blessed {
	char 	*b_lock1;
	char	*b_lock2;
};
68465557Sjasone
#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system
 * to drop into the kernel debugger when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 *
 * The default is 1 when the kernel is built with WITNESS_DDB and 0
 * otherwise; it is exported read/write as the sysctl debug.witness_ddb.
 */
#ifdef WITNESS_DDB
int	witness_ddb = 1;
#else
int	witness_ddb = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
#endif /* DDB */

/*
 * Set to 1 when the kernel is built with WITNESS_SKIPSPIN, 0 otherwise, and
 * exported read-only as the sysctl debug.witness_skipspin.  Presumably makes
 * witness skip checks on spin mutexes -- its consumer is not in this part
 * of the file, confirm.
 */
#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
    "");
70765557Sjasone
/* Mutex taken with MTX_SPIN by witness_enter() around its checks. */
MUTEX_DECLARE(static,w_mtx);
/* Presumably the list of unused witness structures -- confirm. */
static struct witness	*w_free;
/* Presumably the list of all enrolled witnesses -- confirm. */
static struct witness	*w_all;
static int		 w_inited;
static int		 witness_dead;	/* fatal error, probably no memory */

/* Static storage for witness structures. */
static struct witness	 w_data[WITNESS_COUNT];

/* Forward declarations of the file-local witness helpers. */
static struct witness	 *enroll __P((const char *description, int flag));
static int itismychild __P((struct witness *parent, struct witness *child));
static void removechild __P((struct witness *parent, struct witness *child));
static int isitmychild __P((struct witness *parent, struct witness *child));
static int isitmydescendant __P((struct witness *parent, struct witness *child));
static int dup_ok __P((struct witness *));
static int blessed __P((struct witness *, struct witness *));
static void witness_displaydescendants
    __P((void(*)(const char *fmt, ...), struct witness *));
static void witness_leveldescendents __P((struct witness *parent, int level));
static void witness_levelall __P((void));
static struct witness * witness_get __P((void));
static void witness_free __P((struct witness *m));
72965557Sjasone
73065557Sjasone
/*
 * NULL-terminated list of lock names; presumably locks that witness does
 * not track -- confirm against enroll().
 */
static char *ignore_list[] = {
	"witness lock",
	NULL
};

/*
 * NULL-terminated list of spin lock names.  witness_enter() indexes this
 * array with ffs(mask) - 1 when reporting an out-of-order spin lock, so a
 * name's position corresponds to a bit in the spin-check mask.
 */
static char *spin_order_list[] = {
	"sched lock",
	"clk",
	"sio",
	/*
	 * leaf locks
	 */
	NULL
};

/*
 * NULL-terminated list of lock names; presumably the predefined order for
 * non-spin locks (currently empty) -- confirm.
 */
static char *order_list[] = {
	NULL
};

/*
 * NULL-terminated list of lock names; presumably locks for which duplicate
 * acquisition is acceptable (see dup_ok()) -- confirm.
 */
static char *dup_list[] = {
	NULL
};

/*
 * NULL-terminated list of lock names; presumably locks that may be held
 * while sleeping -- confirm.
 */
static char *sleep_list[] = {
	"Giant lock",
	NULL
};

/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
/* Number of entries in blessed_list[]. */
static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
76665557Sjasone
/*
 * Attach a witness to mutex m: the result of enroll(), called with the
 * mutex's description and the caller's flag, is stored in m->mtx_witness.
 */
void
witness_init(struct mtx *m, int flag)
{
	m->mtx_witness = enroll(m->mtx_description, flag);
}
77265557Sjasone
77365557Sjasonevoid
77465856Sjhbwitness_destroy(struct mtx *m)
77565557Sjasone{
77665856Sjhb	struct mtx *m1;
77765557Sjasone	struct proc *p;
77865557Sjasone	p = CURPROC;
77965557Sjasone	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
78065557Sjasone		m1 = LIST_NEXT(m1, mtx_held)) {
78165557Sjasone		if (m1 == m) {
78265557Sjasone			LIST_REMOVE(m, mtx_held);
78365557Sjasone			break;
78465557Sjasone		}
78565557Sjasone	}
78665557Sjasone	return;
78765557Sjasone
78865557Sjasone}
78965557Sjasone
79065557Sjasonevoid
79165856Sjhbwitness_enter(struct mtx *m, int flags, const char *file, int line)
79265557Sjasone{
79365856Sjhb	struct witness *w, *w1;
79465856Sjhb	struct mtx *m1;
79565557Sjasone	struct proc *p;
79665557Sjasone	int i;
79767676Sjhb#ifdef DDB
79867676Sjhb	int go_into_ddb = 0;
79967676Sjhb#endif /* DDB */
80065557Sjasone
80165557Sjasone	w = m->mtx_witness;
80265557Sjasone	p = CURPROC;
80365557Sjasone
80465557Sjasone	if (flags & MTX_SPIN) {
80565557Sjasone		if (!w->w_spin)
80665651Sjasone			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
80765651Sjasone			    " %s:%d", m->mtx_description, file, line);
80865557Sjasone		if (m->mtx_recurse != 0)
80965557Sjasone			return;
81065557Sjasone		mtx_enter(&w_mtx, MTX_SPIN);
81165557Sjasone		i = witness_spin_check;
81265557Sjasone		if (i != 0 && w->w_level < i) {
81365557Sjasone			mtx_exit(&w_mtx, MTX_SPIN);
81465651Sjasone			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
81565651Sjasone			    " %s:%d already holding %s:%x",
81665557Sjasone			    m->mtx_description, w->w_level, file, line,
81765557Sjasone			    spin_order_list[ffs(i)-1], i);
81865557Sjasone		}
81965557Sjasone		PCPU_SET(witness_spin_check, i | w->w_level);
82065557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
82165557Sjasone		return;
82265557Sjasone	}
82365557Sjasone	if (w->w_spin)
82465557Sjasone		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
82565557Sjasone		    m->mtx_description, file, line);
82665557Sjasone
82765557Sjasone	if (m->mtx_recurse != 0)
82865557Sjasone		return;
82965557Sjasone	if (witness_dead)
83065557Sjasone		goto out;
83168785Sjhb	if (cold || panicstr)
83265557Sjasone		goto out;
83365557Sjasone
83465557Sjasone	if (!mtx_legal2block())
83565557Sjasone		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
83665557Sjasone			    m->mtx_description, file, line);
83765557Sjasone	/*
83865557Sjasone	 * Is this the first mutex acquired
83965557Sjasone	 */
84065557Sjasone	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
84165557Sjasone		goto out;
84265557Sjasone
84365557Sjasone	if ((w1 = m1->mtx_witness) == w) {
84465557Sjasone		if (w->w_same_squawked || dup_ok(w))
84565557Sjasone			goto out;
84665557Sjasone		w->w_same_squawked = 1;
84765557Sjasone		printf("acquring duplicate lock of same type: \"%s\"\n",
84865557Sjasone			m->mtx_description);
84965557Sjasone		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
85065557Sjasone		printf(" 2nd @ %s:%d\n", file, line);
85167676Sjhb#ifdef DDB
85267676Sjhb		go_into_ddb = 1;
85367676Sjhb#endif /* DDB */
85465557Sjasone		goto out;
85565557Sjasone	}
85665557Sjasone	MPASS(!mtx_owned(&w_mtx));
85765557Sjasone	mtx_enter(&w_mtx, MTX_SPIN);
85865557Sjasone	/*
85965557Sjasone	 * If we have a known higher number just say ok
86065557Sjasone	 */
86165557Sjasone	if (witness_watch > 1 && w->w_level > w1->w_level) {
86265557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
86365557Sjasone		goto out;
86465557Sjasone	}
86565557Sjasone	if (isitmydescendant(m1->mtx_witness, w)) {
86665557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
86765557Sjasone		goto out;
86865557Sjasone	}
86965557Sjasone	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
87065557Sjasone
87167352Sjhb		MPASS(i < 200);
87265557Sjasone		w1 = m1->mtx_witness;
87365557Sjasone		if (isitmydescendant(w, w1)) {
87465557Sjasone			mtx_exit(&w_mtx, MTX_SPIN);
87565557Sjasone			if (blessed(w, w1))
87665557Sjasone				goto out;
87765557Sjasone			if (m1 == &Giant) {
87865557Sjasone				if (w1->w_Giant_squawked)
87965557Sjasone					goto out;
88065557Sjasone				else
88165557Sjasone					w1->w_Giant_squawked = 1;
88265557Sjasone			} else {
88365557Sjasone				if (w1->w_other_squawked)
88465557Sjasone					goto out;
88565557Sjasone				else
88665557Sjasone					w1->w_other_squawked = 1;
88765557Sjasone			}
88865557Sjasone			printf("lock order reversal\n");
88965557Sjasone			printf(" 1st %s last acquired @ %s:%d\n",
89065557Sjasone			    w->w_description, w->w_file, w->w_line);
89165557Sjasone			printf(" 2nd %p %s @ %s:%d\n",
89265557Sjasone			    m1, w1->w_description, w1->w_file, w1->w_line);
89365557Sjasone			printf(" 3rd %p %s @ %s:%d\n",
89465557Sjasone			    m, w->w_description, file, line);
89567676Sjhb#ifdef DDB
89667676Sjhb			go_into_ddb = 1;
89767676Sjhb#endif /* DDB */
89865557Sjasone			goto out;
89965557Sjasone		}
90065557Sjasone	}
90165557Sjasone	m1 = LIST_FIRST(&p->p_heldmtx);
90265557Sjasone	if (!itismychild(m1->mtx_witness, w))
90365557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
90465557Sjasone
90565557Sjasoneout:
90667676Sjhb#ifdef DDB
90767676Sjhb	if (witness_ddb && go_into_ddb)
90867676Sjhb		Debugger("witness_enter");
90967676Sjhb#endif /* DDB */
91065557Sjasone	w->w_file = file;
91165557Sjasone	w->w_line = line;
91265557Sjasone	m->mtx_line = line;
91365557Sjasone	m->mtx_file = file;
91465557Sjasone
91565557Sjasone	/*
91668582Sjhb	 * If this pays off it likely means that a mutex being witnessed
91765557Sjasone	 * is acquired in hardclock. Put it in the ignore list. It is
91865557Sjasone	 * likely not the mutex this assert fails on.
91965557Sjasone	 */
92067352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
92165557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
92265557Sjasone}
92365557Sjasone
92465557Sjasonevoid
92565856Sjhbwitness_exit(struct mtx *m, int flags, const char *file, int line)
92665557Sjasone{
92765856Sjhb	struct witness *w;
92865557Sjasone
92965557Sjasone	w = m->mtx_witness;
93065557Sjasone
93165557Sjasone	if (flags & MTX_SPIN) {
93265557Sjasone		if (!w->w_spin)
93365651Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
93465651Sjasone			    " %s:%d", m->mtx_description, file, line);
93565557Sjasone		if (m->mtx_recurse != 0)
93665557Sjasone			return;
93765557Sjasone		mtx_enter(&w_mtx, MTX_SPIN);
93865557Sjasone		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
93965557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
94065557Sjasone		return;
94165557Sjasone	}
94265557Sjasone	if (w->w_spin)
94365557Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
94465557Sjasone		    m->mtx_description, file, line);
94565557Sjasone
94665557Sjasone	if (m->mtx_recurse != 0)
94765557Sjasone		return;
94865557Sjasone
94965557Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
95065557Sjasone		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
95165557Sjasone			    m->mtx_description, file, line);
95265557Sjasone	LIST_REMOVE(m, mtx_held);
95365557Sjasone	m->mtx_held.le_prev = NULL;
95465557Sjasone}
95565557Sjasone
95665557Sjasonevoid
95765856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
95865557Sjasone{
95965557Sjasone	struct proc *p;
96065856Sjhb	struct witness *w = m->mtx_witness;
96165557Sjasone
96265557Sjasone	if (flags & MTX_SPIN) {
96365557Sjasone		if (!w->w_spin)
96465557Sjasone			panic("mutex_try_enter: "
96565557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
96665557Sjasone			    m->mtx_description, file, line);
96765557Sjasone		if (m->mtx_recurse != 0)
96865557Sjasone			return;
96965557Sjasone		mtx_enter(&w_mtx, MTX_SPIN);
97065557Sjasone		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
97165557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
97265557Sjasone		return;
97365557Sjasone	}
97465557Sjasone
97565557Sjasone	if (w->w_spin)
97665557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
97765557Sjasone		    m->mtx_description, file, line);
97865557Sjasone
97965557Sjasone	if (m->mtx_recurse != 0)
98065557Sjasone		return;
98165557Sjasone
98265557Sjasone	w->w_file = file;
98365557Sjasone	w->w_line = line;
98465557Sjasone	m->mtx_line = line;
98565557Sjasone	m->mtx_file = file;
98665557Sjasone	p = CURPROC;
98767352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
98865557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
98965557Sjasone}
99065557Sjasone
99165557Sjasonevoid
99265557Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
99365557Sjasone{
99465856Sjhb	struct witness *w, *w1;
99565557Sjasone
99665557Sjasone	witness_levelall();
99765557Sjasone
99865557Sjasone	for (w = w_all; w; w = w->w_next) {
99965557Sjasone		if (w->w_file == NULL)
100065557Sjasone			continue;
100165557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
100265557Sjasone			if (isitmychild(w1, w))
100365557Sjasone				break;
100465557Sjasone		}
100565557Sjasone		if (w1 != NULL)
100665557Sjasone			continue;
100765557Sjasone		/*
100865557Sjasone		 * This lock has no anscestors, display its descendants.
100965557Sjasone		 */
101065557Sjasone		witness_displaydescendants(prnt, w);
101165557Sjasone	}
101265557Sjasone	prnt("\nMutex which were never acquired\n");
101365557Sjasone	for (w = w_all; w; w = w->w_next) {
101465557Sjasone		if (w->w_file != NULL)
101565557Sjasone			continue;
101665557Sjasone		prnt("%s\n", w->w_description);
101765557Sjasone	}
101865557Sjasone}
101965557Sjasone
102065557Sjasoneint
102165856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
102265557Sjasone{
102365856Sjhb	struct mtx *m;
102465557Sjasone	struct proc *p;
102565557Sjasone	char **sleep;
102665557Sjasone	int n = 0;
102765557Sjasone
102865557Sjasone	p = CURPROC;
102965557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
103065557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
103165557Sjasone		if (m == mtx)
103265557Sjasone			continue;
103365557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
103465557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
103565557Sjasone				goto next;
103665557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
103765557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
103865557Sjasone			m->mtx_description,
103965557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
104065557Sjasone		n++;
104165557Sjasone	next:
104265557Sjasone	}
104367676Sjhb#ifdef DDB
104467676Sjhb	if (witness_ddb && n)
104567676Sjhb		Debugger("witness_sleep");
104667676Sjhb#endif /* DDB */
104765557Sjasone	return (n);
104865557Sjasone}
104965557Sjasone
105065856Sjhbstatic struct witness *
105167404Sjhbenroll(const char *description, int flag)
105265557Sjasone{
105365557Sjasone	int i;
105465856Sjhb	struct witness *w, *w1;
105565557Sjasone	char **ignore;
105665557Sjasone	char **order;
105765557Sjasone
105865557Sjasone	if (!witness_watch)
105965557Sjasone		return (NULL);
106065557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
106165557Sjasone		if (strcmp(description, *ignore) == 0)
106265557Sjasone			return (NULL);
106365557Sjasone
106465557Sjasone	if (w_inited == 0) {
106567676Sjhb		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
106665557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
106765557Sjasone			w = &w_data[i];
106865557Sjasone			witness_free(w);
106965557Sjasone		}
107065557Sjasone		w_inited = 1;
107165557Sjasone		for (order = order_list; *order != NULL; order++) {
107265557Sjasone			w = enroll(*order, MTX_DEF);
107365557Sjasone			w->w_file = "order list";
107465557Sjasone			for (order++; *order != NULL; order++) {
107565557Sjasone				w1 = enroll(*order, MTX_DEF);
107665557Sjasone				w1->w_file = "order list";
107765557Sjasone				itismychild(w, w1);
107865557Sjasone				w = w1;
107965557Sjasone    	    	    	}
108065557Sjasone		}
108165557Sjasone	}
108265557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
108365557Sjasone		return (NULL);
108465557Sjasone	mtx_enter(&w_mtx, MTX_SPIN);
108565557Sjasone	for (w = w_all; w; w = w->w_next) {
108665557Sjasone		if (strcmp(description, w->w_description) == 0) {
108765557Sjasone			mtx_exit(&w_mtx, MTX_SPIN);
108865557Sjasone			return (w);
108965557Sjasone		}
109065557Sjasone	}
109165557Sjasone	if ((w = witness_get()) == NULL)
109265557Sjasone		return (NULL);
109365557Sjasone	w->w_next = w_all;
109465557Sjasone	w_all = w;
109565557Sjasone	w->w_description = description;
109665557Sjasone	mtx_exit(&w_mtx, MTX_SPIN);
109765557Sjasone	if (flag & MTX_SPIN) {
109865557Sjasone		w->w_spin = 1;
109965557Sjasone
110065557Sjasone		i = 1;
110165557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
110265557Sjasone			if (strcmp(description, *order) == 0)
110365557Sjasone				break;
110465557Sjasone			i <<= 1;
110565557Sjasone		}
110665557Sjasone		if (*order == NULL)
110765557Sjasone			panic("spin lock %s not in order list", description);
110865557Sjasone		w->w_level = i;
110965557Sjasone	}
111065557Sjasone	return (w);
111165557Sjasone}
111265557Sjasone
111365557Sjasonestatic int
111465856Sjhbitismychild(struct witness *parent, struct witness *child)
111565557Sjasone{
111665557Sjasone	static int recursed;
111765557Sjasone
111865557Sjasone	/*
111965557Sjasone	 * Insert "child" after "parent"
112065557Sjasone	 */
112165557Sjasone	while (parent->w_morechildren)
112265557Sjasone		parent = parent->w_morechildren;
112365557Sjasone
112465557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
112565557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
112665557Sjasone			return (1);
112765557Sjasone		parent = parent->w_morechildren;
112865557Sjasone	}
112967352Sjhb	MPASS(child != NULL);
113065557Sjasone	parent->w_children[parent->w_childcnt++] = child;
113165557Sjasone	/*
113265557Sjasone	 * now prune whole tree
113365557Sjasone	 */
113465557Sjasone	if (recursed)
113565557Sjasone		return (0);
113665557Sjasone	recursed = 1;
113765557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
113865557Sjasone		for (parent = w_all; parent != NULL;
113965557Sjasone		    parent = parent->w_next) {
114065557Sjasone			if (!isitmychild(parent, child))
114165557Sjasone				continue;
114265557Sjasone			removechild(parent, child);
114365557Sjasone			if (isitmydescendant(parent, child))
114465557Sjasone				continue;
114565557Sjasone			itismychild(parent, child);
114665557Sjasone		}
114765557Sjasone	}
114865557Sjasone	recursed = 0;
114965557Sjasone	witness_levelall();
115065557Sjasone	return (0);
115165557Sjasone}
115265557Sjasone
115365557Sjasonestatic void
115465856Sjhbremovechild(struct witness *parent, struct witness *child)
115565557Sjasone{
115665856Sjhb	struct witness *w, *w1;
115765557Sjasone	int i;
115865557Sjasone
115965557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
116065557Sjasone		for (i = 0; i < w->w_childcnt; i++)
116165557Sjasone			if (w->w_children[i] == child)
116265557Sjasone				goto found;
116365557Sjasone	return;
116465557Sjasonefound:
116565557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
116665557Sjasone		continue;
116765557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
116867352Sjhb	MPASS(w->w_children[i] != NULL);
116965557Sjasone
117065557Sjasone	if (w1->w_childcnt != 0)
117165557Sjasone		return;
117265557Sjasone
117365557Sjasone	if (w1 == parent)
117465557Sjasone		return;
117565557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
117665557Sjasone		continue;
117765557Sjasone	w->w_morechildren = 0;
117865557Sjasone	witness_free(w1);
117965557Sjasone}
118065557Sjasone
118165557Sjasonestatic int
118265856Sjhbisitmychild(struct witness *parent, struct witness *child)
118365557Sjasone{
118465856Sjhb	struct witness *w;
118565557Sjasone	int i;
118665557Sjasone
118765557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
118865557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
118965557Sjasone			if (w->w_children[i] == child)
119065557Sjasone				return (1);
119165557Sjasone		}
119265557Sjasone	}
119365557Sjasone	return (0);
119465557Sjasone}
119565557Sjasone
119665557Sjasonestatic int
119765856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
119865557Sjasone{
119965856Sjhb	struct witness *w;
120065557Sjasone	int i;
120165557Sjasone	int j;
120265557Sjasone
120365557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
120467352Sjhb		MPASS(j < 1000);
120565557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
120665557Sjasone			if (w->w_children[i] == child)
120765557Sjasone				return (1);
120865557Sjasone		}
120965557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
121065557Sjasone			if (isitmydescendant(w->w_children[i], child))
121165557Sjasone				return (1);
121265557Sjasone		}
121365557Sjasone	}
121465557Sjasone	return (0);
121565557Sjasone}
121665557Sjasone
121765557Sjasonevoid
121865557Sjasonewitness_levelall (void)
121965557Sjasone{
122065856Sjhb	struct witness *w, *w1;
122165557Sjasone
122265557Sjasone	for (w = w_all; w; w = w->w_next)
122365557Sjasone		if (!w->w_spin)
122465557Sjasone			w->w_level = 0;
122565557Sjasone	for (w = w_all; w; w = w->w_next) {
122665557Sjasone		if (w->w_spin)
122765557Sjasone			continue;
122865557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
122965557Sjasone			if (isitmychild(w1, w))
123065557Sjasone				break;
123165557Sjasone		}
123265557Sjasone		if (w1 != NULL)
123365557Sjasone			continue;
123465557Sjasone		witness_leveldescendents(w, 0);
123565557Sjasone	}
123665557Sjasone}
123765557Sjasone
123865557Sjasonestatic void
123965856Sjhbwitness_leveldescendents(struct witness *parent, int level)
124065557Sjasone{
124165557Sjasone	int i;
124265856Sjhb	struct witness *w;
124365557Sjasone
124465557Sjasone	if (parent->w_level < level)
124565557Sjasone		parent->w_level = level;
124665557Sjasone	level++;
124765557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
124865557Sjasone		for (i = 0; i < w->w_childcnt; i++)
124965557Sjasone			witness_leveldescendents(w->w_children[i], level);
125065557Sjasone}
125165557Sjasone
125265557Sjasonestatic void
125365856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
125465856Sjhb			   struct witness *parent)
125565557Sjasone{
125665856Sjhb	struct witness *w;
125765557Sjasone	int i;
125865557Sjasone	int level = parent->w_level;
125965557Sjasone
126065557Sjasone	prnt("%d", level);
126165557Sjasone	if (level < 10)
126265557Sjasone		prnt(" ");
126365557Sjasone	for (i = 0; i < level; i++)
126465557Sjasone		prnt(" ");
126565557Sjasone	prnt("%s", parent->w_description);
126665557Sjasone	if (parent->w_file != NULL) {
126765557Sjasone		prnt(" -- last acquired @ %s", parent->w_file);
126865557Sjasone#ifndef W_USE_WHERE
126965557Sjasone		prnt(":%d", parent->w_line);
127065557Sjasone#endif
127165557Sjasone		prnt("\n");
127265557Sjasone	}
127365557Sjasone
127465557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
127565557Sjasone		for (i = 0; i < w->w_childcnt; i++)
127665557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
127765557Sjasone    }
127865557Sjasone
127965557Sjasonestatic int
128065856Sjhbdup_ok(struct witness *w)
128165557Sjasone{
128265557Sjasone	char **dup;
128365557Sjasone
128465557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
128565557Sjasone		if (strcmp(w->w_description, *dup) == 0)
128665557Sjasone			return (1);
128765557Sjasone	return (0);
128865557Sjasone}
128965557Sjasone
129065557Sjasonestatic int
129165856Sjhbblessed(struct witness *w1, struct witness *w2)
129265557Sjasone{
129365557Sjasone	int i;
129465856Sjhb	struct witness_blessed *b;
129565557Sjasone
129665557Sjasone	for (i = 0; i < blessed_count; i++) {
129765557Sjasone		b = &blessed_list[i];
129865557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
129965557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
130065557Sjasone				return (1);
130165557Sjasone			continue;
130265557Sjasone		}
130365557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
130465557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
130565557Sjasone				return (1);
130665557Sjasone	}
130765557Sjasone	return (0);
130865557Sjasone}
130965557Sjasone
131065856Sjhbstatic struct witness *
131165557Sjasonewitness_get()
131265557Sjasone{
131365856Sjhb	struct witness *w;
131465557Sjasone
131565557Sjasone	if ((w = w_free) == NULL) {
131665557Sjasone		witness_dead = 1;
131765557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
131865557Sjasone		printf("witness exhausted\n");
131965557Sjasone		return (NULL);
132065557Sjasone	}
132165557Sjasone	w_free = w->w_next;
132265856Sjhb	bzero(w, sizeof(*w));
132365557Sjasone	return (w);
132465557Sjasone}
132565557Sjasone
132665557Sjasonestatic void
132765856Sjhbwitness_free(struct witness *w)
132865557Sjasone{
132965557Sjasone	w->w_next = w_free;
133065557Sjasone	w_free = w;
133165557Sjasone}
133265557Sjasone
133365557Sjasonevoid
133465557Sjasonewitness_list(struct proc *p)
133565557Sjasone{
133665856Sjhb	struct mtx *m;
133765557Sjasone
133865557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
133965557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
134065557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
134165557Sjasone		    m->mtx_description, m,
134265557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
134365557Sjasone	}
134465557Sjasone}
134565557Sjasone
134665557Sjasonevoid
134765856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
134865557Sjasone{
134965557Sjasone	*filep = m->mtx_witness->w_file;
135065557Sjasone	*linep = m->mtx_witness->w_line;
135165557Sjasone}
135265557Sjasone
135365557Sjasonevoid
135465856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
135565557Sjasone{
135665557Sjasone	m->mtx_witness->w_file = file;
135765557Sjasone	m->mtx_witness->w_line = line;
135865557Sjasone}
135965557Sjasone
136067352Sjhb#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1361