subr_turnstile.c revision 67676
165557Sjasone/*-
265557Sjasone * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
365557Sjasone *
465557Sjasone * Redistribution and use in source and binary forms, with or without
565557Sjasone * modification, are permitted provided that the following conditions
665557Sjasone * are met:
765557Sjasone * 1. Redistributions of source code must retain the above copyright
865557Sjasone *    notice, this list of conditions and the following disclaimer.
965557Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1065557Sjasone *    notice, this list of conditions and the following disclaimer in the
1165557Sjasone *    documentation and/or other materials provided with the distribution.
1265557Sjasone * 3. Berkeley Software Design Inc's name may not be used to endorse or
1365557Sjasone *    promote products derived from this software without specific prior
1465557Sjasone *    written permission.
1565557Sjasone *
1665557Sjasone * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1765557Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1865557Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1965557Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2065557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2165557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2265557Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2365557Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2465557Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2565557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2665557Sjasone * SUCH DAMAGE.
2765557Sjasone *
2865557Sjasone *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2967352Sjhb *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3065557Sjasone * $FreeBSD: head/sys/kern/subr_turnstile.c 67676 2000-10-27 02:59:30Z jhb $
3165557Sjasone */
3265557Sjasone
3365557Sjasone/*
3465557Sjasone *	Main Entry: witness
3565557Sjasone *	Pronunciation: 'wit-n&s
3665557Sjasone *	Function: noun
3765557Sjasone *	Etymology: Middle English witnesse, from Old English witnes knowledge,
3865557Sjasone *	    testimony, witness, from 2wit
3965557Sjasone *	Date: before 12th century
4065557Sjasone *	1 : attestation of a fact or event : TESTIMONY
4165557Sjasone *	2 : one that gives evidence; specifically : one who testifies in
4265557Sjasone *	    a cause or before a judicial tribunal
4365557Sjasone *	3 : one asked to be present at a transaction so as to be able to
4465557Sjasone *	    testify to its having taken place
4565557Sjasone *	4 : one who has personal knowledge of something
4665557Sjasone *	5 a : something serving as evidence or proof : SIGN
4765557Sjasone *	  b : public affirmation by word or example of usually
4865557Sjasone *	      religious faith or conviction <the heroic witness to divine
4965557Sjasone *	      life -- Pilot>
5065557Sjasone *	6 capitalized : a member of the Jehovah's Witnesses
5165557Sjasone */
5265557Sjasone
5367676Sjhb#include "opt_witness.h"
5467676Sjhb
5565557Sjasone#include <sys/param.h>
5667352Sjhb#include <sys/bus.h>
5767352Sjhb#include <sys/kernel.h>
5867352Sjhb#include <sys/malloc.h>
5965557Sjasone#include <sys/proc.h>
6067676Sjhb#include <sys/sysctl.h>
6165557Sjasone#include <sys/systm.h>
6267352Sjhb#include <sys/vmmeter.h>
6365557Sjasone#include <sys/ktr.h>
6465557Sjasone
6567352Sjhb#include <machine/atomic.h>
6667352Sjhb#include <machine/bus.h>
6767352Sjhb#include <machine/clock.h>
6865557Sjasone#include <machine/cpu.h>
6967352Sjhb
7067352Sjhb#include <vm/vm.h>
7167352Sjhb#include <vm/vm_extern.h>
7267352Sjhb
7365557Sjasone#define _KERN_MUTEX_C_		/* Cause non-inlined mtx_*() to be compiled. */
7467352Sjhb#include <sys/mutex.h>
7565557Sjasone
7665557Sjasone/*
7767352Sjhb * Machine independent bits of the mutex implementation
7867352Sjhb */
7967352Sjhb/* All mutexes in system (used for debug/panic) */
8067352Sjhb#ifdef MUTEX_DEBUG
8167352Sjhbstatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
8267352Sjhb	"All mutexes queue head" };
8367352Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
8467352Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
8567352Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx };
8667352Sjhb#else	/* MUTEX_DEBUG */
8767352Sjhbstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
8867352Sjhb	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
8967352Sjhb	{ NULL, NULL }, &all_mtx, &all_mtx };
9067352Sjhb#endif	/* MUTEX_DEBUG */
9167352Sjhb
9267352Sjhbstatic int	mtx_cur_cnt;
9367352Sjhbstatic int	mtx_max_cnt;
9467352Sjhb
9567352Sjhbvoid	_mtx_enter_giant_def(void);
9667352Sjhbvoid	_mtx_exit_giant_def(void);
9767352Sjhbstatic void propagate_priority(struct proc *) __unused;
9867352Sjhb
/* True when the lock word of mutex m holds exactly MTX_UNOWNED. */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
/*
 * The lock word of m masked with MTX_FLAGMASK and viewed as a process
 * pointer, or NULL when the mutex is unowned.
 */
#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * The uintptr_t-sized word stored immediately below the address of x.
 * NOTE(review): used on a function's first argument for trace output,
 * presumably to fetch the caller's return address -- that depends on the
 * platform's stack layout; confirm before relying on it.
 *
 * Both the argument and the whole expansion are parenthesized so the macro
 * behaves like a single operand wherever it is used.
 */
#define RETIP(x)		(*(((uintptr_t *)(&(x))) - 1))
/*
 * Assign priority pri to process p.  The expansion is parenthesized so
 * that the macro can be embedded in a larger expression without the
 * assignment swallowing whatever follows it.
 */
#define	SET_PRIO(p, pri)	((p)->p_priority = (pri))
10567352Sjhb
10667352Sjhb/*
10767352Sjhb * XXX Temporary, for use from assembly language
10867352Sjhb */
10967352Sjhb
11067352Sjhbvoid
11167352Sjhb_mtx_enter_giant_def(void)
11267352Sjhb{
11367352Sjhb
11467352Sjhb	mtx_enter(&Giant, MTX_DEF);
11567352Sjhb}
11667352Sjhb
11767352Sjhbvoid
11867352Sjhb_mtx_exit_giant_def(void)
11967352Sjhb{
12067352Sjhb
12167352Sjhb	mtx_exit(&Giant, MTX_DEF);
12267352Sjhb}
12367352Sjhb
12467352Sjhbstatic void
12567352Sjhbpropagate_priority(struct proc *p)
12667352Sjhb{
12767352Sjhb	int pri = p->p_priority;
12867352Sjhb	struct mtx *m = p->p_blocked;
12967352Sjhb
13067352Sjhb	for (;;) {
13167352Sjhb		struct proc *p1;
13267352Sjhb
13367352Sjhb		p = mtx_owner(m);
13467352Sjhb
13567352Sjhb		if (p == NULL) {
13667352Sjhb			/*
13767352Sjhb			 * This really isn't quite right. Really
13867352Sjhb			 * ought to bump priority of process that
13967352Sjhb			 * next acquires the mutex.
14067352Sjhb			 */
14167352Sjhb			MPASS(m->mtx_lock == MTX_CONTESTED);
14267352Sjhb			return;
14367352Sjhb		}
14467352Sjhb		MPASS(p->p_magic == P_MAGIC);
14567352Sjhb		if (p->p_priority <= pri)
14667352Sjhb			return;
14767352Sjhb		/*
14867352Sjhb		 * If lock holder is actually running, just bump priority.
14967352Sjhb		 */
15067352Sjhb		if (TAILQ_NEXT(p, p_procq) == NULL) {
15167352Sjhb			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
15267352Sjhb			SET_PRIO(p, pri);
15367352Sjhb			return;
15467352Sjhb		}
15567352Sjhb		/*
15667352Sjhb		 * If on run queue move to new run queue, and
15767352Sjhb		 * quit.
15867352Sjhb		 */
15967352Sjhb		if (p->p_stat == SRUN) {
16067352Sjhb			MPASS(p->p_blocked == NULL);
16167352Sjhb			remrunqueue(p);
16267352Sjhb			SET_PRIO(p, pri);
16367352Sjhb			setrunqueue(p);
16467352Sjhb			return;
16567352Sjhb		}
16667352Sjhb
16767352Sjhb		/*
16867352Sjhb		 * If we aren't blocked on a mutex, give up and quit.
16967352Sjhb		 */
17067352Sjhb		if (p->p_stat != SMTX) {
17167352Sjhb			printf(
17267352Sjhb	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
17367352Sjhb			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
17467352Sjhb			return;
17567352Sjhb		}
17667352Sjhb
17767352Sjhb		/*
17867352Sjhb		 * Pick up the mutex that p is blocked on.
17967352Sjhb		 */
18067352Sjhb		m = p->p_blocked;
18167352Sjhb		MPASS(m != NULL);
18267352Sjhb
18367352Sjhb		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
18467352Sjhb		    p->p_comm, m->mtx_description);
18567352Sjhb		/*
18667352Sjhb		 * Check if the proc needs to be moved up on
18767352Sjhb		 * the blocked chain
18867352Sjhb		 */
18967352Sjhb		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
19067352Sjhb		    p1->p_priority <= pri) {
19167352Sjhb			if (p1)
19267352Sjhb				printf(
19367352Sjhb	"XXX: previous process %d(%s) has higher priority\n",
19467352Sjhb				    p->p_pid, p->p_comm);
19567352Sjhb			else
19667352Sjhb				printf("XXX: process at head of run queue\n");
19767352Sjhb			continue;
19867352Sjhb		}
19967352Sjhb
20067352Sjhb		/*
20167352Sjhb		 * Remove proc from blocked chain
20267352Sjhb		 */
20367352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
20467352Sjhb		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
20567352Sjhb			MPASS(p1->p_magic == P_MAGIC);
20667352Sjhb			if (p1->p_priority > pri)
20767352Sjhb				break;
20867352Sjhb		}
20967352Sjhb		if (p1)
21067352Sjhb			TAILQ_INSERT_BEFORE(p1, p, p_procq);
21167352Sjhb		else
21267352Sjhb			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
21367352Sjhb		CTR4(KTR_LOCK,
21467352Sjhb		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
21567352Sjhb		    p, p1, m, m->mtx_description);
21667352Sjhb	}
21767352Sjhb}
21867352Sjhb
21967352Sjhbvoid
22067352Sjhbmtx_enter_hard(struct mtx *m, int type, int saveintr)
22167352Sjhb{
22267352Sjhb	struct proc *p = CURPROC;
22367352Sjhb	struct timeval new_switchtime;
22467352Sjhb
22567352Sjhb	KASSERT(p != NULL, ("curproc is NULL in mutex"));
22667352Sjhb
22767352Sjhb	switch (type) {
22867352Sjhb	case MTX_DEF:
22967352Sjhb		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
23067352Sjhb			m->mtx_recurse++;
23167352Sjhb			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
23267352Sjhb			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
23367352Sjhb			return;
23467352Sjhb		}
23567352Sjhb		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
23667548Sjhb		    m, (void *)m->mtx_lock, (void *)RETIP(m));
23767352Sjhb		while (!_obtain_lock(m, p)) {
23867396Sjhb			uintptr_t v;
23967352Sjhb			struct proc *p1;
24067352Sjhb
24167352Sjhb			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
24267352Sjhb			/*
24367352Sjhb			 * check if the lock has been released while
24467352Sjhb			 * waiting for the schedlock.
24567352Sjhb			 */
24667352Sjhb			if ((v = m->mtx_lock) == MTX_UNOWNED) {
24767352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
24867352Sjhb				continue;
24967352Sjhb			}
25067352Sjhb			/*
25167352Sjhb			 * The mutex was marked contested on release. This
25267352Sjhb			 * means that there are processes blocked on it.
25367352Sjhb			 */
25467352Sjhb			if (v == MTX_CONTESTED) {
25567352Sjhb				p1 = TAILQ_FIRST(&m->mtx_blocked);
25667352Sjhb				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
25767352Sjhb				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
25867352Sjhb				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
25967352Sjhb				if (p1->p_priority < p->p_priority) {
26067352Sjhb					SET_PRIO(p, p1->p_priority);
26167352Sjhb				}
26267352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
26367352Sjhb				return;
26467352Sjhb			}
26567352Sjhb			/*
26667352Sjhb			 * If the mutex isn't already contested and
26767352Sjhb			 * a failure occurs setting the contested bit the
26867352Sjhb			 * mutex was either release or the
26967352Sjhb			 * state of the RECURSION bit changed.
27067352Sjhb			 */
27167352Sjhb			if ((v & MTX_CONTESTED) == 0 &&
27267352Sjhb			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
27367352Sjhb				               (void *)(v | MTX_CONTESTED))) {
27467352Sjhb				mtx_exit(&sched_lock, MTX_SPIN);
27567352Sjhb				continue;
27667352Sjhb			}
27767352Sjhb
27867352Sjhb			/* We definitely have to sleep for this lock */
27967352Sjhb			mtx_assert(m, MA_NOTOWNED);
28067352Sjhb
28167352Sjhb#ifdef notyet
28267352Sjhb			/*
28367352Sjhb			 * If we're borrowing an interrupted thread's VM
28467352Sjhb			 * context must clean up before going to sleep.
28567352Sjhb			 */
28667352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
28767352Sjhb				ithd_t *it = (ithd_t *)p;
28867352Sjhb
28967352Sjhb				if (it->it_interrupted) {
29067352Sjhb					CTR2(KTR_LOCK,
29167352Sjhb					    "mtx_enter: 0x%x interrupted 0x%x",
29267352Sjhb					    it, it->it_interrupted);
29367352Sjhb					intr_thd_fixup(it);
29467352Sjhb				}
29567352Sjhb			}
29667352Sjhb#endif
29767352Sjhb
29867352Sjhb			/* Put us on the list of procs blocked on this mutex */
29967352Sjhb			if (TAILQ_EMPTY(&m->mtx_blocked)) {
30067352Sjhb				p1 = (struct proc *)(m->mtx_lock &
30167352Sjhb						     MTX_FLAGMASK);
30267352Sjhb				LIST_INSERT_HEAD(&p1->p_contested, m,
30367352Sjhb						 mtx_contested);
30467352Sjhb				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
30567352Sjhb			} else {
30667352Sjhb				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
30767352Sjhb					if (p1->p_priority > p->p_priority)
30867352Sjhb						break;
30967352Sjhb				if (p1)
31067352Sjhb					TAILQ_INSERT_BEFORE(p1, p, p_procq);
31167352Sjhb				else
31267352Sjhb					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
31367352Sjhb							  p_procq);
31467352Sjhb			}
31567352Sjhb
31667352Sjhb			p->p_blocked = m;	/* Who we're blocked on */
31767352Sjhb			p->p_stat = SMTX;
31867352Sjhb#if 0
31967352Sjhb			propagate_priority(p);
32067352Sjhb#endif
32167352Sjhb			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
32267352Sjhb			    p, m, m->mtx_description);
32367352Sjhb			/*
32467352Sjhb			 * Blatantly copied from mi_switch nearly verbatim.
32567352Sjhb			 * When Giant goes away and we stop dinking with it
32667352Sjhb			 * in mi_switch, we can go back to calling mi_switch
32767352Sjhb			 * directly here.
32867352Sjhb			 */
32967352Sjhb
33067352Sjhb			/*
33167352Sjhb			 * Compute the amount of time during which the current
33267352Sjhb			 * process was running, and add that to its total so
33367352Sjhb			 * far.
33467352Sjhb			 */
33567352Sjhb			microuptime(&new_switchtime);
33667352Sjhb			if (timevalcmp(&new_switchtime, &switchtime, <)) {
33767352Sjhb				printf(
33867352Sjhb		    "microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
33967352Sjhb		    		    switchtime.tv_sec, switchtime.tv_usec,
34067352Sjhb		    		    new_switchtime.tv_sec,
34167352Sjhb		    		    new_switchtime.tv_usec);
34267352Sjhb				new_switchtime = switchtime;
34367352Sjhb			} else {
34467352Sjhb				p->p_runtime += (new_switchtime.tv_usec -
34567352Sjhb				    switchtime.tv_usec) +
34667352Sjhb				    (new_switchtime.tv_sec - switchtime.tv_sec) *
34767352Sjhb				    (int64_t)1000000;
34867352Sjhb			}
34967352Sjhb
35067352Sjhb			/*
35167352Sjhb			 * Pick a new current process and record its start time.
35267352Sjhb			 */
35367352Sjhb			cnt.v_swtch++;
35467352Sjhb			switchtime = new_switchtime;
35567352Sjhb			cpu_switch();
35667352Sjhb			if (switchtime.tv_sec == 0)
35767352Sjhb				microuptime(&switchtime);
35867352Sjhb			switchticks = ticks;
35967352Sjhb			CTR3(KTR_LOCK,
36067352Sjhb			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
36167352Sjhb			    p, m, m->mtx_description);
36267352Sjhb			mtx_exit(&sched_lock, MTX_SPIN);
36367352Sjhb		}
36467352Sjhb		return;
36567352Sjhb	case MTX_SPIN:
36667352Sjhb	case MTX_SPIN | MTX_FIRST:
36767352Sjhb	case MTX_SPIN | MTX_TOPHALF:
36867352Sjhb	    {
36967352Sjhb		int i = 0;
37067352Sjhb
37167352Sjhb		if (m->mtx_lock == (uintptr_t)p) {
37267352Sjhb			m->mtx_recurse++;
37367352Sjhb			return;
37467352Sjhb		}
37567352Sjhb		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
37667352Sjhb		for (;;) {
37767352Sjhb			if (_obtain_lock(m, p))
37867352Sjhb				break;
37967352Sjhb			while (m->mtx_lock != MTX_UNOWNED) {
38067352Sjhb				if (i++ < 1000000)
38167352Sjhb					continue;
38267352Sjhb				if (i++ < 6000000)
38367352Sjhb					DELAY (1);
38467352Sjhb#ifdef DDB
38567352Sjhb				else if (!db_active)
38667352Sjhb#else
38767352Sjhb				else
38867352Sjhb#endif
38967352Sjhb					panic(
39067352Sjhb				"spin lock %s held by 0x%p for > 5 seconds",
39167352Sjhb					    m->mtx_description,
39267352Sjhb					    (void *)m->mtx_lock);
39367352Sjhb			}
39467352Sjhb		}
39567352Sjhb
39667352Sjhb#ifdef MUTEX_DEBUG
39767352Sjhb		if (type != MTX_SPIN)
39867352Sjhb			m->mtx_saveintr = 0xbeefface;
39967352Sjhb		else
40067352Sjhb#endif
40167352Sjhb			m->mtx_saveintr = saveintr;
40267352Sjhb		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
40367352Sjhb		return;
40467352Sjhb	    }
40567352Sjhb	}
40667352Sjhb}
40767352Sjhb
40867352Sjhbvoid
40967352Sjhbmtx_exit_hard(struct mtx *m, int type)
41067352Sjhb{
41167352Sjhb	struct proc *p, *p1;
41267352Sjhb	struct mtx *m1;
41367352Sjhb	int pri;
41467352Sjhb
41567352Sjhb	p = CURPROC;
41667352Sjhb	switch (type) {
41767352Sjhb	case MTX_DEF:
41867352Sjhb	case MTX_DEF | MTX_NOSWITCH:
41967352Sjhb		if (m->mtx_recurse != 0) {
42067352Sjhb			if (--(m->mtx_recurse) == 0)
42167352Sjhb				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
42267352Sjhb			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
42367352Sjhb			return;
42467352Sjhb		}
42567352Sjhb		mtx_enter(&sched_lock, MTX_SPIN);
42667352Sjhb		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
42767352Sjhb		p1 = TAILQ_FIRST(&m->mtx_blocked);
42867352Sjhb		MPASS(p->p_magic == P_MAGIC);
42967352Sjhb		MPASS(p1->p_magic == P_MAGIC);
43067352Sjhb		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
43167352Sjhb		if (TAILQ_EMPTY(&m->mtx_blocked)) {
43267352Sjhb			LIST_REMOVE(m, mtx_contested);
43367352Sjhb			_release_lock_quick(m);
43467352Sjhb			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
43567352Sjhb		} else
43667352Sjhb			m->mtx_lock = MTX_CONTESTED;
43767352Sjhb		pri = MAXPRI;
43867352Sjhb		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
43967352Sjhb			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
44067352Sjhb			if (cp < pri)
44167352Sjhb				pri = cp;
44267352Sjhb		}
44367352Sjhb		if (pri > p->p_nativepri)
44467352Sjhb			pri = p->p_nativepri;
44567352Sjhb		SET_PRIO(p, pri);
44667352Sjhb		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
44767352Sjhb		    m, p1);
44867352Sjhb		p1->p_blocked = NULL;
44967352Sjhb		p1->p_stat = SRUN;
45067352Sjhb		setrunqueue(p1);
45167352Sjhb		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
45267352Sjhb#ifdef notyet
45367352Sjhb			if (p->p_flag & (P_ITHD | P_SITHD)) {
45467352Sjhb				ithd_t *it = (ithd_t *)p;
45567352Sjhb
45667352Sjhb				if (it->it_interrupted) {
45767352Sjhb					CTR2(KTR_LOCK,
45867352Sjhb					    "mtx_exit: 0x%x interruped 0x%x",
45967352Sjhb					    it, it->it_interrupted);
46067352Sjhb					intr_thd_fixup(it);
46167352Sjhb				}
46267352Sjhb			}
46367352Sjhb#endif
46467352Sjhb			setrunqueue(p);
46567352Sjhb			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
46667548Sjhb			    m, (void *)m->mtx_lock);
46767352Sjhb			mi_switch();
46867352Sjhb			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
46967548Sjhb			    m, (void *)m->mtx_lock);
47067352Sjhb		}
47167352Sjhb		mtx_exit(&sched_lock, MTX_SPIN);
47267352Sjhb		break;
47367352Sjhb	case MTX_SPIN:
47467352Sjhb	case MTX_SPIN | MTX_FIRST:
47567352Sjhb		if (m->mtx_recurse != 0) {
47667352Sjhb			m->mtx_recurse--;
47767352Sjhb			return;
47867352Sjhb		}
47967352Sjhb		MPASS(mtx_owned(m));
48067352Sjhb		_release_lock_quick(m);
48167352Sjhb		if (type & MTX_FIRST)
48267352Sjhb			enable_intr();	/* XXX is this kosher? */
48367352Sjhb		else {
48467352Sjhb			MPASS(m->mtx_saveintr != 0xbeefface);
48567352Sjhb			restore_intr(m->mtx_saveintr);
48667352Sjhb		}
48767352Sjhb		break;
48867352Sjhb	case MTX_SPIN | MTX_TOPHALF:
48967352Sjhb		if (m->mtx_recurse != 0) {
49067352Sjhb			m->mtx_recurse--;
49167352Sjhb			return;
49267352Sjhb		}
49367352Sjhb		MPASS(mtx_owned(m));
49467352Sjhb		_release_lock_quick(m);
49567352Sjhb		break;
49667352Sjhb	default:
49767352Sjhb		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
49867352Sjhb	}
49967352Sjhb}
50067352Sjhb
#define MV_DESTROY	0	/* validate before destroy */
50267352Sjhb#define MV_INIT		1	/* validate before init */
50367352Sjhb
50467352Sjhb#ifdef MUTEX_DEBUG
50567352Sjhb
50667352Sjhbint mtx_validate __P((struct mtx *, int));
50767352Sjhb
50867352Sjhbint
50967352Sjhbmtx_validate(struct mtx *m, int when)
51067352Sjhb{
51167352Sjhb	struct mtx *mp;
51267352Sjhb	int i;
51367352Sjhb	int retval = 0;
51467352Sjhb
51567352Sjhb	if (m == &all_mtx || cold)
51667352Sjhb		return 0;
51767352Sjhb
51867352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
51967352Sjhb/*
52067352Sjhb * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
52167352Sjhb * we can re-enable the kernacc() checks.
52267352Sjhb */
52367352Sjhb#ifndef __alpha__
52467352Sjhb	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
52567352Sjhb	    VM_PROT_READ) == 1);
52667352Sjhb#endif
52767352Sjhb	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
52867352Sjhb	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
52967352Sjhb#ifndef __alpha__
53067352Sjhb		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
53167352Sjhb		    VM_PROT_READ) != 1) {
53267352Sjhb			panic("mtx_validate: mp=%p mp->mtx_next=%p",
53367352Sjhb			    mp, mp->mtx_next);
53467352Sjhb		}
53567352Sjhb#endif
53667352Sjhb		i++;
53767352Sjhb		if (i > mtx_cur_cnt) {
53867352Sjhb			panic("mtx_validate: too many in chain, known=%d\n",
53967352Sjhb			    mtx_cur_cnt);
54067352Sjhb		}
54167352Sjhb	}
54267352Sjhb	MPASS(i == mtx_cur_cnt);
54367352Sjhb	switch (when) {
54467352Sjhb	case MV_DESTROY:
54567352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
54667352Sjhb			if (mp == m)
54767352Sjhb				break;
54867352Sjhb		MPASS(mp == m);
54967352Sjhb		break;
55067352Sjhb	case MV_INIT:
55167352Sjhb		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
55267352Sjhb		if (mp == m) {
55367352Sjhb			/*
55467352Sjhb			 * Not good. This mutex already exists.
55567352Sjhb			 */
55667352Sjhb			printf("re-initing existing mutex %s\n",
55767352Sjhb			    m->mtx_description);
55867352Sjhb			MPASS(m->mtx_lock == MTX_UNOWNED);
55967352Sjhb			retval = 1;
56067352Sjhb		}
56167352Sjhb	}
56267352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
56367352Sjhb	return (retval);
56467352Sjhb}
56567352Sjhb#endif
56667352Sjhb
56767352Sjhbvoid
56867352Sjhbmtx_init(struct mtx *m, const char *t, int flag)
56967352Sjhb{
57067352Sjhb#ifdef MUTEX_DEBUG
57167352Sjhb	struct mtx_debug *debug;
57267352Sjhb#endif
57367352Sjhb
57467352Sjhb	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
57567352Sjhb#ifdef MUTEX_DEBUG
57667352Sjhb	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
57767352Sjhb		return;
57867352Sjhb	if (flag & MTX_COLD)
57967352Sjhb		debug = m->mtx_debug;
58067352Sjhb	else
58167352Sjhb		debug = NULL;
58267352Sjhb	if (debug == NULL) {
58367352Sjhb#ifdef DIAGNOSTIC
58467352Sjhb		if(cold && bootverbose)
58567352Sjhb			printf("malloc'ing mtx_debug while cold for %s\n", t);
58667352Sjhb#endif
58767352Sjhb
58867352Sjhb		/* XXX - should not use DEVBUF */
58967352Sjhb		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
59067352Sjhb		MPASS(debug != NULL);
59167352Sjhb		bzero(debug, sizeof(struct mtx_debug));
59267352Sjhb	}
59367352Sjhb#endif
59467352Sjhb	bzero((void *)m, sizeof *m);
59567352Sjhb	TAILQ_INIT(&m->mtx_blocked);
59667352Sjhb#ifdef MUTEX_DEBUG
59767352Sjhb	m->mtx_debug = debug;
59867352Sjhb#endif
59967352Sjhb	m->mtx_description = t;
60067352Sjhb	m->mtx_lock = MTX_UNOWNED;
60167352Sjhb	/* Put on all mutex queue */
60267352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
60367352Sjhb	m->mtx_next = &all_mtx;
60467352Sjhb	m->mtx_prev = all_mtx.mtx_prev;
60567352Sjhb	m->mtx_prev->mtx_next = m;
60667352Sjhb	all_mtx.mtx_prev = m;
60767352Sjhb	if (++mtx_cur_cnt > mtx_max_cnt)
60867352Sjhb		mtx_max_cnt = mtx_cur_cnt;
60967352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
61067352Sjhb	witness_init(m, flag);
61167352Sjhb}
61267352Sjhb
61367352Sjhbvoid
61467352Sjhbmtx_destroy(struct mtx *m)
61567352Sjhb{
61667352Sjhb
61767352Sjhb	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
61867352Sjhb#ifdef MUTEX_DEBUG
61967352Sjhb	if (m->mtx_next == NULL)
62067352Sjhb		panic("mtx_destroy: %p (%s) already destroyed",
62167352Sjhb		    m, m->mtx_description);
62267352Sjhb
62367352Sjhb	if (!mtx_owned(m)) {
62467352Sjhb		MPASS(m->mtx_lock == MTX_UNOWNED);
62567352Sjhb	} else {
62667352Sjhb		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
62767352Sjhb	}
62867352Sjhb	mtx_validate(m, MV_DESTROY);		/* diagnostic */
62967352Sjhb#endif
63067352Sjhb
63167352Sjhb#ifdef WITNESS
63267352Sjhb	if (m->mtx_witness)
63367352Sjhb		witness_destroy(m);
63467352Sjhb#endif /* WITNESS */
63567352Sjhb
63667352Sjhb	/* Remove from the all mutex queue */
63767352Sjhb	mtx_enter(&all_mtx, MTX_DEF);
63867352Sjhb	m->mtx_next->mtx_prev = m->mtx_prev;
63967352Sjhb	m->mtx_prev->mtx_next = m->mtx_next;
64067352Sjhb#ifdef MUTEX_DEBUG
64167352Sjhb	m->mtx_next = m->mtx_prev = NULL;
64267352Sjhb	free(m->mtx_debug, M_DEVBUF);
64367352Sjhb	m->mtx_debug = NULL;
64467352Sjhb#endif
64567352Sjhb	mtx_cur_cnt--;
64667352Sjhb	mtx_exit(&all_mtx, MTX_DEF);
64767352Sjhb}
64867352Sjhb
64967352Sjhb/*
65065557Sjasone * The non-inlined versions of the mtx_*() functions are always built (above),
65167352Sjhb * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
65265557Sjasone * being specified.
65365557Sjasone */
65467352Sjhb#if (defined(MUTEX_DEBUG) && defined(WITNESS))
65565557Sjasone
65665557Sjasone#define WITNESS_COUNT 200
65765557Sjasone#define	WITNESS_NCHILDREN 2
65865557Sjasone
65965557Sjasone#ifndef SMP
66065557Sjasoneextern int witness_spin_check;
66165557Sjasone#endif
66265557Sjasone
66367401Sjhbint witness_watch = 1;
66465557Sjasone
66565856Sjhbstruct witness {
66665557Sjasone	struct witness	*w_next;
66767404Sjhb	const char	*w_description;
66865624Sjasone	const char	*w_file;
66965557Sjasone	int		 w_line;
67065557Sjasone	struct witness	*w_morechildren;
67165557Sjasone	u_char		 w_childcnt;
67265557Sjasone	u_char		 w_Giant_squawked:1;
67365557Sjasone	u_char		 w_other_squawked:1;
67465557Sjasone	u_char		 w_same_squawked:1;
67565557Sjasone	u_char		 w_sleep:1;
67665557Sjasone	u_char		 w_spin:1;	/* this is a spin mutex */
67765557Sjasone	u_int		 w_level;
67865557Sjasone	struct witness	*w_children[WITNESS_NCHILDREN];
67965856Sjhb};
68065557Sjasone
68165856Sjhbstruct witness_blessed {
68265557Sjasone	char 	*b_lock1;
68365557Sjasone	char	*b_lock2;
68465856Sjhb};
68565557Sjasone
68667676Sjhb#ifdef DDB
68765557Sjasone/*
68867676Sjhb * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
68965557Sjasone * drop into kdebug() when:
 *	- a lock hierarchy violation occurs
69165557Sjasone *	- locks are held when going to sleep.
69265557Sjasone */
69367676Sjhb#ifdef WITNESS_DDB
69467676Sjhbint	witness_ddb = 1;
69567676Sjhb#else
69667676Sjhbint	witness_ddb = 0;
69765557Sjasone#endif
69867676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
69967676Sjhb#endif /* DDB */
70065557Sjasone
70167676Sjhb#ifdef WITNESS_SKIPSPIN
70267676Sjhbint	witness_skipspin = 1;
70367676Sjhb#else
70467676Sjhbint	witness_skipspin = 0;
70565557Sjasone#endif
70667676SjhbSYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
70767676Sjhb    "");
70865557Sjasone
70967676SjhbMUTEX_DECLARE(static,w_mtx);
71065856Sjhbstatic struct witness	*w_free;
71165856Sjhbstatic struct witness	*w_all;
71265856Sjhbstatic int		 w_inited;
71365856Sjhbstatic int		 witness_dead;	/* fatal error, probably no memory */
71465557Sjasone
71565856Sjhbstatic struct witness	 w_data[WITNESS_COUNT];
71665557Sjasone
71767404Sjhbstatic struct witness	 *enroll __P((const char *description, int flag));
71865856Sjhbstatic int itismychild __P((struct witness *parent, struct witness *child));
71965856Sjhbstatic void removechild __P((struct witness *parent, struct witness *child));
72065856Sjhbstatic int isitmychild __P((struct witness *parent, struct witness *child));
72165856Sjhbstatic int isitmydescendant __P((struct witness *parent, struct witness *child));
72265856Sjhbstatic int dup_ok __P((struct witness *));
72365856Sjhbstatic int blessed __P((struct witness *, struct witness *));
72465557Sjasonestatic void witness_displaydescendants
72565856Sjhb    __P((void(*)(const char *fmt, ...), struct witness *));
72665856Sjhbstatic void witness_leveldescendents __P((struct witness *parent, int level));
72765557Sjasonestatic void witness_levelall __P((void));
72865856Sjhbstatic struct witness * witness_get __P((void));
72965856Sjhbstatic void witness_free __P((struct witness *m));
73065557Sjasone
73165557Sjasone
73265557Sjasonestatic char *ignore_list[] = {
73365557Sjasone	"witness lock",
73465557Sjasone	NULL
73565557Sjasone};
73665557Sjasone
73765557Sjasonestatic char *spin_order_list[] = {
73865557Sjasone	"sched lock",
73967676Sjhb	"clk",
74067676Sjhb	"sio",
74165557Sjasone	/*
74265557Sjasone	 * leaf locks
74365557Sjasone	 */
74465557Sjasone	NULL
74565557Sjasone};
74665557Sjasone
74765557Sjasonestatic char *order_list[] = {
74865557Sjasone	NULL
74965557Sjasone};
75065557Sjasone
75165557Sjasonestatic char *dup_list[] = {
75265557Sjasone	NULL
75365557Sjasone};
75465557Sjasone
75565557Sjasonestatic char *sleep_list[] = {
75665557Sjasone	"Giant lock",
75765557Sjasone	NULL
75865557Sjasone};
75965557Sjasone
76065557Sjasone/*
76165557Sjasone * Pairs of locks which have been blessed
76265557Sjasone * Don't complain about order problems with blessed locks
76365557Sjasone */
76465856Sjhbstatic struct witness_blessed blessed_list[] = {
76565557Sjasone};
76665856Sjhbstatic int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
76765557Sjasone
76865557Sjasonevoid
76965856Sjhbwitness_init(struct mtx *m, int flag)
77065557Sjasone{
77165557Sjasone	m->mtx_witness = enroll(m->mtx_description, flag);
77265557Sjasone}
77365557Sjasone
77465557Sjasonevoid
77565856Sjhbwitness_destroy(struct mtx *m)
77665557Sjasone{
77765856Sjhb	struct mtx *m1;
77865557Sjasone	struct proc *p;
77965557Sjasone	p = CURPROC;
78065557Sjasone	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
78165557Sjasone		m1 = LIST_NEXT(m1, mtx_held)) {
78265557Sjasone		if (m1 == m) {
78365557Sjasone			LIST_REMOVE(m, mtx_held);
78465557Sjasone			break;
78565557Sjasone		}
78665557Sjasone	}
78765557Sjasone	return;
78865557Sjasone
78965557Sjasone}
79065557Sjasone
79165557Sjasonevoid
79265856Sjhbwitness_enter(struct mtx *m, int flags, const char *file, int line)
79365557Sjasone{
79465856Sjhb	struct witness *w, *w1;
79565856Sjhb	struct mtx *m1;
79665557Sjasone	struct proc *p;
79765557Sjasone	int i;
79867676Sjhb#ifdef DDB
79967676Sjhb	int go_into_ddb = 0;
80067676Sjhb#endif /* DDB */
80165557Sjasone
80265557Sjasone	w = m->mtx_witness;
80365557Sjasone	p = CURPROC;
80465557Sjasone
80565557Sjasone	if (flags & MTX_SPIN) {
80665557Sjasone		if (!w->w_spin)
80765651Sjasone			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
80865651Sjasone			    " %s:%d", m->mtx_description, file, line);
80965557Sjasone		if (m->mtx_recurse != 0)
81065557Sjasone			return;
81165557Sjasone		mtx_enter(&w_mtx, MTX_SPIN);
81265557Sjasone		i = witness_spin_check;
81365557Sjasone		if (i != 0 && w->w_level < i) {
81465557Sjasone			mtx_exit(&w_mtx, MTX_SPIN);
81565651Sjasone			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
81665651Sjasone			    " %s:%d already holding %s:%x",
81765557Sjasone			    m->mtx_description, w->w_level, file, line,
81865557Sjasone			    spin_order_list[ffs(i)-1], i);
81965557Sjasone		}
82065557Sjasone		PCPU_SET(witness_spin_check, i | w->w_level);
82165557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
82265557Sjasone		return;
82365557Sjasone	}
82465557Sjasone	if (w->w_spin)
82565557Sjasone		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
82665557Sjasone		    m->mtx_description, file, line);
82765557Sjasone
82865557Sjasone	if (m->mtx_recurse != 0)
82965557Sjasone		return;
83065557Sjasone	if (witness_dead)
83165557Sjasone		goto out;
83265557Sjasone	if (cold)
83365557Sjasone		goto out;
83465557Sjasone
83565557Sjasone	if (!mtx_legal2block())
83665557Sjasone		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
83765557Sjasone			    m->mtx_description, file, line);
83865557Sjasone	/*
83965557Sjasone	 * Is this the first mutex acquired
84065557Sjasone	 */
84165557Sjasone	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
84265557Sjasone		goto out;
84365557Sjasone
84465557Sjasone	if ((w1 = m1->mtx_witness) == w) {
84565557Sjasone		if (w->w_same_squawked || dup_ok(w))
84665557Sjasone			goto out;
84765557Sjasone		w->w_same_squawked = 1;
84865557Sjasone		printf("acquring duplicate lock of same type: \"%s\"\n",
84965557Sjasone			m->mtx_description);
85065557Sjasone		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
85165557Sjasone		printf(" 2nd @ %s:%d\n", file, line);
85267676Sjhb#ifdef DDB
85367676Sjhb		go_into_ddb = 1;
85467676Sjhb#endif /* DDB */
85565557Sjasone		goto out;
85665557Sjasone	}
85765557Sjasone	MPASS(!mtx_owned(&w_mtx));
85865557Sjasone	mtx_enter(&w_mtx, MTX_SPIN);
85965557Sjasone	/*
86065557Sjasone	 * If we have a known higher number just say ok
86165557Sjasone	 */
86265557Sjasone	if (witness_watch > 1 && w->w_level > w1->w_level) {
86365557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
86465557Sjasone		goto out;
86565557Sjasone	}
86665557Sjasone	if (isitmydescendant(m1->mtx_witness, w)) {
86765557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
86865557Sjasone		goto out;
86965557Sjasone	}
87065557Sjasone	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
87165557Sjasone
87267352Sjhb		MPASS(i < 200);
87365557Sjasone		w1 = m1->mtx_witness;
87465557Sjasone		if (isitmydescendant(w, w1)) {
87565557Sjasone			mtx_exit(&w_mtx, MTX_SPIN);
87665557Sjasone			if (blessed(w, w1))
87765557Sjasone				goto out;
87865557Sjasone			if (m1 == &Giant) {
87965557Sjasone				if (w1->w_Giant_squawked)
88065557Sjasone					goto out;
88165557Sjasone				else
88265557Sjasone					w1->w_Giant_squawked = 1;
88365557Sjasone			} else {
88465557Sjasone				if (w1->w_other_squawked)
88565557Sjasone					goto out;
88665557Sjasone				else
88765557Sjasone					w1->w_other_squawked = 1;
88865557Sjasone			}
88965557Sjasone			printf("lock order reversal\n");
89065557Sjasone			printf(" 1st %s last acquired @ %s:%d\n",
89165557Sjasone			    w->w_description, w->w_file, w->w_line);
89265557Sjasone			printf(" 2nd %p %s @ %s:%d\n",
89365557Sjasone			    m1, w1->w_description, w1->w_file, w1->w_line);
89465557Sjasone			printf(" 3rd %p %s @ %s:%d\n",
89565557Sjasone			    m, w->w_description, file, line);
89667676Sjhb#ifdef DDB
89767676Sjhb			go_into_ddb = 1;
89867676Sjhb#endif /* DDB */
89965557Sjasone			goto out;
90065557Sjasone		}
90165557Sjasone	}
90265557Sjasone	m1 = LIST_FIRST(&p->p_heldmtx);
90365557Sjasone	if (!itismychild(m1->mtx_witness, w))
90465557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
90565557Sjasone
90665557Sjasoneout:
90767676Sjhb#ifdef DDB
90867676Sjhb	if (witness_ddb && go_into_ddb)
90967676Sjhb		Debugger("witness_enter");
91067676Sjhb#endif /* DDB */
91165557Sjasone	w->w_file = file;
91265557Sjasone	w->w_line = line;
91365557Sjasone	m->mtx_line = line;
91465557Sjasone	m->mtx_file = file;
91565557Sjasone
91665557Sjasone	/*
91765557Sjasone	 * If this pays off it likely means that a mutex  being witnessed
91865557Sjasone	 * is acquired in hardclock. Put it in the ignore list. It is
91965557Sjasone	 * likely not the mutex this assert fails on.
92065557Sjasone	 */
92167352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
92265557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
92365557Sjasone}
92465557Sjasone
92565557Sjasonevoid
92665856Sjhbwitness_exit(struct mtx *m, int flags, const char *file, int line)
92765557Sjasone{
92865856Sjhb	struct witness *w;
92965557Sjasone
93065557Sjasone	w = m->mtx_witness;
93165557Sjasone
93265557Sjasone	if (flags & MTX_SPIN) {
93365557Sjasone		if (!w->w_spin)
93465651Sjasone			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
93565651Sjasone			    " %s:%d", m->mtx_description, file, line);
93665557Sjasone		if (m->mtx_recurse != 0)
93765557Sjasone			return;
93865557Sjasone		mtx_enter(&w_mtx, MTX_SPIN);
93965557Sjasone		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
94065557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
94165557Sjasone		return;
94265557Sjasone	}
94365557Sjasone	if (w->w_spin)
94465557Sjasone		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
94565557Sjasone		    m->mtx_description, file, line);
94665557Sjasone
94765557Sjasone	if (m->mtx_recurse != 0)
94865557Sjasone		return;
94965557Sjasone
95065557Sjasone	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
95165557Sjasone		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
95265557Sjasone			    m->mtx_description, file, line);
95365557Sjasone	LIST_REMOVE(m, mtx_held);
95465557Sjasone	m->mtx_held.le_prev = NULL;
95565557Sjasone}
95665557Sjasone
95765557Sjasonevoid
95865856Sjhbwitness_try_enter(struct mtx *m, int flags, const char *file, int line)
95965557Sjasone{
96065557Sjasone	struct proc *p;
96165856Sjhb	struct witness *w = m->mtx_witness;
96265557Sjasone
96365557Sjasone	if (flags & MTX_SPIN) {
96465557Sjasone		if (!w->w_spin)
96565557Sjasone			panic("mutex_try_enter: "
96665557Sjasone			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
96765557Sjasone			    m->mtx_description, file, line);
96865557Sjasone		if (m->mtx_recurse != 0)
96965557Sjasone			return;
97065557Sjasone		mtx_enter(&w_mtx, MTX_SPIN);
97165557Sjasone		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
97265557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
97365557Sjasone		return;
97465557Sjasone	}
97565557Sjasone
97665557Sjasone	if (w->w_spin)
97765557Sjasone		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
97865557Sjasone		    m->mtx_description, file, line);
97965557Sjasone
98065557Sjasone	if (m->mtx_recurse != 0)
98165557Sjasone		return;
98265557Sjasone
98365557Sjasone	w->w_file = file;
98465557Sjasone	w->w_line = line;
98565557Sjasone	m->mtx_line = line;
98665557Sjasone	m->mtx_file = file;
98765557Sjasone	p = CURPROC;
98867352Sjhb	MPASS(m->mtx_held.le_prev == NULL);
98965557Sjasone	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
99065557Sjasone}
99165557Sjasone
99265557Sjasonevoid
99365557Sjasonewitness_display(void(*prnt)(const char *fmt, ...))
99465557Sjasone{
99565856Sjhb	struct witness *w, *w1;
99665557Sjasone
99765557Sjasone	witness_levelall();
99865557Sjasone
99965557Sjasone	for (w = w_all; w; w = w->w_next) {
100065557Sjasone		if (w->w_file == NULL)
100165557Sjasone			continue;
100265557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
100365557Sjasone			if (isitmychild(w1, w))
100465557Sjasone				break;
100565557Sjasone		}
100665557Sjasone		if (w1 != NULL)
100765557Sjasone			continue;
100865557Sjasone		/*
100965557Sjasone		 * This lock has no anscestors, display its descendants.
101065557Sjasone		 */
101165557Sjasone		witness_displaydescendants(prnt, w);
101265557Sjasone	}
101365557Sjasone	prnt("\nMutex which were never acquired\n");
101465557Sjasone	for (w = w_all; w; w = w->w_next) {
101565557Sjasone		if (w->w_file != NULL)
101665557Sjasone			continue;
101765557Sjasone		prnt("%s\n", w->w_description);
101865557Sjasone	}
101965557Sjasone}
102065557Sjasone
102165557Sjasoneint
102265856Sjhbwitness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
102365557Sjasone{
102465856Sjhb	struct mtx *m;
102565557Sjasone	struct proc *p;
102665557Sjasone	char **sleep;
102765557Sjasone	int n = 0;
102865557Sjasone
102965557Sjasone	p = CURPROC;
103065557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
103165557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
103265557Sjasone		if (m == mtx)
103365557Sjasone			continue;
103465557Sjasone		for (sleep = sleep_list; *sleep!= NULL; sleep++)
103565557Sjasone			if (strcmp(m->mtx_description, *sleep) == 0)
103665557Sjasone				goto next;
103765557Sjasone		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
103865557Sjasone			file, line, check_only ? "could sleep" : "sleeping",
103965557Sjasone			m->mtx_description,
104065557Sjasone			m->mtx_witness->w_file, m->mtx_witness->w_line);
104165557Sjasone		n++;
104265557Sjasone	next:
104365557Sjasone	}
104467676Sjhb#ifdef DDB
104567676Sjhb	if (witness_ddb && n)
104667676Sjhb		Debugger("witness_sleep");
104767676Sjhb#endif /* DDB */
104865557Sjasone	return (n);
104965557Sjasone}
105065557Sjasone
105165856Sjhbstatic struct witness *
105267404Sjhbenroll(const char *description, int flag)
105365557Sjasone{
105465557Sjasone	int i;
105565856Sjhb	struct witness *w, *w1;
105665557Sjasone	char **ignore;
105765557Sjasone	char **order;
105865557Sjasone
105965557Sjasone	if (!witness_watch)
106065557Sjasone		return (NULL);
106165557Sjasone	for (ignore = ignore_list; *ignore != NULL; ignore++)
106265557Sjasone		if (strcmp(description, *ignore) == 0)
106365557Sjasone			return (NULL);
106465557Sjasone
106565557Sjasone	if (w_inited == 0) {
106667676Sjhb		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
106765557Sjasone		for (i = 0; i < WITNESS_COUNT; i++) {
106865557Sjasone			w = &w_data[i];
106965557Sjasone			witness_free(w);
107065557Sjasone		}
107165557Sjasone		w_inited = 1;
107265557Sjasone		for (order = order_list; *order != NULL; order++) {
107365557Sjasone			w = enroll(*order, MTX_DEF);
107465557Sjasone			w->w_file = "order list";
107565557Sjasone			for (order++; *order != NULL; order++) {
107665557Sjasone				w1 = enroll(*order, MTX_DEF);
107765557Sjasone				w1->w_file = "order list";
107865557Sjasone				itismychild(w, w1);
107965557Sjasone				w = w1;
108065557Sjasone    	    	    	}
108165557Sjasone		}
108265557Sjasone	}
108365557Sjasone	if ((flag & MTX_SPIN) && witness_skipspin)
108465557Sjasone		return (NULL);
108565557Sjasone	mtx_enter(&w_mtx, MTX_SPIN);
108665557Sjasone	for (w = w_all; w; w = w->w_next) {
108765557Sjasone		if (strcmp(description, w->w_description) == 0) {
108865557Sjasone			mtx_exit(&w_mtx, MTX_SPIN);
108965557Sjasone			return (w);
109065557Sjasone		}
109165557Sjasone	}
109265557Sjasone	if ((w = witness_get()) == NULL)
109365557Sjasone		return (NULL);
109465557Sjasone	w->w_next = w_all;
109565557Sjasone	w_all = w;
109665557Sjasone	w->w_description = description;
109765557Sjasone	mtx_exit(&w_mtx, MTX_SPIN);
109865557Sjasone	if (flag & MTX_SPIN) {
109965557Sjasone		w->w_spin = 1;
110065557Sjasone
110165557Sjasone		i = 1;
110265557Sjasone		for (order = spin_order_list; *order != NULL; order++) {
110365557Sjasone			if (strcmp(description, *order) == 0)
110465557Sjasone				break;
110565557Sjasone			i <<= 1;
110665557Sjasone		}
110765557Sjasone		if (*order == NULL)
110865557Sjasone			panic("spin lock %s not in order list", description);
110965557Sjasone		w->w_level = i;
111065557Sjasone	}
111165557Sjasone	return (w);
111265557Sjasone}
111365557Sjasone
111465557Sjasonestatic int
111565856Sjhbitismychild(struct witness *parent, struct witness *child)
111665557Sjasone{
111765557Sjasone	static int recursed;
111865557Sjasone
111965557Sjasone	/*
112065557Sjasone	 * Insert "child" after "parent"
112165557Sjasone	 */
112265557Sjasone	while (parent->w_morechildren)
112365557Sjasone		parent = parent->w_morechildren;
112465557Sjasone
112565557Sjasone	if (parent->w_childcnt == WITNESS_NCHILDREN) {
112665557Sjasone		if ((parent->w_morechildren = witness_get()) == NULL)
112765557Sjasone			return (1);
112865557Sjasone		parent = parent->w_morechildren;
112965557Sjasone	}
113067352Sjhb	MPASS(child != NULL);
113165557Sjasone	parent->w_children[parent->w_childcnt++] = child;
113265557Sjasone	/*
113365557Sjasone	 * now prune whole tree
113465557Sjasone	 */
113565557Sjasone	if (recursed)
113665557Sjasone		return (0);
113765557Sjasone	recursed = 1;
113865557Sjasone	for (child = w_all; child != NULL; child = child->w_next) {
113965557Sjasone		for (parent = w_all; parent != NULL;
114065557Sjasone		    parent = parent->w_next) {
114165557Sjasone			if (!isitmychild(parent, child))
114265557Sjasone				continue;
114365557Sjasone			removechild(parent, child);
114465557Sjasone			if (isitmydescendant(parent, child))
114565557Sjasone				continue;
114665557Sjasone			itismychild(parent, child);
114765557Sjasone		}
114865557Sjasone	}
114965557Sjasone	recursed = 0;
115065557Sjasone	witness_levelall();
115165557Sjasone	return (0);
115265557Sjasone}
115365557Sjasone
115465557Sjasonestatic void
115565856Sjhbremovechild(struct witness *parent, struct witness *child)
115665557Sjasone{
115765856Sjhb	struct witness *w, *w1;
115865557Sjasone	int i;
115965557Sjasone
116065557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
116165557Sjasone		for (i = 0; i < w->w_childcnt; i++)
116265557Sjasone			if (w->w_children[i] == child)
116365557Sjasone				goto found;
116465557Sjasone	return;
116565557Sjasonefound:
116665557Sjasone	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
116765557Sjasone		continue;
116865557Sjasone	w->w_children[i] = w1->w_children[--w1->w_childcnt];
116967352Sjhb	MPASS(w->w_children[i] != NULL);
117065557Sjasone
117165557Sjasone	if (w1->w_childcnt != 0)
117265557Sjasone		return;
117365557Sjasone
117465557Sjasone	if (w1 == parent)
117565557Sjasone		return;
117665557Sjasone	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
117765557Sjasone		continue;
117865557Sjasone	w->w_morechildren = 0;
117965557Sjasone	witness_free(w1);
118065557Sjasone}
118165557Sjasone
118265557Sjasonestatic int
118365856Sjhbisitmychild(struct witness *parent, struct witness *child)
118465557Sjasone{
118565856Sjhb	struct witness *w;
118665557Sjasone	int i;
118765557Sjasone
118865557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren) {
118965557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
119065557Sjasone			if (w->w_children[i] == child)
119165557Sjasone				return (1);
119265557Sjasone		}
119365557Sjasone	}
119465557Sjasone	return (0);
119565557Sjasone}
119665557Sjasone
119765557Sjasonestatic int
119865856Sjhbisitmydescendant(struct witness *parent, struct witness *child)
119965557Sjasone{
120065856Sjhb	struct witness *w;
120165557Sjasone	int i;
120265557Sjasone	int j;
120365557Sjasone
120465557Sjasone	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
120567352Sjhb		MPASS(j < 1000);
120665557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
120765557Sjasone			if (w->w_children[i] == child)
120865557Sjasone				return (1);
120965557Sjasone		}
121065557Sjasone		for (i = 0; i < w->w_childcnt; i++) {
121165557Sjasone			if (isitmydescendant(w->w_children[i], child))
121265557Sjasone				return (1);
121365557Sjasone		}
121465557Sjasone	}
121565557Sjasone	return (0);
121665557Sjasone}
121765557Sjasone
121865557Sjasonevoid
121965557Sjasonewitness_levelall (void)
122065557Sjasone{
122165856Sjhb	struct witness *w, *w1;
122265557Sjasone
122365557Sjasone	for (w = w_all; w; w = w->w_next)
122465557Sjasone		if (!w->w_spin)
122565557Sjasone			w->w_level = 0;
122665557Sjasone	for (w = w_all; w; w = w->w_next) {
122765557Sjasone		if (w->w_spin)
122865557Sjasone			continue;
122965557Sjasone		for (w1 = w_all; w1; w1 = w1->w_next) {
123065557Sjasone			if (isitmychild(w1, w))
123165557Sjasone				break;
123265557Sjasone		}
123365557Sjasone		if (w1 != NULL)
123465557Sjasone			continue;
123565557Sjasone		witness_leveldescendents(w, 0);
123665557Sjasone	}
123765557Sjasone}
123865557Sjasone
123965557Sjasonestatic void
124065856Sjhbwitness_leveldescendents(struct witness *parent, int level)
124165557Sjasone{
124265557Sjasone	int i;
124365856Sjhb	struct witness *w;
124465557Sjasone
124565557Sjasone	if (parent->w_level < level)
124665557Sjasone		parent->w_level = level;
124765557Sjasone	level++;
124865557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
124965557Sjasone		for (i = 0; i < w->w_childcnt; i++)
125065557Sjasone			witness_leveldescendents(w->w_children[i], level);
125165557Sjasone}
125265557Sjasone
125365557Sjasonestatic void
125465856Sjhbwitness_displaydescendants(void(*prnt)(const char *fmt, ...),
125565856Sjhb			   struct witness *parent)
125665557Sjasone{
125765856Sjhb	struct witness *w;
125865557Sjasone	int i;
125965557Sjasone	int level = parent->w_level;
126065557Sjasone
126165557Sjasone	prnt("%d", level);
126265557Sjasone	if (level < 10)
126365557Sjasone		prnt(" ");
126465557Sjasone	for (i = 0; i < level; i++)
126565557Sjasone		prnt(" ");
126665557Sjasone	prnt("%s", parent->w_description);
126765557Sjasone	if (parent->w_file != NULL) {
126865557Sjasone		prnt(" -- last acquired @ %s", parent->w_file);
126965557Sjasone#ifndef W_USE_WHERE
127065557Sjasone		prnt(":%d", parent->w_line);
127165557Sjasone#endif
127265557Sjasone		prnt("\n");
127365557Sjasone	}
127465557Sjasone
127565557Sjasone	for (w = parent; w != NULL; w = w->w_morechildren)
127665557Sjasone		for (i = 0; i < w->w_childcnt; i++)
127765557Sjasone			    witness_displaydescendants(prnt, w->w_children[i]);
127865557Sjasone    }
127965557Sjasone
128065557Sjasonestatic int
128165856Sjhbdup_ok(struct witness *w)
128265557Sjasone{
128365557Sjasone	char **dup;
128465557Sjasone
128565557Sjasone	for (dup = dup_list; *dup!= NULL; dup++)
128665557Sjasone		if (strcmp(w->w_description, *dup) == 0)
128765557Sjasone			return (1);
128865557Sjasone	return (0);
128965557Sjasone}
129065557Sjasone
129165557Sjasonestatic int
129265856Sjhbblessed(struct witness *w1, struct witness *w2)
129365557Sjasone{
129465557Sjasone	int i;
129565856Sjhb	struct witness_blessed *b;
129665557Sjasone
129765557Sjasone	for (i = 0; i < blessed_count; i++) {
129865557Sjasone		b = &blessed_list[i];
129965557Sjasone		if (strcmp(w1->w_description, b->b_lock1) == 0) {
130065557Sjasone			if (strcmp(w2->w_description, b->b_lock2) == 0)
130165557Sjasone				return (1);
130265557Sjasone			continue;
130365557Sjasone		}
130465557Sjasone		if (strcmp(w1->w_description, b->b_lock2) == 0)
130565557Sjasone			if (strcmp(w2->w_description, b->b_lock1) == 0)
130665557Sjasone				return (1);
130765557Sjasone	}
130865557Sjasone	return (0);
130965557Sjasone}
131065557Sjasone
131165856Sjhbstatic struct witness *
131265557Sjasonewitness_get()
131365557Sjasone{
131465856Sjhb	struct witness *w;
131565557Sjasone
131665557Sjasone	if ((w = w_free) == NULL) {
131765557Sjasone		witness_dead = 1;
131865557Sjasone		mtx_exit(&w_mtx, MTX_SPIN);
131965557Sjasone		printf("witness exhausted\n");
132065557Sjasone		return (NULL);
132165557Sjasone	}
132265557Sjasone	w_free = w->w_next;
132365856Sjhb	bzero(w, sizeof(*w));
132465557Sjasone	return (w);
132565557Sjasone}
132665557Sjasone
132765557Sjasonestatic void
132865856Sjhbwitness_free(struct witness *w)
132965557Sjasone{
133065557Sjasone	w->w_next = w_free;
133165557Sjasone	w_free = w;
133265557Sjasone}
133365557Sjasone
133465557Sjasonevoid
133565557Sjasonewitness_list(struct proc *p)
133665557Sjasone{
133765856Sjhb	struct mtx *m;
133865557Sjasone
133965557Sjasone	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
134065557Sjasone	    m = LIST_NEXT(m, mtx_held)) {
134165557Sjasone		printf("\t\"%s\" (%p) locked at %s:%d\n",
134265557Sjasone		    m->mtx_description, m,
134365557Sjasone		    m->mtx_witness->w_file, m->mtx_witness->w_line);
134465557Sjasone	}
134565557Sjasone}
134665557Sjasone
134765557Sjasonevoid
134865856Sjhbwitness_save(struct mtx *m, const char **filep, int *linep)
134965557Sjasone{
135065557Sjasone	*filep = m->mtx_witness->w_file;
135165557Sjasone	*linep = m->mtx_witness->w_line;
135265557Sjasone}
135365557Sjasone
135465557Sjasonevoid
135565856Sjhbwitness_restore(struct mtx *m, const char *file, int line)
135665557Sjasone{
135765557Sjasone	m->mtx_witness->w_file = file;
135865557Sjasone	m->mtx_witness->w_line = line;
135965557Sjasone}
136065557Sjasone
136167352Sjhb#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1362