subr_turnstile.c revision 71560
1123992Ssobomax/*-
2103026Ssobomax * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3103026Ssobomax *
4139823Simp * Redistribution and use in source and binary forms, with or without
5103026Ssobomax * modification, are permitted provided that the following conditions
6103026Ssobomax * are met:
7103026Ssobomax * 1. Redistributions of source code must retain the above copyright
8103026Ssobomax *    notice, this list of conditions and the following disclaimer.
9103026Ssobomax * 2. Redistributions in binary form must reproduce the above copyright
10103026Ssobomax *    notice, this list of conditions and the following disclaimer in the
11103026Ssobomax *    documentation and/or other materials provided with the distribution.
12103026Ssobomax * 3. Berkeley Software Design Inc's name may not be used to endorse or
13103026Ssobomax *    promote products derived from this software without specific prior
14103026Ssobomax *    written permission.
15103026Ssobomax *
16103026Ssobomax * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17103026Ssobomax * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18103026Ssobomax * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19103026Ssobomax * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20103026Ssobomax * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21103026Ssobomax * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22103026Ssobomax * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23103026Ssobomax * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24103026Ssobomax * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25103026Ssobomax * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26103026Ssobomax * SUCH DAMAGE.
27103026Ssobomax *
28103026Ssobomax *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29103026Ssobomax *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30103026Ssobomax * $FreeBSD: head/sys/kern/subr_turnstile.c 71560 2001-01-24 10:57:01Z jhb $
31103026Ssobomax */
32103026Ssobomax
/*
 *	Main Entry: witness
 *	Pronunciation: 'wit-n&s
 *	Function: noun
 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
 *	    testimony, witness, from 2wit
 *	Date: before 12th century
 *	1 : attestation of a fact or event : TESTIMONY
 *	2 : one that gives evidence; specifically : one who testifies in
 *	    a cause or before a judicial tribunal
 *	3 : one asked to be present at a transaction so as to be able to
 *	    testify to its having taken place
 *	4 : one who has personal knowledge of something
 *	5 a : something serving as evidence or proof : SIGN
 *	  b : public affirmation by word or example of usually
 *	      religious faith or conviction <the heroic witness to divine
 *	      life -- Pilot>
 *	6 capitalized : a member of the Jehovah's Witnesses
 */
52103026Ssobomax
53103026Ssobomax#include "opt_ddb.h"
54103026Ssobomax#include "opt_witness.h"
55103026Ssobomax
56129880Sphk/*
57103026Ssobomax * Cause non-inlined mtx_*() to be compiled.
58103026Ssobomax * Must be defined early because other system headers may include mutex.h.
59103026Ssobomax */
60103026Ssobomax#define _KERN_MUTEX_C_
61103026Ssobomax
62103344Sbde#include <sys/param.h>
63103026Ssobomax#include <sys/bus.h>
64103026Ssobomax#include <sys/kernel.h>
65103026Ssobomax#include <sys/malloc.h>
66130933Sbrooks#include <sys/proc.h>
67103026Ssobomax#include <sys/sysctl.h>
68103026Ssobomax#include <sys/systm.h>
69103026Ssobomax#include <sys/vmmeter.h>
70103026Ssobomax#include <sys/ktr.h>
71103026Ssobomax
72103026Ssobomax#include <machine/atomic.h>
73103026Ssobomax#include <machine/bus.h>
74103026Ssobomax#include <machine/clock.h>
75103026Ssobomax#include <machine/cpu.h>
76103026Ssobomax
77103026Ssobomax#include <ddb/ddb.h>
78103026Ssobomax
79103026Ssobomax#include <vm/vm.h>
80103026Ssobomax#include <vm/vm_extern.h>
81103026Ssobomax
82103026Ssobomax#include <sys/mutex.h>
83103026Ssobomax
/*
 * Machine independent bits of the mutex implementation
 */
87103026Ssobomax
#ifdef WITNESS
/*
 * Extra per-mutex state that only exists when WITNESS is compiled in.
 * The members are normally reached through the mtx_* shorthand macros
 * defined right below the structure.
 */
struct mtx_debug {
	struct witness	*mtxd_witness;	/* see mtx_witness */
	LIST_ENTRY(mtx)	mtxd_held;	/* list linkage, see mtx_held */
	const char	*mtxd_file;	/* a source file name, see mtx_file */
	int		mtxd_line;	/* a source line number, see mtx_line */
};

/*
 * Shorthand accessors.  Each one expands to a member reached through a
 * pointer named mtx_debug, so `m->mtx_file' reads
 * `m->mtx_debug->mtxd_file'.
 */
#define	mtx_held	mtx_debug->mtxd_held
#define	mtx_file	mtx_debug->mtxd_file
#define	mtx_line	mtx_debug->mtxd_line
#define	mtx_witness	mtx_debug->mtxd_witness
#endif	/* WITNESS */
101103026Ssobomax
/*
 * Assembly macros
 *------------------------------------------------------------------------------
 */

/*
 * Hand x to __STRING() through one extra level of macro expansion, so
 * that x is macro-expanded before __STRING() sees it.
 * NOTE(review): assumes __STRING() stringifies its argument -- confirm
 * against <sys/cdefs.h>.
 */
#define	_V(x)	__STRING(x)
108103032Ssobomax
/*
 * Default, unoptimized mutex micro-operations.
 *
 * Every macro is wrapped in #ifndef, so a definition that is already in
 * effect when this point is reached takes precedence over the generic
 * version given here.
 */

#ifndef _obtain_lock
/*
 * Actually obtain mtx_lock: atomically replace MTX_UNOWNED with `tid'.
 * Callers treat a zero result as "did not get the lock" and fall back to
 * the slow path.
 */
#define	_obtain_lock(mp, tid)						\
	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED, (tid))
#endif

#ifndef _release_lock
/*
 * Actually release mtx_lock: atomically replace `tid' with MTX_UNOWNED.
 * The swap only happens when mtx_lock holds exactly `tid', i.e. no flag
 * bits (such as MTX_RECURSED or MTX_CONTESTED) are set; callers treat a
 * zero result as "needs the slow path".
 */
#define	_release_lock(mp, tid)						\
	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), (void *)MTX_UNOWNED)
#endif

#ifndef _release_lock_quick
/*
 * Actually release mtx_lock quickly, assuming that we own it: the lock
 * word is overwritten with MTX_UNOWNED without looking at its old value.
 */
#define	_release_lock_quick(mp)						\
	atomic_store_rel_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED)
#endif
130103026Ssobomax
#ifndef _getlock_sleep
/*
 * Get a sleep lock, dealing with recursion inline.
 *
 * Fast path: _obtain_lock() succeeds.  Otherwise, if the owner bits of
 * mtx_lock (the lock word masked with MTX_FLAGMASK) are not `tid', some
 * other thread holds the mutex and mtx_enter_hard() takes over.  If they
 * are `tid' we already hold it: set MTX_RECURSED in the lock word and
 * bump the recursion count.
 */
#define	_getlock_sleep(mp, tid, type) do {				\
	if (!_obtain_lock(mp, tid)) {					\
		if (((mp)->mtx_lock & MTX_FLAGMASK) != ((uintptr_t)(tid)))\
			mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);	\
		else {							\
			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSED);	\
			(mp)->mtx_recurse++;				\
		}							\
	}								\
} while (0)
#endif
144103026Ssobomax
#ifndef _getlock_spin_block
/*
 * Get a spin lock, handle recursion inline (as the less common case).
 *
 * The value returned by save_intr() is captured before disable_intr() is
 * called.  On the fast path it is stored in mtx_saveintr once the lock is
 * held; on the slow path it is handed to mtx_enter_hard(), which stores it
 * after it has obtained the lock.
 */
#define	_getlock_spin_block(mp, tid, type) do {				\
	u_int _mtx_intr = save_intr();					\
	disable_intr();							\
	if (!_obtain_lock(mp, tid))					\
		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, _mtx_intr);	\
	else								\
		(mp)->mtx_saveintr = _mtx_intr;				\
} while (0)
#endif
156103026Ssobomax
#ifndef _getlock_norecurse
/*
 * Get a lock without any inline recursion handling.  Calls the hard enter
 * function (with a saved-interrupt argument of 0) if we can't get it
 * inline.
 */
#define	_getlock_norecurse(mp, tid, type) do {				\
	if (!_obtain_lock(mp, tid))					\
		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);		\
} while (0)
#endif
167103026Ssobomax
#ifndef _exitlock_norecurse
/*
 * Release a sleep lock assuming we haven't recursed on it; recursion (and
 * every other case in which the lock word is not exactly `tid') is handled
 * in the hard function.
 */
#define	_exitlock_norecurse(mp, tid, type) do {				\
	if (!_release_lock(mp, tid))					\
		mtx_exit_hard((mp), (type) & MTX_HARDOPTS);		\
} while (0)
#endif
178147256Sbrooks
#ifndef _exitlock
/*
 * Release a sleep lock when it's likely we recursed (the code to deal with
 * simple recursion is inline).
 *
 * If the plain release fails and MTX_RECURSED is set, one level of
 * recursion is undone; the flag is cleared when the count drops to zero.
 * Any other failure is passed on to mtx_exit_hard().
 */
#define	_exitlock(mp, tid, type) do {					\
	if (!_release_lock(mp, tid)) {					\
		if ((mp)->mtx_lock & MTX_RECURSED) {			\
			if (--((mp)->mtx_recurse) == 0)			\
				atomic_clear_ptr(&(mp)->mtx_lock,	\
				    MTX_RECURSED);			\
		} else {						\
			mtx_exit_hard((mp), (type) & MTX_HARDOPTS);	\
		}							\
	}								\
} while (0)
#endif
196103032Ssobomax
#ifndef _exitlock_spin
/*
 * Release a spin lock (with possible recursion).
 *
 * When the lock is not recursed, mtx_saveintr is copied to a local before
 * the lock word is released, and only then passed to restore_intr(): once
 * the lock is released the field is no longer ours to read.  When the
 * lock is recursed only the recursion count is decremented.
 */
#define	_exitlock_spin(mp) do {						\
	if (!mtx_recursed((mp))) {					\
		int _mtx_intr = (mp)->mtx_saveintr;			\
									\
		_release_lock_quick(mp);				\
		restore_intr(_mtx_intr);				\
	} else {							\
		(mp)->mtx_recurse--;					\
	}								\
} while (0)
#endif
210127307Srwatson
#ifdef WITNESS
static void	witness_init(struct mtx *, int flag);
static void	witness_destroy(struct mtx *);
static void	witness_display(void(*)(const char *fmt, ...));

/* Debug state attached to all_mtx, the head of the all-mutexes list. */
static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
/*
 * Set to 0 once mutexes have been fully initialized so that witness code can be
 * safely executed.
 */
static int witness_cold = 1;
#else	/* WITNESS */

/*
 * Without WITNESS the hooks compile away.  witness_init() still touches
 * `flag' (by incrementing it) purely to silence the unused-parameter
 * warning in mtx_init().
 */
#define	witness_init(m, flag)	(flag)++
#define	witness_destroy(m)
#define	witness_try_enter(m, t, f, l)
#endif	/* WITNESS */
233103026Ssobomax
234123992Ssobomax/* All mutexes in system (used for debug/panic) */
235103026Ssobomaxstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
236103026Ssobomax	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
237103026Ssobomax	{ NULL, NULL }, &all_mtx, &all_mtx,
238103026Ssobomax#ifdef WITNESS
239103026Ssobomax	&all_mtx_debug
240103026Ssobomax#else
241103026Ssobomax	NULL
242103026Ssobomax#endif
243147256Sbrooks	 };
244103026Ssobomax
/*
 * Mutex counters.
 * NOTE(review): presumably the current and peak number of initialized
 * mutexes; their users are outside this part of the file -- confirm.
 */
static int	mtx_cur_cnt;
static int	mtx_max_cnt;

static void	propagate_priority(struct proc *);
static void	mtx_enter_hard(struct mtx *, int type, int saveintr);
static void	mtx_exit_hard(struct mtx *, int type);

/* True when the lock word of m is exactly MTX_UNOWNED. */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
/*
 * Owner of m as a struct proc pointer (the lock word masked with
 * MTX_FLAGMASK), or NULL when m is unowned.
 */
#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * Read the pointer-sized word stored immediately below the address of x.
 * NOTE(review): applied to a function's first argument this is presumably
 * the caller's return address on the supported stack layouts -- confirm.
 * The expansion is fully parenthesized so it can be used inside larger
 * expressions.
 */
#define	RETIP(x)		(*(((uintptr_t *)(&(x))) - 1))
/* Assign pri to p->p_priority; usable as an expression or a statement. */
#define	SET_PRIO(p, pri)	((p)->p_priority = (pri))
258103026Ssobomax
259103026Ssobomaxstatic void
260103026Ssobomaxpropagate_priority(struct proc *p)
261123922Ssam{
262103026Ssobomax	int pri = p->p_priority;
263103026Ssobomax	struct mtx *m = p->p_blocked;
264103026Ssobomax
265103026Ssobomax	mtx_assert(&sched_lock, MA_OWNED);
266103026Ssobomax	for (;;) {
267103026Ssobomax		struct proc *p1;
268103026Ssobomax
269103026Ssobomax		p = mtx_owner(m);
270103026Ssobomax
271103026Ssobomax		if (p == NULL) {
272103026Ssobomax			/*
273103026Ssobomax			 * This really isn't quite right. Really
274103026Ssobomax			 * ought to bump priority of process that
275103026Ssobomax			 * next acquires the mutex.
276103026Ssobomax			 */
277103026Ssobomax			MPASS(m->mtx_lock == MTX_CONTESTED);
278103026Ssobomax			return;
279103026Ssobomax		}
280103026Ssobomax		MPASS(p->p_magic == P_MAGIC);
281103026Ssobomax		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
282103026Ssobomax		if (p->p_priority <= pri)
283103026Ssobomax			return;
284103026Ssobomax
285103026Ssobomax		/*
286103026Ssobomax		 * Bump this process' priority.
287103026Ssobomax		 */
288103026Ssobomax		SET_PRIO(p, pri);
289103026Ssobomax
290103026Ssobomax		/*
291103026Ssobomax		 * If lock holder is actually running, just bump priority.
292103026Ssobomax		 */
293103026Ssobomax#ifdef SMP
294103026Ssobomax		/*
295103026Ssobomax		 * For SMP, we can check the p_oncpu field to see if we are
296103026Ssobomax		 * running.
297103026Ssobomax		 */
298103026Ssobomax		if (p->p_oncpu != 0xff) {
299103026Ssobomax			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
300103026Ssobomax			return;
301103026Ssobomax		}
302123992Ssobomax#else
303103026Ssobomax		/*
304103026Ssobomax		 * For UP, we check to see if p is curproc (this shouldn't
305103026Ssobomax		 * ever happen however as it would mean we are in a deadlock.)
306111119Simp		 */
307103026Ssobomax		if (p == curproc) {
308103026Ssobomax			panic("Deadlock detected");
309103026Ssobomax			return;
310103026Ssobomax		}
311103026Ssobomax#endif
312103026Ssobomax		/*
313103026Ssobomax		 * If on run queue move to new run queue, and
314103026Ssobomax		 * quit.
315103026Ssobomax		 */
316103026Ssobomax		if (p->p_stat == SRUN) {
317103026Ssobomax			printf("XXX: moving process %d(%s) to a new run queue\n",
318103026Ssobomax			       p->p_pid, p->p_comm);
319103026Ssobomax			MPASS(p->p_blocked == NULL);
320103026Ssobomax			remrunqueue(p);
321103026Ssobomax			setrunqueue(p);
322103026Ssobomax			return;
323103026Ssobomax		}
324103026Ssobomax
325103026Ssobomax		/*
326103026Ssobomax		 * If we aren't blocked on a mutex, we should be.
327103026Ssobomax		 */
328103026Ssobomax		KASSERT(p->p_stat == SMTX, (
329103026Ssobomax		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
330103026Ssobomax		    p->p_pid, p->p_comm, p->p_stat,
331103026Ssobomax		    m->mtx_description));
332103026Ssobomax
333103026Ssobomax		/*
334103026Ssobomax		 * Pick up the mutex that p is blocked on.
335103026Ssobomax		 */
336103026Ssobomax		m = p->p_blocked;
337103026Ssobomax		MPASS(m != NULL);
338103026Ssobomax
339103026Ssobomax		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
340103026Ssobomax		    p->p_comm, m->mtx_description);
341103026Ssobomax		/*
342103026Ssobomax		 * Check if the proc needs to be moved up on
343103026Ssobomax		 * the blocked chain
344103026Ssobomax		 */
345103026Ssobomax		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
346103026Ssobomax			printf("XXX: process at head of run queue\n");
347103026Ssobomax			continue;
348103026Ssobomax		}
349103026Ssobomax		p1 = TAILQ_PREV(p, rq, p_procq);
350103026Ssobomax		if (p1->p_priority <= pri) {
351103026Ssobomax			printf(
352103026Ssobomax	"XXX: previous process %d(%s) has higher priority\n",
353103026Ssobomax	                    p->p_pid, p->p_comm);
354103026Ssobomax			continue;
355111119Simp		}
356103026Ssobomax
357103026Ssobomax		/*
358103026Ssobomax		 * Remove proc from blocked chain and determine where
359103026Ssobomax		 * it should be moved up to.  Since we know that p1 has
360103026Ssobomax		 * a lower priority than p, we know that at least one
361103026Ssobomax		 * process in the chain has a lower priority and that
362103026Ssobomax		 * p1 will thus not be NULL after the loop.
363128580Sandre		 */
364103026Ssobomax		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
365103026Ssobomax		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
366103026Ssobomax			MPASS(p1->p_magic == P_MAGIC);
367103026Ssobomax			if (p1->p_priority > pri)
368103026Ssobomax				break;
369103026Ssobomax		}
370103026Ssobomax		MPASS(p1 != NULL);
371103026Ssobomax		TAILQ_INSERT_BEFORE(p1, p, p_procq);
372125226Ssobomax		CTR4(KTR_LOCK,
373103026Ssobomax		    "propagate_priority: p %p moved before %p on [%p] %s",
374103026Ssobomax		    p, p1, m, m->mtx_description);
375103026Ssobomax	}
376103026Ssobomax}
377103026Ssobomax
378103026Ssobomax/*
379103026Ssobomax * Get lock 'm', the macro handles the easy (and most common cases) and leaves
380133163Ssobomax * the slow stuff to the mtx_enter_hard() function.
381103026Ssobomax *
382103032Ssobomax * Note: since type is usually a constant much of this code is optimized out.
383103026Ssobomax */
384103026Ssobomaxvoid
385125226Ssobomax_mtx_enter(struct mtx *mtxp, int type, const char *file, int line)
386103026Ssobomax{
387103026Ssobomax	struct mtx	*mpp = mtxp;
388103026Ssobomax
389103026Ssobomax	/* bits only valid on mtx_exit() */
390128583Sandre	MPASS4(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
391128583Sandre	    STR_mtx_bad_type, file, line);
392128583Sandre
393128583Sandre	if ((type) & MTX_SPIN) {
394128583Sandre		/*
395128580Sandre		 * Easy cases of spin locks:
396123992Ssobomax		 *
397103026Ssobomax		 * 1) We already own the lock and will simply recurse on it (if
398103026Ssobomax		 *    RLIKELY)
399103026Ssobomax		 *
400103026Ssobomax		 * 2) The lock is free, we just get it
401103026Ssobomax		 */
402103026Ssobomax		if ((type) & MTX_RLIKELY) {
403103026Ssobomax			/*
404103032Ssobomax			 * Check for recursion, if we already have this
405103026Ssobomax			 * lock we just bump the recursion count.
406103026Ssobomax			 */
407103026Ssobomax			if (mpp->mtx_lock == (uintptr_t)CURTHD) {
408103026Ssobomax				mpp->mtx_recurse++;
409103026Ssobomax				goto done;
410103026Ssobomax			}
411103026Ssobomax		}
412103026Ssobomax
413103026Ssobomax		if (((type) & MTX_TOPHALF) == 0) {
414103026Ssobomax			/*
415103026Ssobomax			 * If an interrupt thread uses this we must block
416103026Ssobomax			 * interrupts here.
417103026Ssobomax			 */
418103026Ssobomax			if ((type) & MTX_FIRST) {
419103026Ssobomax				ASS_IEN;
420103026Ssobomax				disable_intr();
421103026Ssobomax				_getlock_norecurse(mpp, CURTHD,
422103026Ssobomax				    (type) & MTX_HARDOPTS);
423103026Ssobomax			} else {
424125020Ssobomax				_getlock_spin_block(mpp, CURTHD,
425103026Ssobomax				    (type) & MTX_HARDOPTS);
426103026Ssobomax			}
427103026Ssobomax		} else
428103026Ssobomax			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
429103026Ssobomax	} else {
430103026Ssobomax		/* Sleep locks */
431103026Ssobomax		if ((type) & MTX_RLIKELY)
432103026Ssobomax			_getlock_sleep(mpp, CURTHD, (type) & MTX_HARDOPTS);
433125024Ssobomax		else
434125024Ssobomax			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
435125024Ssobomax	}
436125024Ssobomaxdone:
437103026Ssobomax	WITNESS_ENTER(mpp, type, file, line);
438103026Ssobomax	if (((type) & MTX_QUIET) == 0)
439103026Ssobomax		CTR5(KTR_LOCK, STR_mtx_enter_fmt,
440103026Ssobomax		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
441103026Ssobomax
442103026Ssobomax}
443103026Ssobomax
444103026Ssobomax/*
445103026Ssobomax * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
446103026Ssobomax *
447103026Ssobomax * XXX DOES NOT HANDLE RECURSION
448147256Sbrooks */
449103026Ssobomaxint
450103026Ssobomax_mtx_try_enter(struct mtx *mtxp, int type, const char *file, int line)
451103026Ssobomax{
452103026Ssobomax	struct mtx	*const mpp = mtxp;
453103026Ssobomax	int	rval;
454103026Ssobomax
455103026Ssobomax	rval = _obtain_lock(mpp, CURTHD);
456103026Ssobomax#ifdef WITNESS
457103026Ssobomax	if (rval && mpp->mtx_witness != NULL) {
458103026Ssobomax		MPASS(mpp->mtx_recurse == 0);
459103026Ssobomax		witness_try_enter(mpp, type, file, line);
460103026Ssobomax	}
461103026Ssobomax#endif	/* WITNESS */
462103026Ssobomax	if (((type) & MTX_QUIET) == 0)
463103026Ssobomax		CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
464103026Ssobomax		    mpp->mtx_description, mpp, rval, file, line);
465103026Ssobomax
466103026Ssobomax	return rval;
467103026Ssobomax}
468103026Ssobomax
469103026Ssobomax/*
470103026Ssobomax * Release lock m.
471103026Ssobomax */
472103026Ssobomaxvoid
473103026Ssobomax_mtx_exit(struct mtx *mtxp, int type, const char *file, int line)
474103026Ssobomax{
475103026Ssobomax	struct mtx	*const mpp = mtxp;
476103026Ssobomax
477103026Ssobomax	MPASS4(mtx_owned(mpp), STR_mtx_owned, file, line);
478103026Ssobomax	WITNESS_EXIT(mpp, type, file, line);
479103026Ssobomax	if (((type) & MTX_QUIET) == 0)
480103026Ssobomax		CTR5(KTR_LOCK, STR_mtx_exit_fmt,
481103026Ssobomax		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
482103026Ssobomax	if ((type) & MTX_SPIN) {
483103026Ssobomax		if ((type) & MTX_NORECURSE) {
484103026Ssobomax			int mtx_intr = mpp->mtx_saveintr;
485103026Ssobomax
486103026Ssobomax			MPASS4(mpp->mtx_recurse == 0, STR_mtx_recurse,
487103026Ssobomax			    file, line);
488103026Ssobomax			_release_lock_quick(mpp);
489103026Ssobomax			if (((type) & MTX_TOPHALF) == 0) {
490103026Ssobomax				if ((type) & MTX_FIRST) {
491103026Ssobomax					ASS_IDIS;
492103026Ssobomax					enable_intr();
493103026Ssobomax				} else
494103026Ssobomax					restore_intr(mtx_intr);
495103026Ssobomax			}
496103026Ssobomax		} else {
497103026Ssobomax			if (((type & MTX_TOPHALF) == 0) &&
498103026Ssobomax			    (type & MTX_FIRST)) {
499103026Ssobomax				ASS_IDIS;
500103026Ssobomax				ASS_SIEN(mpp);
501103026Ssobomax			}
502103026Ssobomax			_exitlock_spin(mpp);
503103026Ssobomax		}
504103026Ssobomax	} else {
505103026Ssobomax		/* Handle sleep locks */
506103026Ssobomax		if ((type) & MTX_RLIKELY)
507103026Ssobomax			_exitlock(mpp, CURTHD, (type) & MTX_HARDOPTS);
508103026Ssobomax		else {
509103026Ssobomax			_exitlock_norecurse(mpp, CURTHD,
510103026Ssobomax			    (type) & MTX_HARDOPTS);
511103026Ssobomax		}
512103026Ssobomax	}
513103026Ssobomax}
514103026Ssobomax
515103026Ssobomaxvoid
516103026Ssobomaxmtx_enter_hard(struct mtx *m, int type, int saveintr)
517103026Ssobomax{
518103026Ssobomax	struct proc *p = CURPROC;
519125020Ssobomax
520103026Ssobomax	KASSERT(p != NULL, ("curproc is NULL in mutex"));
521103026Ssobomax
522103026Ssobomax	switch (type) {
523103026Ssobomax	case MTX_DEF:
524103026Ssobomax		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
525103026Ssobomax			m->mtx_recurse++;
526103026Ssobomax			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
527103026Ssobomax			if ((type & MTX_QUIET) == 0)
528147256Sbrooks				CTR1(KTR_LOCK, "mtx_enter: %p recurse", m);
529103026Ssobomax			return;
530103026Ssobomax		}
531103026Ssobomax		if ((type & MTX_QUIET) == 0)
532103026Ssobomax			CTR3(KTR_LOCK,
533103026Ssobomax			    "mtx_enter: %p contested (lock=%p) [%p]",
534103026Ssobomax			    m, (void *)m->mtx_lock, (void *)RETIP(m));
535103026Ssobomax
536103026Ssobomax		/*
537103026Ssobomax		 * Save our priority.  Even though p_nativepri is protected
538103026Ssobomax		 * by sched_lock, we don't obtain it here as it can be
539103026Ssobomax		 * expensive.  Since this is the only place p_nativepri is
540103026Ssobomax		 * set, and since two CPUs will not be executing the same
541103026Ssobomax		 * process concurrently, we know that no other CPU is going
542103026Ssobomax		 * to be messing with this.  Also, p_nativepri is only read
543103026Ssobomax		 * when we are blocked on a mutex, so that can't be happening
544103026Ssobomax		 * right now either.
545103026Ssobomax		 */
546103026Ssobomax		p->p_nativepri = p->p_priority;
547103026Ssobomax		while (!_obtain_lock(m, p)) {
548103026Ssobomax			uintptr_t v;
549103026Ssobomax			struct proc *p1;
550103026Ssobomax
551103026Ssobomax			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
552103026Ssobomax			/*
553103026Ssobomax			 * check if the lock has been released while
554103026Ssobomax			 * waiting for the schedlock.
555103026Ssobomax			 */
556103026Ssobomax			if ((v = m->mtx_lock) == MTX_UNOWNED) {
557103026Ssobomax				mtx_exit(&sched_lock, MTX_SPIN);
558103026Ssobomax				continue;
559103026Ssobomax			}
560103026Ssobomax			/*
561103026Ssobomax			 * The mutex was marked contested on release. This
562103026Ssobomax			 * means that there are processes blocked on it.
563103026Ssobomax			 */
564103026Ssobomax			if (v == MTX_CONTESTED) {
565103026Ssobomax				p1 = TAILQ_FIRST(&m->mtx_blocked);
566103026Ssobomax				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
567103026Ssobomax				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
568103026Ssobomax				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
569103026Ssobomax				if (p1->p_priority < p->p_priority) {
570103026Ssobomax					SET_PRIO(p, p1->p_priority);
571103026Ssobomax				}
572103026Ssobomax				mtx_exit(&sched_lock, MTX_SPIN);
573103026Ssobomax				return;
574103026Ssobomax			}
575103026Ssobomax			/*
576103026Ssobomax			 * If the mutex isn't already contested and
577103026Ssobomax			 * a failure occurs setting the contested bit the
578103026Ssobomax			 * mutex was either release or the
579103026Ssobomax			 * state of the RECURSION bit changed.
580103026Ssobomax			 */
581103026Ssobomax			if ((v & MTX_CONTESTED) == 0 &&
582103026Ssobomax			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
583103026Ssobomax				               (void *)(v | MTX_CONTESTED))) {
584103026Ssobomax				mtx_exit(&sched_lock, MTX_SPIN);
585103026Ssobomax				continue;
586103026Ssobomax			}
587103026Ssobomax
588103026Ssobomax			/* We definitely have to sleep for this lock */
589103026Ssobomax			mtx_assert(m, MA_NOTOWNED);
590103026Ssobomax
591103026Ssobomax#ifdef notyet
592103026Ssobomax			/*
593103026Ssobomax			 * If we're borrowing an interrupted thread's VM
594103026Ssobomax			 * context must clean up before going to sleep.
595103026Ssobomax			 */
596103026Ssobomax			if (p->p_flag & (P_ITHD | P_SITHD)) {
597103026Ssobomax				ithd_t *it = (ithd_t *)p;
598103026Ssobomax
599103026Ssobomax				if (it->it_interrupted) {
600103026Ssobomax					if ((type & MTX_QUIET) == 0)
601103026Ssobomax						CTR2(KTR_LOCK,
602103026Ssobomax					    "mtx_enter: 0x%x interrupted 0x%x",
603103026Ssobomax						    it, it->it_interrupted);
604103026Ssobomax					intr_thd_fixup(it);
605103026Ssobomax				}
606103026Ssobomax			}
607103026Ssobomax#endif
608122699Sbms
609122699Sbms			/* Put us on the list of procs blocked on this mutex */
610122699Sbms			if (TAILQ_EMPTY(&m->mtx_blocked)) {
611103026Ssobomax				p1 = (struct proc *)(m->mtx_lock &
612103026Ssobomax						     MTX_FLAGMASK);
613103026Ssobomax				LIST_INSERT_HEAD(&p1->p_contested, m,
614103026Ssobomax						 mtx_contested);
615103026Ssobomax				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
616103026Ssobomax			} else {
617103026Ssobomax				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
618103026Ssobomax					if (p1->p_priority > p->p_priority)
619103026Ssobomax						break;
620103026Ssobomax				if (p1)
621103026Ssobomax					TAILQ_INSERT_BEFORE(p1, p, p_procq);
622122699Sbms				else
623122699Sbms					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
624122699Sbms							  p_procq);
625103026Ssobomax			}
626103026Ssobomax
627103026Ssobomax			p->p_blocked = m;	/* Who we're blocked on */
628103026Ssobomax			p->p_mtxname = m->mtx_description;
629103026Ssobomax			p->p_stat = SMTX;
630103026Ssobomax#if 0
631103026Ssobomax			propagate_priority(p);
632103026Ssobomax#endif
633103026Ssobomax			if ((type & MTX_QUIET) == 0)
634103026Ssobomax				CTR3(KTR_LOCK,
635103026Ssobomax				    "mtx_enter: p %p blocked on [%p] %s",
636103026Ssobomax				    p, m, m->mtx_description);
637103026Ssobomax			mi_switch();
638103026Ssobomax			if ((type & MTX_QUIET) == 0)
639103026Ssobomax				CTR3(KTR_LOCK,
640103026Ssobomax			    "mtx_enter: p %p free from blocked on [%p] %s",
641103026Ssobomax				    p, m, m->mtx_description);
642103026Ssobomax			mtx_exit(&sched_lock, MTX_SPIN);
643103026Ssobomax		}
644103026Ssobomax		return;
645103026Ssobomax	case MTX_SPIN:
646103026Ssobomax	case MTX_SPIN | MTX_FIRST:
647103026Ssobomax	case MTX_SPIN | MTX_TOPHALF:
648103026Ssobomax	    {
649123992Ssobomax		int i = 0;
650103026Ssobomax
651103026Ssobomax		if (m->mtx_lock == (uintptr_t)p) {
652103026Ssobomax			m->mtx_recurse++;
653103026Ssobomax			return;
654103026Ssobomax		}
655103032Ssobomax		if ((type & MTX_QUIET) == 0)
656103026Ssobomax			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
657103026Ssobomax		for (;;) {
658103026Ssobomax			if (_obtain_lock(m, p))
659103026Ssobomax				break;
660103026Ssobomax			while (m->mtx_lock != MTX_UNOWNED) {
661103026Ssobomax				if (i++ < 1000000)
662103026Ssobomax					continue;
663103026Ssobomax				if (i++ < 6000000)
664103026Ssobomax					DELAY (1);
665103026Ssobomax#ifdef DDB
666103026Ssobomax				else if (!db_active)
667103026Ssobomax#else
668103026Ssobomax				else
669103026Ssobomax#endif
670103026Ssobomax					panic(
671103026Ssobomax				"spin lock %s held by %p for > 5 seconds",
672103026Ssobomax					    m->mtx_description,
673103026Ssobomax					    (void *)m->mtx_lock);
674147256Sbrooks			}
675103026Ssobomax		}
676103026Ssobomax
677103026Ssobomax#ifdef MUTEX_DEBUG
678103026Ssobomax		if (type != MTX_SPIN)
679103026Ssobomax			m->mtx_saveintr = 0xbeefface;
680103026Ssobomax		else
681103026Ssobomax#endif
682103026Ssobomax			m->mtx_saveintr = saveintr;
683103026Ssobomax		if ((type & MTX_QUIET) == 0)
684103026Ssobomax			CTR1(KTR_LOCK, "mtx_enter: %p spin done", m);
685147256Sbrooks		return;
686103026Ssobomax	    }
687103026Ssobomax	}
688103026Ssobomax}
689103026Ssobomax
690103026Ssobomaxvoid
691103026Ssobomaxmtx_exit_hard(struct mtx *m, int type)
692103026Ssobomax{
693103026Ssobomax	struct proc *p, *p1;
694103026Ssobomax	struct mtx *m1;
695103026Ssobomax	int pri;
696103026Ssobomax
697103026Ssobomax	p = CURPROC;
698103026Ssobomax	switch (type) {
699103026Ssobomax	case MTX_DEF:
700103026Ssobomax	case MTX_DEF | MTX_NOSWITCH:
701103026Ssobomax		if (mtx_recursed(m)) {
702103026Ssobomax			if (--(m->mtx_recurse) == 0)
703103026Ssobomax				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
704103026Ssobomax			if ((type & MTX_QUIET) == 0)
705103026Ssobomax				CTR1(KTR_LOCK, "mtx_exit: %p unrecurse", m);
706103026Ssobomax			return;
707103026Ssobomax		}
708103026Ssobomax		mtx_enter(&sched_lock, MTX_SPIN);
709147256Sbrooks		if ((type & MTX_QUIET) == 0)
710103026Ssobomax			CTR1(KTR_LOCK, "mtx_exit: %p contested", m);
711103026Ssobomax		p1 = TAILQ_FIRST(&m->mtx_blocked);
712103026Ssobomax		MPASS(p->p_magic == P_MAGIC);
713103026Ssobomax		MPASS(p1->p_magic == P_MAGIC);
714103026Ssobomax		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
715103026Ssobomax		if (TAILQ_EMPTY(&m->mtx_blocked)) {
716103026Ssobomax			LIST_REMOVE(m, mtx_contested);
717103026Ssobomax			_release_lock_quick(m);
718103026Ssobomax			if ((type & MTX_QUIET) == 0)
719103026Ssobomax				CTR1(KTR_LOCK, "mtx_exit: %p not held", m);
720103026Ssobomax		} else
721103026Ssobomax			atomic_store_rel_ptr(&m->mtx_lock,
722103026Ssobomax			    (void *)MTX_CONTESTED);
723103026Ssobomax		pri = MAXPRI;
724103026Ssobomax		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
725123992Ssobomax			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
726123992Ssobomax			if (cp < pri)
727103026Ssobomax				pri = cp;
728123992Ssobomax		}
729103026Ssobomax		if (pri > p->p_nativepri)
730103026Ssobomax			pri = p->p_nativepri;
731103026Ssobomax		SET_PRIO(p, pri);
732103026Ssobomax		if ((type & MTX_QUIET) == 0)
733103026Ssobomax			CTR2(KTR_LOCK,
734103026Ssobomax			    "mtx_exit: %p contested setrunqueue %p", m, p1);
735103026Ssobomax		p1->p_blocked = NULL;
736103026Ssobomax		p1->p_mtxname = NULL;
737103026Ssobomax		p1->p_stat = SRUN;
738103026Ssobomax		setrunqueue(p1);
739103026Ssobomax		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
740103026Ssobomax#ifdef notyet
741103026Ssobomax			if (p->p_flag & (P_ITHD | P_SITHD)) {
742103026Ssobomax				ithd_t *it = (ithd_t *)p;
743103026Ssobomax
744103026Ssobomax				if (it->it_interrupted) {
745103026Ssobomax					if ((type & MTX_QUIET) == 0)
746103026Ssobomax						CTR2(KTR_LOCK,
747103026Ssobomax					    "mtx_exit: 0x%x interruped 0x%x",
748103026Ssobomax						    it, it->it_interrupted);
749103026Ssobomax					intr_thd_fixup(it);
750103026Ssobomax				}
751103026Ssobomax			}
752103026Ssobomax#endif
753127307Srwatson			setrunqueue(p);
754103026Ssobomax			if ((type & MTX_QUIET) == 0)
755103026Ssobomax				CTR2(KTR_LOCK,
756103026Ssobomax				    "mtx_exit: %p switching out lock=%p",
757103026Ssobomax				    m, (void *)m->mtx_lock);
758103026Ssobomax			mi_switch();
759103026Ssobomax			if ((type & MTX_QUIET) == 0)
760103026Ssobomax				CTR2(KTR_LOCK,
761103026Ssobomax				    "mtx_exit: %p resuming lock=%p",
762127307Srwatson				    m, (void *)m->mtx_lock);
763127307Srwatson		}
764127307Srwatson		mtx_exit(&sched_lock, MTX_SPIN);
765127307Srwatson		break;
766127307Srwatson	case MTX_SPIN:
767127307Srwatson	case MTX_SPIN | MTX_FIRST:
768127307Srwatson		if (mtx_recursed(m)) {
769127307Srwatson			m->mtx_recurse--;
770127307Srwatson			return;
771103026Ssobomax		}
772132199Sphk		MPASS(mtx_owned(m));
773132199Sphk		_release_lock_quick(m);
774103026Ssobomax		if (type & MTX_FIRST)
775103026Ssobomax			enable_intr();	/* XXX is this kosher? */
776103026Ssobomax		else {
777103026Ssobomax			MPASS(m->mtx_saveintr != 0xbeefface);
778103026Ssobomax			restore_intr(m->mtx_saveintr);
779103026Ssobomax		}
780103026Ssobomax		break;
781103026Ssobomax	case MTX_SPIN | MTX_TOPHALF:
782103026Ssobomax		if (mtx_recursed(m)) {
783103026Ssobomax			m->mtx_recurse--;
784103026Ssobomax			return;
785103026Ssobomax		}
786		MPASS(mtx_owned(m));
787		_release_lock_quick(m);
788		break;
789	default:
790		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
791	}
792}
793
794#ifdef INVARIANTS
795void
796_mtx_assert(struct mtx *m, int what, const char *file, int line)
797{
798	switch ((what)) {
799	case MA_OWNED:
800	case MA_OWNED | MA_RECURSED:
801	case MA_OWNED | MA_NOTRECURSED:
802		if (!mtx_owned((m)))
803			panic("mutex %s not owned at %s:%d",
804			    (m)->mtx_description, file, line);
805		if (mtx_recursed((m))) {
806			if (((what) & MA_NOTRECURSED) != 0)
807				panic("mutex %s recursed at %s:%d",
808				    (m)->mtx_description, file, line);
809		} else if (((what) & MA_RECURSED) != 0) {
810			panic("mutex %s unrecursed at %s:%d",
811			    (m)->mtx_description, file, line);
812		}
813		break;
814	case MA_NOTOWNED:
815		if (mtx_owned((m)))
816			panic("mutex %s owned at %s:%d",
817			    (m)->mtx_description, file, line);
818		break;
819	default:
820		panic("unknown mtx_assert at %s:%d", file, line);
821	}
822}
823#endif
824
#define MV_DESTROY	0	/* validate before destroy */
#define MV_INIT		1	/* validate before init */
827
828#ifdef MUTEX_DEBUG
829
830int mtx_validate __P((struct mtx *, int));
831
832int
833mtx_validate(struct mtx *m, int when)
834{
835	struct mtx *mp;
836	int i;
837	int retval = 0;
838
839#ifdef WITNESS
840	if (witness_cold)
841		return 0;
842#endif
843	if (m == &all_mtx || cold)
844		return 0;
845
846	mtx_enter(&all_mtx, MTX_DEF);
847/*
848 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
849 * we can re-enable the kernacc() checks.
850 */
851#ifndef __alpha__
852	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
853	    VM_PROT_READ) == 1);
854#endif
855	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
856	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
857#ifndef __alpha__
858		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
859		    VM_PROT_READ) != 1) {
860			panic("mtx_validate: mp=%p mp->mtx_next=%p",
861			    mp, mp->mtx_next);
862		}
863#endif
864		i++;
865		if (i > mtx_cur_cnt) {
866			panic("mtx_validate: too many in chain, known=%d\n",
867			    mtx_cur_cnt);
868		}
869	}
870	MPASS(i == mtx_cur_cnt);
871	switch (when) {
872	case MV_DESTROY:
873		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
874			if (mp == m)
875				break;
876		MPASS(mp == m);
877		break;
878	case MV_INIT:
879		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
880		if (mp == m) {
881			/*
882			 * Not good. This mutex already exists.
883			 */
884			printf("re-initing existing mutex %s\n",
885			    m->mtx_description);
886			MPASS(m->mtx_lock == MTX_UNOWNED);
887			retval = 1;
888		}
889	}
890	mtx_exit(&all_mtx, MTX_DEF);
891	return (retval);
892}
893#endif
894
895void
896mtx_init(struct mtx *m, const char *t, int flag)
897{
898	if ((flag & MTX_QUIET) == 0)
899		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, t);
900#ifdef MUTEX_DEBUG
901	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
902		return;
903#endif
904
905	bzero((void *)m, sizeof *m);
906	TAILQ_INIT(&m->mtx_blocked);
907#ifdef WITNESS
908	if (!witness_cold) {
909		/* XXX - should not use DEVBUF */
910		m->mtx_debug = malloc(sizeof(struct mtx_debug),
911		    M_DEVBUF, M_NOWAIT | M_ZERO);
912		MPASS(m->mtx_debug != NULL);
913	}
914#endif
915	m->mtx_description = t;
916
917	m->mtx_flags = flag;
918	m->mtx_lock = MTX_UNOWNED;
919	/* Put on all mutex queue */
920	mtx_enter(&all_mtx, MTX_DEF);
921	m->mtx_next = &all_mtx;
922	m->mtx_prev = all_mtx.mtx_prev;
923	m->mtx_prev->mtx_next = m;
924	all_mtx.mtx_prev = m;
925	if (++mtx_cur_cnt > mtx_max_cnt)
926		mtx_max_cnt = mtx_cur_cnt;
927	mtx_exit(&all_mtx, MTX_DEF);
928#ifdef WITNESS
929	if (!witness_cold)
930		witness_init(m, flag);
931#endif
932}
933
934void
935mtx_destroy(struct mtx *m)
936{
937
938#ifdef WITNESS
939	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
940	    __FUNCTION__));
941#endif
942	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
943#ifdef MUTEX_DEBUG
944	if (m->mtx_next == NULL)
945		panic("mtx_destroy: %p (%s) already destroyed",
946		    m, m->mtx_description);
947
948	if (!mtx_owned(m)) {
949		MPASS(m->mtx_lock == MTX_UNOWNED);
950	} else {
951		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
952	}
953	mtx_validate(m, MV_DESTROY);		/* diagnostic */
954#endif
955
956#ifdef WITNESS
957	if (m->mtx_witness)
958		witness_destroy(m);
959#endif /* WITNESS */
960
961	/* Remove from the all mutex queue */
962	mtx_enter(&all_mtx, MTX_DEF);
963	m->mtx_next->mtx_prev = m->mtx_prev;
964	m->mtx_prev->mtx_next = m->mtx_next;
965#ifdef MUTEX_DEBUG
966	m->mtx_next = m->mtx_prev = NULL;
967#endif
968#ifdef WITNESS
969	free(m->mtx_debug, M_DEVBUF);
970	m->mtx_debug = NULL;
971#endif
972	mtx_cur_cnt--;
973	mtx_exit(&all_mtx, MTX_DEF);
974}
975
976/*
977 * The non-inlined versions of the mtx_*() functions are always built (above),
978 * but the witness code depends on the WITNESS kernel option being specified.
979 */
980
981#ifdef WITNESS
982static void
983witness_fixup(void *dummy __unused)
984{
985	struct mtx *mp;
986
987	/*
988	 * We have to release Giant before initializing its witness
989	 * structure so that WITNESS doesn't get confused.
990	 */
991	mtx_exit(&Giant, MTX_DEF);
992	mtx_assert(&Giant, MA_NOTOWNED);
993	mtx_enter(&all_mtx, MTX_DEF);
994
995	/* Iterate through all mutexes and finish up mutex initialization. */
996	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
997
998		/* XXX - should not use DEVBUF */
999		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
1000		    M_DEVBUF, M_NOWAIT | M_ZERO);
1001		MPASS(mp->mtx_debug != NULL);
1002
1003		witness_init(mp, mp->mtx_flags);
1004	}
1005	mtx_exit(&all_mtx, MTX_DEF);
1006
1007	/* Mark the witness code as being ready for use. */
1008	atomic_store_rel_int(&witness_cold, 0);
1009
1010	mtx_enter(&Giant, MTX_DEF);
1011}
1012SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
1013
/* Number of entries in the statically allocated w_data[] pool. */
#define WITNESS_COUNT 200
/* Child slots per struct witness; more are chained via w_morechildren. */
#define	WITNESS_NCHILDREN 2

/*
 * 0 makes enroll() return NULL so that mutexes are not tracked; a value
 * greater than 1 additionally enables the w_level shortcut in
 * witness_enter().
 */
int witness_watch = 1;

/*
 * One node of the lock-order graph.  Nodes are keyed by mutex description:
 * enroll() returns the existing node when the description matches.  The
 * same structure is also used as an overflow node that only holds extra
 * child slots for a parent (see w_morechildren).
 */
struct witness {
	struct witness	*w_next;	/* link on the w_all or w_free list */
	const char	*w_description;	/* mutex description (name) */
	const char	*w_file;	/* file of the last acquisition */
	int		 w_line;	/* line of the last acquisition */
	struct witness	*w_morechildren; /* overflow node with more children */
	u_char		 w_childcnt;	/* used entries in w_children[] */
	u_char		 w_Giant_squawked:1; /* reversal vs. Giant reported */
	u_char		 w_other_squawked:1; /* other reversal reported */
	u_char		 w_same_squawked:1; /* duplicate acquire reported */
	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
	u_int		 w_level;	/* spin: order bit; else graph depth */
	struct witness	*w_children[WITNESS_NCHILDREN];
};

/* A pair of lock descriptions for which order reversals are tolerated. */
struct witness_blessed {
	char 	*b_lock1;
	char	*b_lock2;
};
1038
#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into the debugger (via Debugger()) when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_ddb;
#ifdef WITNESS_DDB
TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
#else
TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
#endif /* DDB */

/*
 * When non-zero, enroll() returns NULL for MTX_SPIN mutexes, so they are
 * not tracked by witness.  The tunable default is 1 when the kernel is
 * built with WITNESS_SKIPSPIN and 0 otherwise.
 */
int	witness_skipspin;
#ifdef WITNESS_SKIPSPIN
TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
#else
TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
    "");
1063
/* Spin mutex ("witness lock") taken around accesses to the witness graph. */
static struct mtx	w_mtx;
/* Head of the list of unused witness structures (linked via w_next). */
static struct witness	*w_free;
/* Head of the list of enrolled witness structures (linked via w_next). */
static struct witness	*w_all;
/* Set by enroll() once w_mtx, the free list and order_list are set up. */
static int		 w_inited;
static int		 witness_dead;	/* fatal error, probably no memory */

/* Backing storage for every witness structure; handed out via w_free. */
static struct witness	 w_data[WITNESS_COUNT];

static struct witness	 *enroll __P((const char *description, int flag));
static int itismychild __P((struct witness *parent, struct witness *child));
static void removechild __P((struct witness *parent, struct witness *child));
static int isitmychild __P((struct witness *parent, struct witness *child));
static int isitmydescendant __P((struct witness *parent, struct witness *child));
static int dup_ok __P((struct witness *));
static int blessed __P((struct witness *, struct witness *));
static void witness_displaydescendants
    __P((void(*)(const char *fmt, ...), struct witness *));
static void witness_leveldescendents __P((struct witness *parent, int level));
static void witness_levelall __P((void));
static struct witness * witness_get __P((void));
static void witness_free __P((struct witness *m));
1085
1086
/* Descriptions of mutexes that enroll() never tracks. */
static char *ignore_list[] = {
	"witness lock",
	NULL
};

/*
 * Spin mutexes known to witness.  enroll() derives a spin mutex's w_level
 * from its position here (bit 0 for the first entry, bit 1 for the second,
 * and so on) and panics for a spin mutex that is not listed.
 */
static char *spin_order_list[] = {
	"sio",
	"sched lock",
#ifdef __i386__
	"clk",
#endif
	"callout",
	/*
	 * leaf locks
	 */
	NULL
};

/*
 * Predefined lock orders.  Each NULL-terminated run is a chain in which
 * every name is made the parent of the name that follows it; an additional
 * NULL ends the table.
 */
static char *order_list[] = {
	"Giant", "uidinfo hash", "uidinfo struct", NULL,
	"Giant", "proctree", "allproc", "process lock", NULL,
	NULL
};

/* Descriptions for which dup_ok() permits duplicate acquisition. */
static char *dup_list[] = {
	NULL
};

/* Descriptions of mutexes that witness_sleep() does not complain about. */
static char *sleep_list[] = {
	"Giant",
	NULL
};

/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
1127
1128static void
1129witness_init(struct mtx *m, int flag)
1130{
1131	m->mtx_witness = enroll(m->mtx_description, flag);
1132}
1133
1134static void
1135witness_destroy(struct mtx *m)
1136{
1137	struct mtx *m1;
1138	struct proc *p;
1139	p = CURPROC;
1140	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
1141		m1 = LIST_NEXT(m1, mtx_held)) {
1142		if (m1 == m) {
1143			LIST_REMOVE(m, mtx_held);
1144			break;
1145		}
1146	}
1147	return;
1148
1149}
1150
1151static void
1152witness_display(void(*prnt)(const char *fmt, ...))
1153{
1154	struct witness *w, *w1;
1155
1156	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1157	witness_levelall();
1158
1159	for (w = w_all; w; w = w->w_next) {
1160		if (w->w_file == NULL)
1161			continue;
1162		for (w1 = w_all; w1; w1 = w1->w_next) {
1163			if (isitmychild(w1, w))
1164				break;
1165		}
1166		if (w1 != NULL)
1167			continue;
1168		/*
1169		 * This lock has no anscestors, display its descendants.
1170		 */
1171		witness_displaydescendants(prnt, w);
1172	}
1173	prnt("\nMutex which were never acquired\n");
1174	for (w = w_all; w; w = w->w_next) {
1175		if (w->w_file != NULL)
1176			continue;
1177		prnt("%s\n", w->w_description);
1178	}
1179}
1180
1181void
1182witness_enter(struct mtx *m, int flags, const char *file, int line)
1183{
1184	struct witness *w, *w1;
1185	struct mtx *m1;
1186	struct proc *p;
1187	int i;
1188#ifdef DDB
1189	int go_into_ddb = 0;
1190#endif /* DDB */
1191
1192	if (witness_cold || m->mtx_witness == NULL || panicstr)
1193		return;
1194	w = m->mtx_witness;
1195	p = CURPROC;
1196
1197	if (flags & MTX_SPIN) {
1198		if ((m->mtx_flags & MTX_SPIN) == 0)
1199			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1200			    " %s:%d", m->mtx_description, file, line);
1201		if (mtx_recursed(m)) {
1202			if ((m->mtx_flags & MTX_RECURSE) == 0)
1203				panic("mutex_enter: recursion on non-recursive"
1204				    " mutex %s @ %s:%d", m->mtx_description,
1205				    file, line);
1206			return;
1207		}
1208		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1209		i = PCPU_GET(witness_spin_check);
1210		if (i != 0 && w->w_level < i) {
1211			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1212			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1213			    " %s:%d already holding %s:%x",
1214			    m->mtx_description, w->w_level, file, line,
1215			    spin_order_list[ffs(i)-1], i);
1216		}
1217		PCPU_SET(witness_spin_check, i | w->w_level);
1218		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1219		w->w_file = file;
1220		w->w_line = line;
1221		m->mtx_line = line;
1222		m->mtx_file = file;
1223		return;
1224	}
1225	if ((m->mtx_flags & MTX_SPIN) != 0)
1226		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1227		    m->mtx_description, file, line);
1228
1229	if (mtx_recursed(m)) {
1230		if ((m->mtx_flags & MTX_RECURSE) == 0)
1231			panic("mutex_enter: recursion on non-recursive"
1232			    " mutex %s @ %s:%d", m->mtx_description,
1233			    file, line);
1234		return;
1235	}
1236	if (witness_dead)
1237		goto out;
1238	if (cold)
1239		goto out;
1240
1241	if (!mtx_legal2block())
1242		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
1243			    m->mtx_description, file, line);
1244	/*
1245	 * Is this the first mutex acquired
1246	 */
1247	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1248		goto out;
1249
1250	if ((w1 = m1->mtx_witness) == w) {
1251		if (w->w_same_squawked || dup_ok(w))
1252			goto out;
1253		w->w_same_squawked = 1;
1254		printf("acquring duplicate lock of same type: \"%s\"\n",
1255			m->mtx_description);
1256		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1257		printf(" 2nd @ %s:%d\n", file, line);
1258#ifdef DDB
1259		go_into_ddb = 1;
1260#endif /* DDB */
1261		goto out;
1262	}
1263	MPASS(!mtx_owned(&w_mtx));
1264	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1265	/*
1266	 * If we have a known higher number just say ok
1267	 */
1268	if (witness_watch > 1 && w->w_level > w1->w_level) {
1269		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1270		goto out;
1271	}
1272	if (isitmydescendant(m1->mtx_witness, w)) {
1273		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1274		goto out;
1275	}
1276	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1277
1278		MPASS(i < 200);
1279		w1 = m1->mtx_witness;
1280		if (isitmydescendant(w, w1)) {
1281			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1282			if (blessed(w, w1))
1283				goto out;
1284			if (m1 == &Giant) {
1285				if (w1->w_Giant_squawked)
1286					goto out;
1287				else
1288					w1->w_Giant_squawked = 1;
1289			} else {
1290				if (w1->w_other_squawked)
1291					goto out;
1292				else
1293					w1->w_other_squawked = 1;
1294			}
1295			printf("lock order reversal\n");
1296			printf(" 1st %s last acquired @ %s:%d\n",
1297			    w->w_description, w->w_file, w->w_line);
1298			printf(" 2nd %p %s @ %s:%d\n",
1299			    m1, w1->w_description, w1->w_file, w1->w_line);
1300			printf(" 3rd %p %s @ %s:%d\n",
1301			    m, w->w_description, file, line);
1302#ifdef DDB
1303			go_into_ddb = 1;
1304#endif /* DDB */
1305			goto out;
1306		}
1307	}
1308	m1 = LIST_FIRST(&p->p_heldmtx);
1309	if (!itismychild(m1->mtx_witness, w))
1310		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1311
1312out:
1313#ifdef DDB
1314	if (witness_ddb && go_into_ddb)
1315		Debugger("witness_enter");
1316#endif /* DDB */
1317	w->w_file = file;
1318	w->w_line = line;
1319	m->mtx_line = line;
1320	m->mtx_file = file;
1321
1322	/*
1323	 * If this pays off it likely means that a mutex being witnessed
1324	 * is acquired in hardclock. Put it in the ignore list. It is
1325	 * likely not the mutex this assert fails on.
1326	 */
1327	MPASS(m->mtx_held.le_prev == NULL);
1328	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1329}
1330
1331void
1332witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1333{
1334	struct proc *p;
1335	struct witness *w = m->mtx_witness;
1336
1337	if (witness_cold)
1338		return;
1339	if (panicstr)
1340		return;
1341	if (flags & MTX_SPIN) {
1342		if ((m->mtx_flags & MTX_SPIN) == 0)
1343			panic("mutex_try_enter: "
1344			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1345			    m->mtx_description, file, line);
1346		if (mtx_recursed(m)) {
1347			if ((m->mtx_flags & MTX_RECURSE) == 0)
1348				panic("mutex_try_enter: recursion on"
1349				    " non-recursive mutex %s @ %s:%d",
1350				    m->mtx_description, file, line);
1351			return;
1352		}
1353		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1354		PCPU_SET(witness_spin_check,
1355		    PCPU_GET(witness_spin_check) | w->w_level);
1356		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1357		w->w_file = file;
1358		w->w_line = line;
1359		m->mtx_line = line;
1360		m->mtx_file = file;
1361		return;
1362	}
1363
1364	if ((m->mtx_flags & MTX_SPIN) != 0)
1365		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1366		    m->mtx_description, file, line);
1367
1368	if (mtx_recursed(m)) {
1369		if ((m->mtx_flags & MTX_RECURSE) == 0)
1370			panic("mutex_try_enter: recursion on non-recursive"
1371			    " mutex %s @ %s:%d", m->mtx_description, file,
1372			    line);
1373		return;
1374	}
1375	w->w_file = file;
1376	w->w_line = line;
1377	m->mtx_line = line;
1378	m->mtx_file = file;
1379	p = CURPROC;
1380	MPASS(m->mtx_held.le_prev == NULL);
1381	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1382}
1383
1384void
1385witness_exit(struct mtx *m, int flags, const char *file, int line)
1386{
1387	struct witness *w;
1388
1389	if (witness_cold || m->mtx_witness == NULL || panicstr)
1390		return;
1391	w = m->mtx_witness;
1392
1393	if (flags & MTX_SPIN) {
1394		if ((m->mtx_flags & MTX_SPIN) == 0)
1395			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1396			    " %s:%d", m->mtx_description, file, line);
1397		if (mtx_recursed(m)) {
1398			if ((m->mtx_flags & MTX_RECURSE) == 0)
1399				panic("mutex_exit: recursion on non-recursive"
1400				    " mutex %s @ %s:%d", m->mtx_description,
1401				    file, line);
1402			return;
1403		}
1404		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1405		PCPU_SET(witness_spin_check,
1406		    PCPU_GET(witness_spin_check) & ~w->w_level);
1407		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1408		return;
1409	}
1410	if ((m->mtx_flags & MTX_SPIN) != 0)
1411		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1412		    m->mtx_description, file, line);
1413
1414	if (mtx_recursed(m)) {
1415		if ((m->mtx_flags & MTX_RECURSE) == 0)
1416			panic("mutex_exit: recursion on non-recursive"
1417			    " mutex %s @ %s:%d", m->mtx_description,
1418			    file, line);
1419		return;
1420	}
1421
1422	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1423		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
1424			    m->mtx_description, file, line);
1425	LIST_REMOVE(m, mtx_held);
1426	m->mtx_held.le_prev = NULL;
1427}
1428
1429int
1430witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1431{
1432	struct mtx *m;
1433	struct proc *p;
1434	char **sleep;
1435	int n = 0;
1436
1437	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1438	p = CURPROC;
1439	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1440	    m = LIST_NEXT(m, mtx_held)) {
1441		if (m == mtx)
1442			continue;
1443		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1444			if (strcmp(m->mtx_description, *sleep) == 0)
1445				goto next;
1446		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1447			file, line, check_only ? "could sleep" : "sleeping",
1448			m->mtx_description,
1449			m->mtx_witness->w_file, m->mtx_witness->w_line);
1450		n++;
1451	next:
1452	}
1453#ifdef DDB
1454	if (witness_ddb && n)
1455		Debugger("witness_sleep");
1456#endif /* DDB */
1457	return (n);
1458}
1459
1460static struct witness *
1461enroll(const char *description, int flag)
1462{
1463	int i;
1464	struct witness *w, *w1;
1465	char **ignore;
1466	char **order;
1467
1468	if (!witness_watch)
1469		return (NULL);
1470	for (ignore = ignore_list; *ignore != NULL; ignore++)
1471		if (strcmp(description, *ignore) == 0)
1472			return (NULL);
1473
1474	if (w_inited == 0) {
1475		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1476		for (i = 0; i < WITNESS_COUNT; i++) {
1477			w = &w_data[i];
1478			witness_free(w);
1479		}
1480		w_inited = 1;
1481		for (order = order_list; *order != NULL; order++) {
1482			w = enroll(*order, MTX_DEF);
1483			w->w_file = "order list";
1484			for (order++; *order != NULL; order++) {
1485				w1 = enroll(*order, MTX_DEF);
1486				w1->w_file = "order list";
1487				itismychild(w, w1);
1488				w = w1;
1489    	    	    	}
1490		}
1491	}
1492	if ((flag & MTX_SPIN) && witness_skipspin)
1493		return (NULL);
1494	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1495	for (w = w_all; w; w = w->w_next) {
1496		if (strcmp(description, w->w_description) == 0) {
1497			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1498			return (w);
1499		}
1500	}
1501	if ((w = witness_get()) == NULL)
1502		return (NULL);
1503	w->w_next = w_all;
1504	w_all = w;
1505	w->w_description = description;
1506	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1507	if (flag & MTX_SPIN) {
1508		w->w_spin = 1;
1509
1510		i = 1;
1511		for (order = spin_order_list; *order != NULL; order++) {
1512			if (strcmp(description, *order) == 0)
1513				break;
1514			i <<= 1;
1515		}
1516		if (*order == NULL)
1517			panic("spin lock %s not in order list", description);
1518		w->w_level = i;
1519	}
1520
1521	return (w);
1522}
1523
1524static int
1525itismychild(struct witness *parent, struct witness *child)
1526{
1527	static int recursed;
1528
1529	/*
1530	 * Insert "child" after "parent"
1531	 */
1532	while (parent->w_morechildren)
1533		parent = parent->w_morechildren;
1534
1535	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1536		if ((parent->w_morechildren = witness_get()) == NULL)
1537			return (1);
1538		parent = parent->w_morechildren;
1539	}
1540	MPASS(child != NULL);
1541	parent->w_children[parent->w_childcnt++] = child;
1542	/*
1543	 * now prune whole tree
1544	 */
1545	if (recursed)
1546		return (0);
1547	recursed = 1;
1548	for (child = w_all; child != NULL; child = child->w_next) {
1549		for (parent = w_all; parent != NULL;
1550		    parent = parent->w_next) {
1551			if (!isitmychild(parent, child))
1552				continue;
1553			removechild(parent, child);
1554			if (isitmydescendant(parent, child))
1555				continue;
1556			itismychild(parent, child);
1557		}
1558	}
1559	recursed = 0;
1560	witness_levelall();
1561	return (0);
1562}
1563
1564static void
1565removechild(struct witness *parent, struct witness *child)
1566{
1567	struct witness *w, *w1;
1568	int i;
1569
1570	for (w = parent; w != NULL; w = w->w_morechildren)
1571		for (i = 0; i < w->w_childcnt; i++)
1572			if (w->w_children[i] == child)
1573				goto found;
1574	return;
1575found:
1576	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1577		continue;
1578	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1579	MPASS(w->w_children[i] != NULL);
1580
1581	if (w1->w_childcnt != 0)
1582		return;
1583
1584	if (w1 == parent)
1585		return;
1586	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1587		continue;
1588	w->w_morechildren = 0;
1589	witness_free(w1);
1590}
1591
1592static int
1593isitmychild(struct witness *parent, struct witness *child)
1594{
1595	struct witness *w;
1596	int i;
1597
1598	for (w = parent; w != NULL; w = w->w_morechildren) {
1599		for (i = 0; i < w->w_childcnt; i++) {
1600			if (w->w_children[i] == child)
1601				return (1);
1602		}
1603	}
1604	return (0);
1605}
1606
1607static int
1608isitmydescendant(struct witness *parent, struct witness *child)
1609{
1610	struct witness *w;
1611	int i;
1612	int j;
1613
1614	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1615		MPASS(j < 1000);
1616		for (i = 0; i < w->w_childcnt; i++) {
1617			if (w->w_children[i] == child)
1618				return (1);
1619		}
1620		for (i = 0; i < w->w_childcnt; i++) {
1621			if (isitmydescendant(w->w_children[i], child))
1622				return (1);
1623		}
1624	}
1625	return (0);
1626}
1627
1628void
1629witness_levelall (void)
1630{
1631	struct witness *w, *w1;
1632
1633	for (w = w_all; w; w = w->w_next)
1634		if (!(w->w_spin))
1635			w->w_level = 0;
1636	for (w = w_all; w; w = w->w_next) {
1637		if (w->w_spin)
1638			continue;
1639		for (w1 = w_all; w1; w1 = w1->w_next) {
1640			if (isitmychild(w1, w))
1641				break;
1642		}
1643		if (w1 != NULL)
1644			continue;
1645		witness_leveldescendents(w, 0);
1646	}
1647}
1648
1649static void
1650witness_leveldescendents(struct witness *parent, int level)
1651{
1652	int i;
1653	struct witness *w;
1654
1655	if (parent->w_level < level)
1656		parent->w_level = level;
1657	level++;
1658	for (w = parent; w != NULL; w = w->w_morechildren)
1659		for (i = 0; i < w->w_childcnt; i++)
1660			witness_leveldescendents(w->w_children[i], level);
1661}
1662
1663static void
1664witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1665			   struct witness *parent)
1666{
1667	struct witness *w;
1668	int i;
1669	int level = parent->w_level;
1670
1671	prnt("%d", level);
1672	if (level < 10)
1673		prnt(" ");
1674	for (i = 0; i < level; i++)
1675		prnt(" ");
1676	prnt("%s", parent->w_description);
1677	if (parent->w_file != NULL) {
1678		prnt(" -- last acquired @ %s", parent->w_file);
1679#ifndef W_USE_WHERE
1680		prnt(":%d", parent->w_line);
1681#endif
1682		prnt("\n");
1683	}
1684
1685	for (w = parent; w != NULL; w = w->w_morechildren)
1686		for (i = 0; i < w->w_childcnt; i++)
1687			    witness_displaydescendants(prnt, w->w_children[i]);
1688    }
1689
1690static int
1691dup_ok(struct witness *w)
1692{
1693	char **dup;
1694
1695	for (dup = dup_list; *dup!= NULL; dup++)
1696		if (strcmp(w->w_description, *dup) == 0)
1697			return (1);
1698	return (0);
1699}
1700
1701static int
1702blessed(struct witness *w1, struct witness *w2)
1703{
1704	int i;
1705	struct witness_blessed *b;
1706
1707	for (i = 0; i < blessed_count; i++) {
1708		b = &blessed_list[i];
1709		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1710			if (strcmp(w2->w_description, b->b_lock2) == 0)
1711				return (1);
1712			continue;
1713		}
1714		if (strcmp(w1->w_description, b->b_lock2) == 0)
1715			if (strcmp(w2->w_description, b->b_lock1) == 0)
1716				return (1);
1717	}
1718	return (0);
1719}
1720
1721static struct witness *
1722witness_get()
1723{
1724	struct witness *w;
1725
1726	if ((w = w_free) == NULL) {
1727		witness_dead = 1;
1728		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1729		printf("witness exhausted\n");
1730		return (NULL);
1731	}
1732	w_free = w->w_next;
1733	bzero(w, sizeof(*w));
1734	return (w);
1735}
1736
1737static void
1738witness_free(struct witness *w)
1739{
1740	w->w_next = w_free;
1741	w_free = w;
1742}
1743
1744int
1745witness_list(struct proc *p)
1746{
1747	struct mtx *m;
1748	int nheld;
1749
1750	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1751	nheld = 0;
1752	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1753	    m = LIST_NEXT(m, mtx_held)) {
1754		printf("\t\"%s\" (%p) locked at %s:%d\n",
1755		    m->mtx_description, m,
1756		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1757		nheld++;
1758	}
1759
1760	return (nheld);
1761}
1762
1763void
1764witness_save(struct mtx *m, const char **filep, int *linep)
1765{
1766
1767	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1768	if (m->mtx_witness == NULL)
1769		return;
1770
1771	*filep = m->mtx_witness->w_file;
1772	*linep = m->mtx_witness->w_line;
1773}
1774
1775void
1776witness_restore(struct mtx *m, const char *file, int line)
1777{
1778
1779	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1780	if (m->mtx_witness == NULL)
1781		return;
1782
1783	m->mtx_witness->w_file = file;
1784	m->mtx_witness->w_line = line;
1785}
1786
1787#endif	/* WITNESS */
1788