subr_witness.c revision 71360
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_witness.c 71360 2001-01-22 05:56:55Z jasone $
 */

/*
 *	Main Entry: witness
 *	Pronunciation: 'wit-n&s
 *	Function: noun
 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
 *	    testimony, witness, from 2wit
 *	Date: before 12th century
 *	1 : attestation of a fact or event : TESTIMONY
 *	2 : one that gives evidence; specifically : one who testifies in
 *	    a cause or before a judicial tribunal
 *	3 : one asked to be present at a transaction so as to be able to
 *	    testify to its having taken place
 *	4 : one who has personal knowledge of something
 *	5 a : something serving as evidence or proof : SIGN
 *	  b : public affirmation by word or example of usually
 *	      religious faith or conviction <the heroic witness to divine
 *	      life -- Pilot>
 *	6 capitalized : a member of the Jehovah's Witnesses
 */

#include "opt_ddb.h"
#include "opt_witness.h"

/*
 * Cause non-inlined mtx_*() to be compiled.
 * Must be defined early because other system headers may include mutex.h.
 */
#define _KERN_MUTEX_C_

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <sys/ktr.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

#include <sys/mutex.h>

/*
 * Machine independent bits of the mutex implementation
 */

#ifdef WITNESS
struct mtx_debug {
	struct witness	*mtxd_witness;
	LIST_ENTRY(mtx)	mtxd_held;
	const char	*mtxd_file;
	int		mtxd_line;
	const char	*mtxd_description;
};

#define mtx_description	mtx_union.mtxu_debug->mtxd_description
#define mtx_held	mtx_union.mtxu_debug->mtxd_held
#define	mtx_file	mtx_union.mtxu_debug->mtxd_file
#define	mtx_line	mtx_union.mtxu_debug->mtxd_line
#define	mtx_witness	mtx_union.mtxu_debug->mtxd_witness
#else	/* WITNESS */
#define mtx_description	mtx_union.mtxu_description
#endif	/* WITNESS */

/*
 * Assembly macros
 *------------------------------------------------------------------------------
 */

#define	_V(x)	__STRING(x)

/*
 * Default, unoptimized mutex micro-operations
 */

#ifndef _obtain_lock
/* Actually obtain mtx_lock */
#define _obtain_lock(mp, tid)						\
	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED, (tid))
#endif

#ifndef _release_lock
/* Actually release mtx_lock */
#define _release_lock(mp, tid)						\
	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), (void *)MTX_UNOWNED)
#endif

#ifndef _release_lock_quick
/* Actually release mtx_lock quickly assuming that we own it */
#define	_release_lock_quick(mp)						\
	atomic_store_rel_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED)
#endif
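
/*
 * Note on the micro-operations above: mtx_lock doubles as the owner word.
 * Acquisition is a compare-and-set with acquire semantics that swaps
 * MTX_UNOWNED for the owning thread pointer; release is the reverse
 * compare-and-set with release semantics, or an unconditional release
 * store when the caller already knows it owns the lock outright.
 */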

#ifndef _getlock_sleep
/* Get a sleep lock, deal with recursion inline. */
#define	_getlock_sleep(mp, tid, type) do {				\
	if (!_obtain_lock(mp, tid)) {					\
		if (((mp)->mtx_lock & MTX_FLAGMASK) != ((uintptr_t)(tid)))\
			mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0);	\
		else {							\
			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSED);	\
			(mp)->mtx_recurse++;				\
		}							\
	}								\
} while (0)
#endif

#ifndef _getlock_spin_block
/* Get a spin lock, handle recursion inline (as the less common case) */
#define	_getlock_spin_block(mp, tid, type) do {				\
	u_int _mtx_intr = save_intr();					\
	disable_intr();							\
	if (!_obtain_lock(mp, tid))					\
		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_intr);	\
	else								\
		(mp)->mtx_saveintr = _mtx_intr;				\
} while (0)
#endif
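
/*
 * The spin lock fast path above saves the current interrupt state and
 * disables interrupts before trying the lock; on success the saved state
 * is stashed in mtx_saveintr, and on failure it is handed to
 * mtx_enter_hard() so the slow path can restore it (see _exitlock_spin
 * below for the release side).
 */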

#ifndef _getlock_norecurse
/*
 * Get a lock without any recursion handling. Calls the hard enter function if
 * we can't get it inline.
 */
#define	_getlock_norecurse(mp, tid, type) do {				\
	if (!_obtain_lock(mp, tid))					\
		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);		\
} while (0)
#endif

#ifndef _exitlock_norecurse
/*
 * Release a sleep lock, assuming we haven't recursed on it; recursion is
 * handled in the hard function.
 */
#define	_exitlock_norecurse(mp, tid, type) do {				\
	if (!_release_lock(mp, tid))					\
		mtx_exit_hard((mp), (type) & MTX_HARDOPTS);		\
} while (0)
#endif

#ifndef _exitlock
/*
 * Release a sleep lock when it's likely we recursed (the code to
 * deal with simple recursion is inline).
 */
#define	_exitlock(mp, tid, type) do {					\
	if (!_release_lock(mp, tid)) {					\
		if ((mp)->mtx_lock & MTX_RECURSED) {			\
			if (--((mp)->mtx_recurse) == 0)			\
				atomic_clear_ptr(&(mp)->mtx_lock,	\
				    MTX_RECURSED);			\
		} else {						\
			mtx_exit_hard((mp), (type) & MTX_HARDOPTS);	\
		}							\
	}								\
} while (0)
#endif

#ifndef _exitlock_spin
/* Release a spin lock (with possible recursion). */
#define	_exitlock_spin(mp) do {						\
	if (!mtx_recursed((mp))) {					\
		int _mtx_intr = (mp)->mtx_saveintr;			\
									\
		_release_lock_quick(mp);				\
		restore_intr(_mtx_intr);				\
	} else {							\
		(mp)->mtx_recurse--;					\
	}								\
} while (0)
#endif

#ifdef WITNESS
static void	witness_init(struct mtx *, int flag);
static void	witness_destroy(struct mtx *);
static void	witness_display(void(*)(const char *fmt, ...));

/* All mutexes in system (used for debug/panic) */
static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
	"All mutexes queue head" };
static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, {&all_mtx_debug},
	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
	{ NULL, NULL }, &all_mtx, &all_mtx };
/*
 * Set to 0 once mutexes have been fully initialized so that witness code can be
 * safely executed.
 */
static int witness_cold = 1;
#else	/* WITNESS */
/* All mutexes in system (used for debug/panic) */
static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, {"All mutexes queue head"},
	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
	{ NULL, NULL }, &all_mtx, &all_mtx };

/*
 * flag++ is a sleazy way of shutting up the unused parameter warning
 * in mtx_init()
 */
#define witness_init(m, flag) flag++
#define witness_destroy(m)
#define witness_try_enter(m, t, f, l)
#endif	/* WITNESS */

static int	mtx_cur_cnt;
static int	mtx_max_cnt;

static void	propagate_priority(struct proc *);
static void	mtx_enter_hard(struct mtx *, int type, int saveintr);
static void	mtx_exit_hard(struct mtx *, int type);

#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))

#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
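
/*
 * RETIP() digs the caller's return address out of the stack by reading the
 * word just below its argument, which assumes stack-passed arguments as on
 * i386; it is only used to decorate KTR trace records below.  SET_PRIO()
 * is a trivial wrapper so priority assignments are easy to find.
 */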

static void
propagate_priority(struct proc *p)
{
	int pri = p->p_priority;
	struct mtx *m = p->p_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct proc *p1;

		p = mtx_owner(m);

		if (p == NULL) {
			/*
			 * This really isn't quite right; we really ought to
			 * bump the priority of the process that next
			 * acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}
		MPASS(p->p_magic == P_MAGIC);
		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
		if (p->p_priority <= pri)
			return;

		/*
		 * Bump this process' priority.
		 */
		SET_PRIO(p, pri);

		/*
		 * If the lock holder is actually running, just bump its
		 * priority.
		 */
#ifdef SMP
		/*
		 * For SMP, we can check the p_oncpu field to see if the
		 * holder is currently running.
		 */
		if (p->p_oncpu != 0xff) {
			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
			return;
		}
#else
		/*
		 * For UP, we check to see if p is curproc (this should never
		 * happen, however, as it would mean we are in a deadlock).
		 */
		if (p == curproc) {
			panic("Deadlock detected");
			return;
		}
#endif
		/*
		 * If it is on a run queue, move it to the new run queue and
		 * quit.
		 */
		if (p->p_stat == SRUN) {
			printf("XXX: moving process %d(%s) to a new run queue\n",
			       p->p_pid, p->p_comm);
			MPASS(p->p_blocked == NULL);
			remrunqueue(p);
			setrunqueue(p);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(p->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    p->p_pid, p->p_comm, p->p_stat,
		    m->mtx_description));

		/*
		 * Pick up the mutex that p is blocked on.
		 */
		m = p->p_blocked;
		MPASS(m != NULL);

		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
		    p->p_comm, m->mtx_description);
		/*
		 * Check if the proc needs to be moved up on
		 * the blocked chain
		 */
		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
			printf("XXX: process at head of run queue\n");
			continue;
		}
		p1 = TAILQ_PREV(p, rq, p_procq);
		if (p1->p_priority <= pri) {
			printf(
		    "XXX: previous process %d(%s) has higher priority\n",
			    p1->p_pid, p1->p_comm);
			continue;
		}

		/*
		 * Remove proc from blocked chain and determine where
		 * it should be moved up to.  Since we know that p1 has
		 * a lower priority than p, we know that at least one
		 * process in the chain has a lower priority and that
		 * p1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
			MPASS(p1->p_magic == P_MAGIC);
			if (p1->p_priority > pri)
				break;
		}
		MPASS(p1 != NULL);
		TAILQ_INSERT_BEFORE(p1, p, p_procq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p 0x%p moved before 0x%p on [0x%p] %s",
		    p, p1, m, m->mtx_description);
	}
}

/*
 * Get lock 'm'; the macro handles the easy (and most common) cases and leaves
 * the slow stuff to the mtx_enter_hard() function.
 *
 * Note: since type is usually a constant much of this code is optimized out.
 */
void
_mtx_enter(struct mtx *mtxp, int type, const char *file, int line)
{
	struct mtx	*mpp = mtxp;

	/* bits only valid on mtx_exit() */
	MPASS4(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
	    STR_mtx_bad_type, file, line);

	if ((type) & MTX_SPIN) {
		/*
		 * Easy cases of spin locks:
		 *
		 * 1) We already own the lock and will simply recurse on it (if
		 *    RLIKELY)
		 *
		 * 2) The lock is free, we just get it
		 */
		if ((type) & MTX_RLIKELY) {
			/*
			 * Check for recursion, if we already have this
			 * lock we just bump the recursion count.
			 */
			if (mpp->mtx_lock == (uintptr_t)CURTHD) {
				mpp->mtx_recurse++;
				goto done;
			}
		}

		if (((type) & MTX_TOPHALF) == 0) {
			/*
			 * If an interrupt thread uses this we must block
			 * interrupts here.
			 */
			if ((type) & MTX_FIRST) {
				ASS_IEN;
				disable_intr();
				_getlock_norecurse(mpp, CURTHD,
				    (type) & MTX_HARDOPTS);
			} else {
				_getlock_spin_block(mpp, CURTHD,
				    (type) & MTX_HARDOPTS);
			}
		} else
			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
	} else {
		/* Sleep locks */
		if ((type) & MTX_RLIKELY)
			_getlock_sleep(mpp, CURTHD, (type) & MTX_HARDOPTS);
		else
			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
	}
done:
	WITNESS_ENTER(mpp, type, file, line);
	if (((type) & MTX_QUIET) == 0)
		CTR5(KTR_LOCK, STR_mtx_enter_fmt,
		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);

}
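
/*
 * Typical caller pattern (a sketch, mirroring how this file itself uses
 * all_mtx): callers go through the mtx_enter()/mtx_exit() macros with
 * matching type flags rather than calling _mtx_enter() directly, e.g.
 *
 *	mtx_enter(&all_mtx, MTX_DEF);
 *	... touch data protected by all_mtx ...
 *	mtx_exit(&all_mtx, MTX_DEF);
 */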

/*
 * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
 *
 * XXX DOES NOT HANDLE RECURSION
 */
int
_mtx_try_enter(struct mtx *mtxp, int type, const char *file, int line)
{
	struct mtx	*const mpp = mtxp;
	int	rval;

	rval = _obtain_lock(mpp, CURTHD);
#ifdef WITNESS
	if (rval && mpp->mtx_witness != NULL) {
		MPASS(mpp->mtx_recurse == 0);
		witness_try_enter(mpp, type, file, line);
	}
#endif	/* WITNESS */
	if (((type) & MTX_QUIET) == 0)
		CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
		    mpp->mtx_description, mpp, rval, file, line);

	return rval;
}
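
/*
 * A sketch of the expected caller pattern, assuming the usual
 * mtx_try_enter() wrapper macro around this function and a placeholder
 * lock name (some_mtx is not a lock from this file): test the return
 * value and only mtx_exit() on success, since failure leaves the mutex
 * untouched.
 *
 *	if (mtx_try_enter(&some_mtx, MTX_DEF)) {
 *		... work that must not block waiting for some_mtx ...
 *		mtx_exit(&some_mtx, MTX_DEF);
 *	}
 */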

/*
 * Release lock m.
 */
void
_mtx_exit(struct mtx *mtxp, int type, const char *file, int line)
{
	struct mtx	*const mpp = mtxp;

	MPASS4(mtx_owned(mpp), STR_mtx_owned, file, line);
	WITNESS_EXIT(mpp, type, file, line);
	if (((type) & MTX_QUIET) == 0)
		CTR5(KTR_LOCK, STR_mtx_exit_fmt,
		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
	if ((type) & MTX_SPIN) {
		if ((type) & MTX_NORECURSE) {
			int mtx_intr = mpp->mtx_saveintr;

			MPASS4(mpp->mtx_recurse == 0, STR_mtx_recurse,
			    file, line);
			_release_lock_quick(mpp);
			if (((type) & MTX_TOPHALF) == 0) {
				if ((type) & MTX_FIRST) {
					ASS_IDIS;
					enable_intr();
				} else
					restore_intr(mtx_intr);
			}
		} else {
			if (((type & MTX_TOPHALF) == 0) &&
			    (type & MTX_FIRST)) {
				ASS_IDIS;
				ASS_SIEN(mpp);
			}
			_exitlock_spin(mpp);
		}
	} else {
		/* Handle sleep locks */
		if ((type) & MTX_RLIKELY)
			_exitlock(mpp, CURTHD, (type) & MTX_HARDOPTS);
		else {
			_exitlock_norecurse(mpp, CURTHD,
			    (type) & MTX_HARDOPTS);
		}
	}
}

void
mtx_enter_hard(struct mtx *m, int type, int saveintr)
{
	struct proc *p = CURPROC;

	KASSERT(p != NULL, ("curproc is NULL in mutex"));

	switch (type) {
	case MTX_DEF:
		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
			m->mtx_recurse++;
			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
			if ((type & MTX_QUIET) == 0)
				CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
			return;
		}
		if ((type & MTX_QUIET) == 0)
			CTR3(KTR_LOCK,
			    "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
			    m, (void *)m->mtx_lock, (void *)RETIP(m));

		/*
		 * Save our priority.  Even though p_nativepri is protected
		 * by sched_lock, we don't obtain it here as it can be
		 * expensive.  Since this is the only place p_nativepri is
		 * set, and since two CPUs will not be executing the same
		 * process concurrently, we know that no other CPU is going
		 * to be messing with this.  Also, p_nativepri is only read
		 * when we are blocked on a mutex, so that can't be happening
		 * right now either.
		 */
		p->p_nativepri = p->p_priority;
		while (!_obtain_lock(m, p)) {
			uintptr_t v;
			struct proc *p1;

			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
			/*
			 * Check if the lock has been released while we were
			 * waiting for sched_lock.
			 */
			if ((v = m->mtx_lock) == MTX_UNOWNED) {
				mtx_exit(&sched_lock, MTX_SPIN);
				continue;
			}
			/*
			 * The mutex was marked contested on release. This
			 * means that there are processes blocked on it.
			 */
			if (v == MTX_CONTESTED) {
				p1 = TAILQ_FIRST(&m->mtx_blocked);
				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
				if (p1->p_priority < p->p_priority) {
					SET_PRIO(p, p1->p_priority);
				}
				mtx_exit(&sched_lock, MTX_SPIN);
				return;
			}
			/*
			 * If the mutex isn't already contested and a failure
			 * occurs setting the contested bit, the mutex was
			 * either released or the state of the MTX_RECURSED
			 * bit changed.
			 */
			if ((v & MTX_CONTESTED) == 0 &&
			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			                       (void *)(v | MTX_CONTESTED))) {
				mtx_exit(&sched_lock, MTX_SPIN);
				continue;
			}

			/* We definitely have to sleep for this lock */
			mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
			/*
			 * If we're borrowing an interrupted thread's VM
			 * context, we must clean up before going to sleep.
			 */
			if (p->p_flag & (P_ITHD | P_SITHD)) {
				ithd_t *it = (ithd_t *)p;

				if (it->it_interrupted) {
					if ((type & MTX_QUIET) == 0)
						CTR2(KTR_LOCK,
					    "mtx_enter: 0x%x interrupted 0x%x",
						    it, it->it_interrupted);
					intr_thd_fixup(it);
				}
			}
#endif

			/* Put us on the list of procs blocked on this mutex */
			if (TAILQ_EMPTY(&m->mtx_blocked)) {
				p1 = (struct proc *)(m->mtx_lock &
						     MTX_FLAGMASK);
				LIST_INSERT_HEAD(&p1->p_contested, m,
						 mtx_contested);
				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
			} else {
				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
					if (p1->p_priority > p->p_priority)
						break;
				if (p1)
					TAILQ_INSERT_BEFORE(p1, p, p_procq);
				else
					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
							  p_procq);
			}

			p->p_blocked = m;	/* Who we're blocked on */
			p->p_mtxname = m->mtx_description;
			p->p_stat = SMTX;
#if 0
			propagate_priority(p);
#endif
			if ((type & MTX_QUIET) == 0)
				CTR3(KTR_LOCK,
				    "mtx_enter: p 0x%p blocked on [0x%p] %s",
				    p, m, m->mtx_description);
			mi_switch();
			if ((type & MTX_QUIET) == 0)
				CTR3(KTR_LOCK,
			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
				    p, m, m->mtx_description);
			mtx_exit(&sched_lock, MTX_SPIN);
		}
		return;
	case MTX_SPIN:
	case MTX_SPIN | MTX_FIRST:
	case MTX_SPIN | MTX_TOPHALF:
	    {
		int i = 0;

		if (m->mtx_lock == (uintptr_t)p) {
			m->mtx_recurse++;
			return;
		}
		if ((type & MTX_QUIET) == 0)
			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
		for (;;) {
			if (_obtain_lock(m, p))
				break;
			while (m->mtx_lock != MTX_UNOWNED) {
				if (i++ < 1000000)
					continue;
				if (i++ < 6000000)
					DELAY(1);
#ifdef DDB
				else if (!db_active)
#else
				else
#endif
					panic(
				"spin lock %s held by 0x%p for > 5 seconds",
					    m->mtx_description,
					    (void *)m->mtx_lock);
			}
		}

#ifdef MUTEX_DEBUG
		if (type != MTX_SPIN)
			m->mtx_saveintr = 0xbeefface;
		else
#endif
			m->mtx_saveintr = saveintr;
		if ((type & MTX_QUIET) == 0)
			CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
		return;
	    }
	}
}

void
mtx_exit_hard(struct mtx *m, int type)
{
	struct proc *p, *p1;
	struct mtx *m1;
	int pri;

	p = CURPROC;
	switch (type) {
	case MTX_DEF:
	case MTX_DEF | MTX_NOSWITCH:
		if (mtx_recursed(m)) {
			if (--(m->mtx_recurse) == 0)
				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
			if ((type & MTX_QUIET) == 0)
				CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
			return;
		}
		mtx_enter(&sched_lock, MTX_SPIN);
		if ((type & MTX_QUIET) == 0)
			CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
		p1 = TAILQ_FIRST(&m->mtx_blocked);
		MPASS(p->p_magic == P_MAGIC);
		MPASS(p1->p_magic == P_MAGIC);
		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			LIST_REMOVE(m, mtx_contested);
			_release_lock_quick(m);
			if ((type & MTX_QUIET) == 0)
				CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
		} else
			atomic_store_rel_ptr(&m->mtx_lock,
			    (void *)MTX_CONTESTED);
		pri = MAXPRI;
		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
			if (cp < pri)
				pri = cp;
		}
		if (pri > p->p_nativepri)
			pri = p->p_nativepri;
		SET_PRIO(p, pri);
		if ((type & MTX_QUIET) == 0)
			CTR2(KTR_LOCK,
			    "mtx_exit: 0x%p contested setrunqueue 0x%p", m, p1);
		p1->p_blocked = NULL;
		p1->p_mtxname = NULL;
		p1->p_stat = SRUN;
		setrunqueue(p1);
		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
#ifdef notyet
			if (p->p_flag & (P_ITHD | P_SITHD)) {
				ithd_t *it = (ithd_t *)p;

				if (it->it_interrupted) {
					if ((type & MTX_QUIET) == 0)
						CTR2(KTR_LOCK,
					    "mtx_exit: 0x%x interrupted 0x%x",
						    it, it->it_interrupted);
					intr_thd_fixup(it);
				}
			}
#endif
			setrunqueue(p);
			if ((type & MTX_QUIET) == 0)
				CTR2(KTR_LOCK,
				    "mtx_exit: 0x%p switching out lock=0x%p",
				    m, (void *)m->mtx_lock);
			mi_switch();
			if ((type & MTX_QUIET) == 0)
				CTR2(KTR_LOCK,
				    "mtx_exit: 0x%p resuming lock=0x%p",
				    m, (void *)m->mtx_lock);
		}
		mtx_exit(&sched_lock, MTX_SPIN);
		break;
	case MTX_SPIN:
	case MTX_SPIN | MTX_FIRST:
		if (mtx_recursed(m)) {
			m->mtx_recurse--;
			return;
		}
		MPASS(mtx_owned(m));
		_release_lock_quick(m);
		if (type & MTX_FIRST)
			enable_intr();	/* XXX is this kosher? */
		else {
			MPASS(m->mtx_saveintr != 0xbeefface);
			restore_intr(m->mtx_saveintr);
		}
		break;
	case MTX_SPIN | MTX_TOPHALF:
		if (mtx_recursed(m)) {
			m->mtx_recurse--;
			return;
		}
		MPASS(mtx_owned(m));
		_release_lock_quick(m);
		break;
	default:
		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
	}
}

#ifdef INVARIANTS
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{
	switch ((what)) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned((m)))
			panic("mutex %s not owned at %s:%d",
			    (m)->mtx_description, file, line);
		if (mtx_recursed((m))) {
			if (((what) & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    (m)->mtx_description, file, line);
		} else if (((what) & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    (m)->mtx_description, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned((m)))
			panic("mutex %s owned at %s:%d",
			    (m)->mtx_description, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
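
/*
 * Callers use the mtx_assert() macro wrapping the function above to
 * document and enforce locking assumptions, as this file itself does in
 * propagate_priority() and mtx_enter_hard(), e.g.
 *
 *	mtx_assert(&sched_lock, MA_OWNED);
 *	mtx_assert(m, MA_NOTOWNED);
 */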

#define MV_DESTROY	0	/* validate before destroy */
#define MV_INIT		1	/* validate before init */

#ifdef MUTEX_DEBUG

int mtx_validate __P((struct mtx *, int));

int
mtx_validate(struct mtx *m, int when)
{
	struct mtx *mp;
	int i;
	int retval = 0;

#ifdef WITNESS
	if (witness_cold)
		return 0;
#endif
	if (m == &all_mtx || cold)
		return 0;

	mtx_enter(&all_mtx, MTX_DEF);
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
	    VM_PROT_READ) == 1);
#endif
	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
#ifndef __alpha__
		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
		    VM_PROT_READ) != 1) {
			panic("mtx_validate: mp=%p mp->mtx_next=%p",
			    mp, mp->mtx_next);
		}
#endif
		i++;
		if (i > mtx_cur_cnt) {
			panic("mtx_validate: too many in chain, known=%d\n",
			    mtx_cur_cnt);
		}
	}
	MPASS(i == mtx_cur_cnt);
	switch (when) {
	case MV_DESTROY:
		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
			if (mp == m)
				break;
		MPASS(mp == m);
		break;
	case MV_INIT:
		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
		if (mp == m) {
			/*
			 * Not good. This mutex already exists.
			 */
			printf("re-initing existing mutex %s\n",
			    m->mtx_description);
			MPASS(m->mtx_lock == MTX_UNOWNED);
			retval = 1;
		}
	}
	mtx_exit(&all_mtx, MTX_DEF);
	return (retval);
}
#endif

void
mtx_init(struct mtx *m, const char *t, int flag)
{
	if ((flag & MTX_QUIET) == 0)
		CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
#ifdef MUTEX_DEBUG
	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
		return;
#endif

	bzero((void *)m, sizeof *m);
	TAILQ_INIT(&m->mtx_blocked);
#ifdef WITNESS
	if (!witness_cold) {
		/* XXX - should not use DEVBUF */
		m->mtx_union.mtxu_debug = malloc(sizeof(struct mtx_debug),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		MPASS(m->mtx_union.mtxu_debug != NULL);

		m->mtx_description = t;
	} else {
		/*
		 * Save a pointer to the description so that witness_fixup()
		 * can properly initialize this mutex later on.
		 */
		m->mtx_union.mtxu_description = t;
	}
#else
	m->mtx_description = t;
#endif

	m->mtx_flags = flag;
	m->mtx_lock = MTX_UNOWNED;
	/* Put on all mutex queue */
	mtx_enter(&all_mtx, MTX_DEF);
	m->mtx_next = &all_mtx;
	m->mtx_prev = all_mtx.mtx_prev;
	m->mtx_prev->mtx_next = m;
	all_mtx.mtx_prev = m;
	if (++mtx_cur_cnt > mtx_max_cnt)
		mtx_max_cnt = mtx_cur_cnt;
	mtx_exit(&all_mtx, MTX_DEF);
#ifdef WITNESS
	if (!witness_cold)
		witness_init(m, flag);
#endif
}
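
/*
 * Lifecycle sketch (foo_mtx and its description string are placeholders,
 * not names from this file): a mutex is initialized once, used with
 * matching enter/exit type flags, and torn down with mtx_destroy();
 * mtx_init() is also how enroll() below sets up the witness spin lock.
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo structures", MTX_DEF);
 *	...
 *	mtx_destroy(&foo_mtx);
 */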

void
mtx_destroy(struct mtx *m)
{

#ifdef WITNESS
	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
	    __FUNCTION__));
#endif
	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
#ifdef MUTEX_DEBUG
	if (m->mtx_next == NULL)
		panic("mtx_destroy: %p (%s) already destroyed",
		    m, m->mtx_description);

	if (!mtx_owned(m)) {
		MPASS(m->mtx_lock == MTX_UNOWNED);
	} else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
	}
	mtx_validate(m, MV_DESTROY);		/* diagnostic */
#endif

#ifdef WITNESS
	if (m->mtx_witness)
		witness_destroy(m);
#endif /* WITNESS */

	/* Remove from the all mutex queue */
	mtx_enter(&all_mtx, MTX_DEF);
	m->mtx_next->mtx_prev = m->mtx_prev;
	m->mtx_prev->mtx_next = m->mtx_next;
#ifdef MUTEX_DEBUG
	m->mtx_next = m->mtx_prev = NULL;
#endif
#ifdef WITNESS
	free(m->mtx_union.mtxu_debug, M_DEVBUF);
	m->mtx_union.mtxu_debug = NULL;
#endif
	mtx_cur_cnt--;
	mtx_exit(&all_mtx, MTX_DEF);
}

static void
witness_fixup(void *dummy __unused)
{
#ifdef WITNESS
	struct mtx *mp;
	const char *description;

	/* Iterate through all mutexes and finish up mutex initialization. */
	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
		description = mp->mtx_union.mtxu_description;

		/* XXX - should not use DEVBUF */
		mp->mtx_union.mtxu_debug = malloc(sizeof(struct mtx_debug),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		MPASS(mp->mtx_union.mtxu_debug != NULL);

		mp->mtx_description = description;

		witness_init(mp, mp->mtx_flags);
	}

	/* Mark the witness code as being ready for use. */
	atomic_store_rel_int(&witness_cold, 0);
#endif
}
SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
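
/*
 * The SYSINIT above runs witness_fixup() at SI_SUB_MUTEX/SI_ORDER_FIRST,
 * giving every mutex created while witness_cold was set its deferred
 * mtx_debug allocation and witness enrollment before witness_cold is
 * finally cleared.
 */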

/*
 * The non-inlined versions of the mtx_*() functions are always built (above),
 * but the witness code depends on the WITNESS kernel option being specified.
 */
#ifdef WITNESS

#define WITNESS_COUNT 200
#define	WITNESS_NCHILDREN 2

int witness_watch = 1;

struct witness {
	struct witness	*w_next;
	const char	*w_description;
	const char	*w_file;
	int		 w_line;
	struct witness	*w_morechildren;
	u_char		 w_childcnt;
	u_char		 w_Giant_squawked:1;
	u_char		 w_other_squawked:1;
	u_char		 w_same_squawked:1;
	u_char		 w_sleep:1;	/* MTX_DEF type mutex. */
	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
	u_char		 w_recurse:1;	/* MTX_RECURSE mutex option. */
	u_int		 w_level;
	struct witness	*w_children[WITNESS_NCHILDREN];
};

struct witness_blessed {
	char 	*b_lock1;
	char	*b_lock2;
};

#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into the kernel debugger when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_DDB
int	witness_ddb = 1;
#else
int	witness_ddb = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
#endif /* DDB */

#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
    "");

static struct mtx	w_mtx;
static struct witness	*w_free;
static struct witness	*w_all;
static int		 w_inited;
static int		 witness_dead;	/* fatal error, probably no memory */

static struct witness	 w_data[WITNESS_COUNT];

static struct witness	 *enroll __P((const char *description, int flag));
static int itismychild __P((struct witness *parent, struct witness *child));
static void removechild __P((struct witness *parent, struct witness *child));
static int isitmychild __P((struct witness *parent, struct witness *child));
static int isitmydescendant __P((struct witness *parent, struct witness *child));
static int dup_ok __P((struct witness *));
static int blessed __P((struct witness *, struct witness *));
static void witness_displaydescendants
    __P((void(*)(const char *fmt, ...), struct witness *));
static void witness_leveldescendents __P((struct witness *parent, int level));
static void witness_levelall __P((void));
static struct witness * witness_get __P((void));
static void witness_free __P((struct witness *m));


static char *ignore_list[] = {
	"witness lock",
	NULL
};

static char *spin_order_list[] = {
	"sio",
	"sched lock",
#ifdef __i386__
	"clk",
#endif
	"callout",
	/*
	 * leaf locks
	 */
	NULL
};
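
/*
 * enroll() assigns each spin lock in spin_order_list a one-bit w_level
 * (1, 2, 4, ... in list order), and witness_enter() keeps a per-CPU
 * witness_spin_check bitmask of the levels currently held, so acquiring
 * a spin lock whose level is lower than the current mask panics as an
 * out-of-order acquisition.
 */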

static char *order_list[] = {
	"uidinfo hash", "uidinfo struct", NULL,
	NULL
};

static char *dup_list[] = {
	NULL
};

static char *sleep_list[] = {
	"Giant",
	NULL
};

/*
 * Pairs of locks which have been blessed.
 * Don't complain about order problems with blessed locks.
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
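
/*
 * The list is empty in this revision; an entry would pair two lock
 * descriptions whose order reversals blessed() should ignore, e.g.
 * (hypothetical names, not locks from this file):
 *
 *	static struct witness_blessed blessed_list[] = {
 *		{ "lock alpha", "lock beta" },
 *	};
 */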

static void
witness_init(struct mtx *m, int flag)
{
	m->mtx_witness = enroll(m->mtx_description, flag);
}

static void
witness_destroy(struct mtx *m)
{
	struct mtx *m1;
	struct proc *p;
	p = CURPROC;
	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
		m1 = LIST_NEXT(m1, mtx_held)) {
		if (m1 == m) {
			LIST_REMOVE(m, mtx_held);
			break;
		}
	}
	return;

}

static void
witness_display(void(*prnt)(const char *fmt, ...))
{
	struct witness *w, *w1;

	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
	witness_levelall();

	for (w = w_all; w; w = w->w_next) {
		if (w->w_file == NULL)
			continue;
		for (w1 = w_all; w1; w1 = w1->w_next) {
			if (isitmychild(w1, w))
				break;
		}
		if (w1 != NULL)
			continue;
		/*
		 * This lock has no ancestors, display its descendants.
		 */
		witness_displaydescendants(prnt, w);
	}
	prnt("\nMutexes which were never acquired\n");
	for (w = w_all; w; w = w->w_next) {
		if (w->w_file != NULL)
			continue;
		prnt("%s\n", w->w_description);
	}
}

void
witness_enter(struct mtx *m, int flags, const char *file, int line)
{
	struct witness *w, *w1;
	struct mtx *m1;
	struct proc *p;
	int i;
#ifdef DDB
	int go_into_ddb = 0;
#endif /* DDB */

	if (witness_cold || m->mtx_witness == NULL || panicstr)
		return;
	w = m->mtx_witness;
	p = CURPROC;

	if (flags & MTX_SPIN) {
		if (!(w->w_spin))
			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
			    " %s:%d", m->mtx_description, file, line);
		if (mtx_recursed(m)) {
			if (!(w->w_recurse))
				panic("mutex_enter: recursion on non-recursive"
				    " mutex %s @ %s:%d", m->mtx_description,
				    file, line);
			return;
		}
		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
		i = PCPU_GET(witness_spin_check);
		if (i != 0 && w->w_level < i) {
			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
			    " %s:%d already holding %s:%x",
			    m->mtx_description, w->w_level, file, line,
			    spin_order_list[ffs(i)-1], i);
		}
		PCPU_SET(witness_spin_check, i | w->w_level);
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		w->w_file = file;
		w->w_line = line;
		m->mtx_line = line;
		m->mtx_file = file;
		return;
	}
	if (w->w_spin)
		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
		    m->mtx_description, file, line);

	if (mtx_recursed(m)) {
		if (!(w->w_recurse))
			panic("mutex_enter: recursion on non-recursive"
			    " mutex %s @ %s:%d", m->mtx_description,
			    file, line);
		return;
	}
	if (witness_dead)
		goto out;
	if (cold)
		goto out;

	if (!mtx_legal2block())
		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
			    m->mtx_description, file, line);
	/*
	 * Is this the first mutex acquired
	 */
	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
		goto out;

	if ((w1 = m1->mtx_witness) == w) {
		if (w->w_same_squawked || dup_ok(w))
			goto out;
		w->w_same_squawked = 1;
		printf("acquiring duplicate lock of same type: \"%s\"\n",
			m->mtx_description);
		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
		printf(" 2nd @ %s:%d\n", file, line);
#ifdef DDB
		go_into_ddb = 1;
#endif /* DDB */
		goto out;
	}
	MPASS(!mtx_owned(&w_mtx));
	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
	/*
	 * If we have a known higher number just say ok
	 */
	if (witness_watch > 1 && w->w_level > w1->w_level) {
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		goto out;
	}
	if (isitmydescendant(m1->mtx_witness, w)) {
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		goto out;
	}
	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {

		MPASS(i < 200);
		w1 = m1->mtx_witness;
		if (isitmydescendant(w, w1)) {
			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
			if (blessed(w, w1))
				goto out;
			if (m1 == &Giant) {
				if (w1->w_Giant_squawked)
					goto out;
				else
					w1->w_Giant_squawked = 1;
			} else {
				if (w1->w_other_squawked)
					goto out;
				else
					w1->w_other_squawked = 1;
			}
			printf("lock order reversal\n");
			printf(" 1st %s last acquired @ %s:%d\n",
			    w->w_description, w->w_file, w->w_line);
			printf(" 2nd %p %s @ %s:%d\n",
			    m1, w1->w_description, w1->w_file, w1->w_line);
			printf(" 3rd %p %s @ %s:%d\n",
			    m, w->w_description, file, line);
#ifdef DDB
			go_into_ddb = 1;
#endif /* DDB */
			goto out;
		}
	}
	m1 = LIST_FIRST(&p->p_heldmtx);
	if (!itismychild(m1->mtx_witness, w))
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);

out:
#ifdef DDB
	if (witness_ddb && go_into_ddb)
		Debugger("witness_enter");
#endif /* DDB */
	w->w_file = file;
	w->w_line = line;
	m->mtx_line = line;
	m->mtx_file = file;

	/*
	 * If this MPASS fires, it likely means that a mutex being witnessed
	 * is acquired in hardclock.  Put it in the ignore list.  It is
	 * likely not the mutex this assert fails on.
	 */
	MPASS(m->mtx_held.le_prev == NULL);
	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
}

void
witness_try_enter(struct mtx *m, int flags, const char *file, int line)
{
	struct proc *p;
	struct witness *w = m->mtx_witness;

	if (witness_cold)
		return;
	if (panicstr)
		return;
	if (flags & MTX_SPIN) {
		if (!(w->w_spin))
			panic("mutex_try_enter: "
			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
			    m->mtx_description, file, line);
		if (mtx_recursed(m)) {
			if (!(w->w_recurse))
				panic("mutex_try_enter: recursion on"
				    " non-recursive mutex %s @ %s:%d",
				    m->mtx_description, file, line);
			return;
		}
		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
		PCPU_SET(witness_spin_check,
		    PCPU_GET(witness_spin_check) | w->w_level);
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		w->w_file = file;
		w->w_line = line;
		m->mtx_line = line;
		m->mtx_file = file;
		return;
	}

	if (w->w_spin)
		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
		    m->mtx_description, file, line);

	if (mtx_recursed(m)) {
		if (!(w->w_recurse))
			panic("mutex_try_enter: recursion on non-recursive"
			    " mutex %s @ %s:%d", m->mtx_description, file,
			    line);
		return;
	}
	w->w_file = file;
	w->w_line = line;
	m->mtx_line = line;
	m->mtx_file = file;
	p = CURPROC;
	MPASS(m->mtx_held.le_prev == NULL);
	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
}

void
witness_exit(struct mtx *m, int flags, const char *file, int line)
{
	struct witness *w;

	if (witness_cold || m->mtx_witness == NULL || panicstr)
		return;
	w = m->mtx_witness;

	if (flags & MTX_SPIN) {
		if (!(w->w_spin))
			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
			    " %s:%d", m->mtx_description, file, line);
		if (mtx_recursed(m)) {
			if (!(w->w_recurse))
				panic("mutex_exit: recursion on non-recursive"
				    " mutex %s @ %s:%d", m->mtx_description,
				    file, line);
			return;
		}
		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
		PCPU_SET(witness_spin_check,
		    PCPU_GET(witness_spin_check) & ~w->w_level);
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		return;
	}
	if (w->w_spin)
		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
		    m->mtx_description, file, line);

	if (mtx_recursed(m)) {
		if (!(w->w_recurse))
			panic("mutex_exit: recursion on non-recursive"
			    " mutex %s @ %s:%d", m->mtx_description,
			    file, line);
		return;
	}

	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
			    m->mtx_description, file, line);
	LIST_REMOVE(m, mtx_held);
	m->mtx_held.le_prev = NULL;
}

int
witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
{
	struct mtx *m;
	struct proc *p;
	char **sleep;
	int n = 0;

	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
	p = CURPROC;
	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
	    m = LIST_NEXT(m, mtx_held)) {
		if (m == mtx)
			continue;
		for (sleep = sleep_list; *sleep != NULL; sleep++)
			if (strcmp(m->mtx_description, *sleep) == 0)
				goto next;
		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
			file, line, check_only ? "could sleep" : "sleeping",
			m->mtx_description,
			m->mtx_witness->w_file, m->mtx_witness->w_line);
		n++;
	next:
		;
	}
#ifdef DDB
	if (witness_ddb && n)
		Debugger("witness_sleep");
#endif /* DDB */
	return (n);
}

static struct witness *
enroll(const char *description, int flag)
{
	int i;
	struct witness *w, *w1;
	char **ignore;
	char **order;

	if (!witness_watch)
		return (NULL);
	for (ignore = ignore_list; *ignore != NULL; ignore++)
		if (strcmp(description, *ignore) == 0)
			return (NULL);

	if (w_inited == 0) {
		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
		for (i = 0; i < WITNESS_COUNT; i++) {
			w = &w_data[i];
			witness_free(w);
		}
		w_inited = 1;
		for (order = order_list; *order != NULL; order++) {
			w = enroll(*order, MTX_DEF);
			w->w_file = "order list";
			for (order++; *order != NULL; order++) {
				w1 = enroll(*order, MTX_DEF);
				w1->w_file = "order list";
				itismychild(w, w1);
				w = w1;
			}
		}
	}
	if ((flag & MTX_SPIN) && witness_skipspin)
		return (NULL);
	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
	for (w = w_all; w; w = w->w_next) {
		if (strcmp(description, w->w_description) == 0) {
			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
			return (w);
		}
	}
	if ((w = witness_get()) == NULL)
		return (NULL);
	w->w_next = w_all;
	w_all = w;
	w->w_description = description;
	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
	if (flag & MTX_SPIN) {
		w->w_spin = 1;

		i = 1;
		for (order = spin_order_list; *order != NULL; order++) {
			if (strcmp(description, *order) == 0)
				break;
			i <<= 1;
		}
		if (*order == NULL)
			panic("spin lock %s not in order list", description);
		w->w_level = i;
	} else
		w->w_sleep = 1;

	if (flag & MTX_RECURSE)
		w->w_recurse = 1;

	return (w);
}
1531192830Sed
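/*
 * Record that 'child' may be acquired while 'parent' is held.  After the
 * new edge is added, the whole graph is pruned of redundant direct links
 * and the lock levels are recomputed.  Returns 1 only if an overflow
 * child array could not be allocated.
 */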
1532192830Sedstatic int
1533192830Seditismychild(struct witness *parent, struct witness *child)
1534192830Sed{
1535192830Sed	static int recursed;
1536192830Sed
1537192830Sed	/*
1538192830Sed	 * Insert "child" after "parent"
1539192830Sed	 */
1540192830Sed	while (parent->w_morechildren)
1541192830Sed		parent = parent->w_morechildren;
1542192830Sed
1543192830Sed	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1544192830Sed		if ((parent->w_morechildren = witness_get()) == NULL)
1545192830Sed			return (1);
1546192830Sed		parent = parent->w_morechildren;
1547192830Sed	}
1548192830Sed	MPASS(child != NULL);
1549192830Sed	parent->w_children[parent->w_childcnt++] = child;
1550192830Sed	/*
1551192830Sed	 * Now prune the whole tree of redundant direct links.
1552192830Sed	 */
1553192830Sed	if (recursed)
1554192830Sed		return (0);
1555192830Sed	recursed = 1;
1556192830Sed	for (child = w_all; child != NULL; child = child->w_next) {
1557192830Sed		for (parent = w_all; parent != NULL;
1558192830Sed		    parent = parent->w_next) {
1559192830Sed			if (!isitmychild(parent, child))
1560192830Sed				continue;
1561192830Sed			removechild(parent, child);
1562192830Sed			if (isitmydescendant(parent, child))
1563192830Sed				continue;
1564192830Sed			itismychild(parent, child);
1565192830Sed		}
1566192830Sed	}
1567192830Sed	recursed = 0;
1568192830Sed	witness_levelall();
1569192830Sed	return (0);
1570192830Sed}
1571192830Sed
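/*
 * Remove 'child' from 'parent', filling the hole with the last entry of
 * the last overflow array and freeing that array once it is empty.
 */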
1572192830Sedstatic void
1573192830Sedremovechild(struct witness *parent, struct witness *child)
1574192830Sed{
1575192830Sed	struct witness *w, *w1;
1576192830Sed	int i;
1577192830Sed
1578192830Sed	for (w = parent; w != NULL; w = w->w_morechildren)
1579192830Sed		for (i = 0; i < w->w_childcnt; i++)
1580192830Sed			if (w->w_children[i] == child)
1581192830Sed				goto found;
1582192830Sed	return;
1583192830Sedfound:
1584192830Sed	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1585192830Sed		continue;
1586192830Sed	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1587192830Sed	MPASS(w->w_children[i] != NULL);
1588192830Sed
1589192830Sed	if (w1->w_childcnt != 0)
1590192830Sed		return;
1591192830Sed
1592192830Sed	if (w1 == parent)
1593192830Sed		return;
1594192830Sed	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1595192830Sed		continue;
1596192830Sed	w->w_morechildren = NULL;
1597192830Sed	witness_free(w1);
1598192830Sed}
1599192830Sed
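/*
 * Return 1 if 'child' is a direct child of 'parent'.
 */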
1600192830Sedstatic int
1601192830Sedisitmychild(struct witness *parent, struct witness *child)
1602192830Sed{
1603192830Sed	struct witness *w;
1604192830Sed	int i;
1605192830Sed
1606192830Sed	for (w = parent; w != NULL; w = w->w_morechildren) {
1607192830Sed		for (i = 0; i < w->w_childcnt; i++) {
1608192830Sed			if (w->w_children[i] == child)
1609192830Sed				return (1);
1610192830Sed		}
1611192830Sed	}
1612192830Sed	return (0);
1613192830Sed}
1614192830Sed
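/*
 * Return 1 if 'child' is a direct or indirect descendant of 'parent'.
 */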
1615192830Sedstatic int
1616192830Sedisitmydescendant(struct witness *parent, struct witness *child)
1617192830Sed{
1618192830Sed	struct witness *w;
1619192830Sed	int i;
1620192830Sed	int j;
1621192830Sed
1622192830Sed	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1623192830Sed		MPASS(j < 1000);
1624192830Sed		for (i = 0; i < w->w_childcnt; i++) {
1625192830Sed			if (w->w_children[i] == child)
1626192830Sed				return (1);
1627192830Sed		}
1628192830Sed		for (i = 0; i < w->w_childcnt; i++) {
1629192830Sed			if (isitmydescendant(w->w_children[i], child))
1630192830Sed				return (1);
1631192830Sed		}
1632192830Sed	}
1633192830Sed	return (0);
1634192830Sed}
1635192830Sed
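/*
 * Recompute the level of every sleep lock: locks that are not a child of
 * any other lock are roots at level 0, and their descendants are given
 * successively higher levels.  Spin locks keep their fixed levels.
 */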
1636192830Sedvoid
1637192830Sedwitness_levelall(void)
1638192830Sed{
1639192830Sed	struct witness *w, *w1;
1640192830Sed
1641192830Sed	for (w = w_all; w; w = w->w_next)
1642192830Sed		if (!(w->w_spin))
1643192830Sed			w->w_level = 0;
1644192830Sed	for (w = w_all; w; w = w->w_next) {
1645192830Sed		if (w->w_spin)
1646192830Sed			continue;
1647192830Sed		for (w1 = w_all; w1; w1 = w1->w_next) {
1648192830Sed			if (isitmychild(w1, w))
1649192830Sed				break;
1650192830Sed		}
1651192830Sed		if (w1 != NULL)
1652192830Sed			continue;
1653192830Sed		witness_leveldescendents(w, 0);
1654192830Sed	}
1655192830Sed}
1656192830Sed
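/*
 * Raise 'parent' to at least 'level', then recurse over its children
 * with 'level' + 1.
 */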
1657192830Sedstatic void
1658192830Sedwitness_leveldescendents(struct witness *parent, int level)
1659192830Sed{
1660192830Sed	int i;
1661192830Sed	struct witness *w;
1662192830Sed
1663192830Sed	if (parent->w_level < level)
1664192830Sed		parent->w_level = level;
1665192830Sed	level++;
1666192830Sed	for (w = parent; w != NULL; w = w->w_morechildren)
1667192830Sed		for (i = 0; i < w->w_childcnt; i++)
1668192830Sed			witness_leveldescendents(w->w_children[i], level);
1669192830Sed}
1670192830Sed
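/*
 * Print 'parent', indented by its level and annotated with where it was
 * last acquired, then recurse over its children.
 */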
1671192830Sedstatic void
1672192830Sedwitness_displaydescendants(void (*prnt)(const char *fmt, ...),
1673192830Sed			   struct witness *parent)
1674192830Sed{
1675192830Sed	struct witness *w;
1676192830Sed	int i;
1677192830Sed	int level = parent->w_level;
1678192830Sed
1679192830Sed	prnt("%d", level);
1680192830Sed	if (level < 10)
1681192830Sed		prnt(" ");
1682192830Sed	for (i = 0; i < level; i++)
1683192830Sed		prnt(" ");
1684192830Sed	prnt("%s", parent->w_description);
1685192830Sed	if (parent->w_file != NULL) {
1686192830Sed		prnt(" -- last acquired @ %s", parent->w_file);
1687192830Sed#ifndef W_USE_WHERE
1688192830Sed		prnt(":%d", parent->w_line);
1689192830Sed#endif
1690192830Sed		prnt("\n");
1691192830Sed	}
1692192830Sed
1693192830Sed	for (w = parent; w != NULL; w = w->w_morechildren)
1694192830Sed		for (i = 0; i < w->w_childcnt; i++)
1695192830Sed			witness_displaydescendants(prnt, w->w_children[i]);
1696192830Sed}
1697192830Sed
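/*
 * Return 1 if holding more than one lock with this description at a time
 * is explicitly allowed by dup_list.
 */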
1698192830Sedstatic int
1699192830Seddup_ok(struct witness *w)
1700192830Sed{
1701192830Sed	char **dup;
1702192830Sed
1703192830Sed	for (dup = dup_list; *dup != NULL; dup++)
1704192830Sed		if (strcmp(w->w_description, *dup) == 0)
1705192830Sed			return (1);
1706192830Sed	return (0);
1707192830Sed}
1708192830Sed
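/*
 * Return 1 if the pair (w1, w2) appears, in either order, in
 * blessed_list and is therefore exempt from order checking.
 */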
1709192830Sedstatic int
1710192830Sedblessed(struct witness *w1, struct witness *w2)
1711192830Sed{
1712192830Sed	int i;
1713192830Sed	struct witness_blessed *b;
1714192830Sed
1715192830Sed	for (i = 0; i < blessed_count; i++) {
1716192830Sed		b = &blessed_list[i];
1717192830Sed		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1718192830Sed			if (strcmp(w2->w_description, b->b_lock2) == 0)
1719192830Sed				return (1);
1720192830Sed			continue;
1721192830Sed		}
1722192830Sed		if (strcmp(w1->w_description, b->b_lock2) == 0)
1723192830Sed			if (strcmp(w2->w_description, b->b_lock1) == 0)
1724192830Sed				return (1);
1725192830Sed	}
1726192830Sed	return (0);
1727192830Sed}
1728192830Sed
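/*
 * Allocate a witness structure from the free list.  If the list is
 * exhausted, witness marks itself dead, drops w_mtx and returns NULL,
 * so the caller no longer holds w_mtx on failure.
 */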
1729192830Sedstatic struct witness *
1730192830Sedwitness_get(void)
1731192830Sed{
1732192830Sed	struct witness *w;
1733192830Sed
1734192830Sed	if ((w = w_free) == NULL) {
1735192830Sed		witness_dead = 1;
1736192830Sed		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1737192830Sed		printf("witness exhausted\n");
1738192830Sed		return (NULL);
1739192830Sed	}
1740192830Sed	w_free = w->w_next;
1741192830Sed	bzero(w, sizeof(*w));
1742192830Sed	return (w);
1743192830Sed}
1744192830Sed
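/*
 * Return a witness structure to the free list.
 */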
1745192830Sedstatic void
1746192830Sedwitness_free(struct witness *w)
1747192830Sed{
1748192830Sed	w->w_next = w_free;
1749192830Sed	w_free = w;
1750192830Sed}
1751192830Sed
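/*
 * Print every mutex held by 'p', along with where each was acquired, and
 * return the number printed.
 */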
1752192830Sedint
1753192830Sedwitness_list(struct proc *p)
1754192830Sed{
1755192830Sed	struct mtx *m;
1756192830Sed	int nheld;
1757192830Sed
1758192830Sed	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1759192830Sed	nheld = 0;
1760192830Sed	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1761192830Sed	    m = LIST_NEXT(m, mtx_held)) {
1762192830Sed		printf("\t\"%s\" (%p) locked at %s:%d\n",
1763192830Sed		    m->mtx_description, m,
1764192830Sed		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1765192830Sed		nheld++;
1766192830Sed	}
1767192830Sed
1768192830Sed	return (nheld);
1769192830Sed}
1770192830Sed
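/*
 * Save the file and line at which 'm' was last acquired so that
 * witness_restore() can put them back after a temporary release.
 */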
1771192830Sedvoid
1772192830Sedwitness_save(struct mtx *m, const char **filep, int *linep)
1773192830Sed{
1774192830Sed
1775192830Sed	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1776192830Sed	if (m->mtx_witness == NULL)
1777192830Sed		return;
1778192830Sed
1779192830Sed	*filep = m->mtx_witness->w_file;
1780192830Sed	*linep = m->mtx_witness->w_line;
1781192830Sed}
1782192830Sed
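/*
 * Restore the acquisition file and line previously saved by
 * witness_save().
 */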
1783192830Sedvoid
1784192830Sedwitness_restore(struct mtx *m, const char *file, int line)
1785192830Sed{
1786192830Sed
1787192830Sed	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1788192830Sed	if (m->mtx_witness == NULL)
1789192830Sed		return;
1790192830Sed
1791192830Sed	m->mtx_witness->w_file = file;
1792192830Sed	m->mtx_witness->w_line = line;
1793192830Sed}
1794192830Sed
1795192830Sed#endif	/* WITNESS */
1796192830Sed