subr_witness.c revision 71560
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_witness.c 71560 2001-01-24 10:57:01Z jhb $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56/*
57 * Cause non-inlined mtx_*() to be compiled.
58 * Must be defined early because other system headers may include mutex.h.
59 */
60#define _KERN_MUTEX_C_
61
62#include <sys/param.h>
63#include <sys/bus.h>
64#include <sys/kernel.h>
65#include <sys/malloc.h>
66#include <sys/proc.h>
67#include <sys/sysctl.h>
68#include <sys/systm.h>
69#include <sys/vmmeter.h>
70#include <sys/ktr.h>
71
72#include <machine/atomic.h>
73#include <machine/bus.h>
74#include <machine/clock.h>
75#include <machine/cpu.h>
76
77#include <ddb/ddb.h>
78
79#include <vm/vm.h>
80#include <vm/vm_extern.h>
81
82#include <sys/mutex.h>
83
84/*
85 * Machine independent bits of the mutex implementation
86 */
87
88#ifdef WITNESS
89struct mtx_debug {
90	struct witness	*mtxd_witness;
91	LIST_ENTRY(mtx)	mtxd_held;
92	const char	*mtxd_file;
93	int		mtxd_line;
94};
95
96#define mtx_held	mtx_debug->mtxd_held
97#define	mtx_file	mtx_debug->mtxd_file
98#define	mtx_line	mtx_debug->mtxd_line
99#define	mtx_witness	mtx_debug->mtxd_witness
100#endif	/* WITNESS */
101
102/*
103 * Assembly macros
104 *------------------------------------------------------------------------------
105 */
106
107#define	_V(x)	__STRING(x)
108
109/*
110 * Default, unoptimized mutex micro-operations
111 */
112
113#ifndef _obtain_lock
114/* Actually obtain mtx_lock */
115#define _obtain_lock(mp, tid)						\
116	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED, (tid))
117#endif
118
119#ifndef _release_lock
120/* Actually release mtx_lock */
121#define _release_lock(mp, tid)		       				\
122	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), (void *)MTX_UNOWNED)
123#endif
124
125#ifndef _release_lock_quick
126/* Actually release mtx_lock quickly assuming that we own it */
127#define	_release_lock_quick(mp) 					\
128	atomic_store_rel_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED)
129#endif
130
131#ifndef _getlock_sleep
132/* Get a sleep lock, deal with recursion inline. */
133#define	_getlock_sleep(mp, tid, type) do {				\
134	if (!_obtain_lock(mp, tid)) {					\
135		if (((mp)->mtx_lock & MTX_FLAGMASK) != ((uintptr_t)(tid)))\
136			mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0);	\
137		else {							\
138			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSED);	\
139			(mp)->mtx_recurse++;				\
140		}							\
141	}								\
142} while (0)
143#endif
144
145#ifndef _getlock_spin_block
146/* Get a spin lock, handle recursion inline (as the less common case) */
147#define	_getlock_spin_block(mp, tid, type) do {				\
148	u_int _mtx_intr = save_intr();					\
149	disable_intr();							\
150	if (!_obtain_lock(mp, tid))					\
151		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_intr);	\
152	else								\
153		(mp)->mtx_saveintr = _mtx_intr;				\
154} while (0)
155#endif
156
157#ifndef _getlock_norecurse
158/*
159 * Get a lock without any recursion handling. Calls the hard enter function if
160 * we can't get it inline.
161 */
162#define	_getlock_norecurse(mp, tid, type) do {				\
163	if (!_obtain_lock(mp, tid))					\
164		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);		\
165} while (0)
166#endif
167
168#ifndef _exitlock_norecurse
169/*
170 * Release a sleep lock assuming we haven't recursed on it, recursion is handled
171 * in the hard function.
172 */
173#define	_exitlock_norecurse(mp, tid, type) do {				\
174	if (!_release_lock(mp, tid))					\
175		mtx_exit_hard((mp), (type) & MTX_HARDOPTS);		\
176} while (0)
177#endif
178
179#ifndef _exitlock
180/*
181 * Release a sleep lock when its likely we recursed (the code to
182 * deal with simple recursion is inline).
183 */
184#define	_exitlock(mp, tid, type) do {					\
185	if (!_release_lock(mp, tid)) {					\
186		if ((mp)->mtx_lock & MTX_RECURSED) {			\
187			if (--((mp)->mtx_recurse) == 0)			\
188				atomic_clear_ptr(&(mp)->mtx_lock,	\
189				    MTX_RECURSED);			\
190		} else {						\
191			mtx_exit_hard((mp), (type) & MTX_HARDOPTS);	\
192		}							\
193	}								\
194} while (0)
195#endif
196
197#ifndef _exitlock_spin
198/* Release a spin lock (with possible recursion). */
199#define	_exitlock_spin(mp) do {						\
200	if (!mtx_recursed((mp))) {					\
201		int _mtx_intr = (mp)->mtx_saveintr;			\
202									\
203		_release_lock_quick(mp);				\
204		restore_intr(_mtx_intr);				\
205	} else {							\
206		(mp)->mtx_recurse--;					\
207	}								\
208} while (0)
209#endif
210
211#ifdef WITNESS
212static void	witness_init(struct mtx *, int flag);
213static void	witness_destroy(struct mtx *);
214static void	witness_display(void(*)(const char *fmt, ...));
215
216/* All mutexes in system (used for debug/panic) */
217static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
218/*
219 * Set to 0 once mutexes have been fully initialized so that witness code can be
220 * safely executed.
221 */
222static int witness_cold = 1;
223#else	/* WITNESS */
224
225/*
226 * flag++ is slezoid way of shutting up unused parameter warning
227 * in mtx_init()
228 */
229#define witness_init(m, flag) flag++
230#define witness_destroy(m)
231#define witness_try_enter(m, t, f, l)
232#endif	/* WITNESS */
233
234/* All mutexes in system (used for debug/panic) */
235static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
236	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
237	{ NULL, NULL }, &all_mtx, &all_mtx,
238#ifdef WITNESS
239	&all_mtx_debug
240#else
241	NULL
242#endif
243	 };
244
245static int	mtx_cur_cnt;
246static int	mtx_max_cnt;
247
248static void	propagate_priority(struct proc *);
249static void	mtx_enter_hard(struct mtx *, int type, int saveintr);
250static void	mtx_exit_hard(struct mtx *, int type);
251
252#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
253#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
254			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
255
256#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
257#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
258
259static void
260propagate_priority(struct proc *p)
261{
262	int pri = p->p_priority;
263	struct mtx *m = p->p_blocked;
264
265	mtx_assert(&sched_lock, MA_OWNED);
266	for (;;) {
267		struct proc *p1;
268
269		p = mtx_owner(m);
270
271		if (p == NULL) {
272			/*
273			 * This really isn't quite right. Really
274			 * ought to bump priority of process that
275			 * next acquires the mutex.
276			 */
277			MPASS(m->mtx_lock == MTX_CONTESTED);
278			return;
279		}
280		MPASS(p->p_magic == P_MAGIC);
281		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
282		if (p->p_priority <= pri)
283			return;
284
285		/*
286		 * Bump this process' priority.
287		 */
288		SET_PRIO(p, pri);
289
290		/*
291		 * If lock holder is actually running, just bump priority.
292		 */
293#ifdef SMP
294		/*
295		 * For SMP, we can check the p_oncpu field to see if we are
296		 * running.
297		 */
298		if (p->p_oncpu != 0xff) {
299			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
300			return;
301		}
302#else
303		/*
304		 * For UP, we check to see if p is curproc (this shouldn't
305		 * ever happen however as it would mean we are in a deadlock.)
306		 */
307		if (p == curproc) {
308			panic("Deadlock detected");
309			return;
310		}
311#endif
312		/*
313		 * If on run queue move to new run queue, and
314		 * quit.
315		 */
316		if (p->p_stat == SRUN) {
317			printf("XXX: moving process %d(%s) to a new run queue\n",
318			       p->p_pid, p->p_comm);
319			MPASS(p->p_blocked == NULL);
320			remrunqueue(p);
321			setrunqueue(p);
322			return;
323		}
324
325		/*
326		 * If we aren't blocked on a mutex, we should be.
327		 */
328		KASSERT(p->p_stat == SMTX, (
329		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
330		    p->p_pid, p->p_comm, p->p_stat,
331		    m->mtx_description));
332
333		/*
334		 * Pick up the mutex that p is blocked on.
335		 */
336		m = p->p_blocked;
337		MPASS(m != NULL);
338
339		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
340		    p->p_comm, m->mtx_description);
341		/*
342		 * Check if the proc needs to be moved up on
343		 * the blocked chain
344		 */
345		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
346			printf("XXX: process at head of run queue\n");
347			continue;
348		}
349		p1 = TAILQ_PREV(p, rq, p_procq);
350		if (p1->p_priority <= pri) {
351			printf(
352	"XXX: previous process %d(%s) has higher priority\n",
353	                    p->p_pid, p->p_comm);
354			continue;
355		}
356
357		/*
358		 * Remove proc from blocked chain and determine where
359		 * it should be moved up to.  Since we know that p1 has
360		 * a lower priority than p, we know that at least one
361		 * process in the chain has a lower priority and that
362		 * p1 will thus not be NULL after the loop.
363		 */
364		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
365		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
366			MPASS(p1->p_magic == P_MAGIC);
367			if (p1->p_priority > pri)
368				break;
369		}
370		MPASS(p1 != NULL);
371		TAILQ_INSERT_BEFORE(p1, p, p_procq);
372		CTR4(KTR_LOCK,
373		    "propagate_priority: p %p moved before %p on [%p] %s",
374		    p, p1, m, m->mtx_description);
375	}
376}
377
378/*
379 * Get lock 'm', the macro handles the easy (and most common cases) and leaves
380 * the slow stuff to the mtx_enter_hard() function.
381 *
382 * Note: since type is usually a constant much of this code is optimized out.
383 */
384void
385_mtx_enter(struct mtx *mtxp, int type, const char *file, int line)
386{
387	struct mtx	*mpp = mtxp;
388
389	/* bits only valid on mtx_exit() */
390	MPASS4(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
391	    STR_mtx_bad_type, file, line);
392
393	if ((type) & MTX_SPIN) {
394		/*
395		 * Easy cases of spin locks:
396		 *
397		 * 1) We already own the lock and will simply recurse on it (if
398		 *    RLIKELY)
399		 *
400		 * 2) The lock is free, we just get it
401		 */
402		if ((type) & MTX_RLIKELY) {
403			/*
404			 * Check for recursion, if we already have this
405			 * lock we just bump the recursion count.
406			 */
407			if (mpp->mtx_lock == (uintptr_t)CURTHD) {
408				mpp->mtx_recurse++;
409				goto done;
410			}
411		}
412
413		if (((type) & MTX_TOPHALF) == 0) {
414			/*
415			 * If an interrupt thread uses this we must block
416			 * interrupts here.
417			 */
418			if ((type) & MTX_FIRST) {
419				ASS_IEN;
420				disable_intr();
421				_getlock_norecurse(mpp, CURTHD,
422				    (type) & MTX_HARDOPTS);
423			} else {
424				_getlock_spin_block(mpp, CURTHD,
425				    (type) & MTX_HARDOPTS);
426			}
427		} else
428			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
429	} else {
430		/* Sleep locks */
431		if ((type) & MTX_RLIKELY)
432			_getlock_sleep(mpp, CURTHD, (type) & MTX_HARDOPTS);
433		else
434			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
435	}
436done:
437	WITNESS_ENTER(mpp, type, file, line);
438	if (((type) & MTX_QUIET) == 0)
439		CTR5(KTR_LOCK, STR_mtx_enter_fmt,
440		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
441
442}
443
444/*
445 * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
446 *
447 * XXX DOES NOT HANDLE RECURSION
448 */
449int
450_mtx_try_enter(struct mtx *mtxp, int type, const char *file, int line)
451{
452	struct mtx	*const mpp = mtxp;
453	int	rval;
454
455	rval = _obtain_lock(mpp, CURTHD);
456#ifdef WITNESS
457	if (rval && mpp->mtx_witness != NULL) {
458		MPASS(mpp->mtx_recurse == 0);
459		witness_try_enter(mpp, type, file, line);
460	}
461#endif	/* WITNESS */
462	if (((type) & MTX_QUIET) == 0)
463		CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
464		    mpp->mtx_description, mpp, rval, file, line);
465
466	return rval;
467}
468
469/*
470 * Release lock m.
471 */
472void
473_mtx_exit(struct mtx *mtxp, int type, const char *file, int line)
474{
475	struct mtx	*const mpp = mtxp;
476
477	MPASS4(mtx_owned(mpp), STR_mtx_owned, file, line);
478	WITNESS_EXIT(mpp, type, file, line);
479	if (((type) & MTX_QUIET) == 0)
480		CTR5(KTR_LOCK, STR_mtx_exit_fmt,
481		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
482	if ((type) & MTX_SPIN) {
483		if ((type) & MTX_NORECURSE) {
484			int mtx_intr = mpp->mtx_saveintr;
485
486			MPASS4(mpp->mtx_recurse == 0, STR_mtx_recurse,
487			    file, line);
488			_release_lock_quick(mpp);
489			if (((type) & MTX_TOPHALF) == 0) {
490				if ((type) & MTX_FIRST) {
491					ASS_IDIS;
492					enable_intr();
493				} else
494					restore_intr(mtx_intr);
495			}
496		} else {
497			if (((type & MTX_TOPHALF) == 0) &&
498			    (type & MTX_FIRST)) {
499				ASS_IDIS;
500				ASS_SIEN(mpp);
501			}
502			_exitlock_spin(mpp);
503		}
504	} else {
505		/* Handle sleep locks */
506		if ((type) & MTX_RLIKELY)
507			_exitlock(mpp, CURTHD, (type) & MTX_HARDOPTS);
508		else {
509			_exitlock_norecurse(mpp, CURTHD,
510			    (type) & MTX_HARDOPTS);
511		}
512	}
513}
514
515void
516mtx_enter_hard(struct mtx *m, int type, int saveintr)
517{
518	struct proc *p = CURPROC;
519
520	KASSERT(p != NULL, ("curproc is NULL in mutex"));
521
522	switch (type) {
523	case MTX_DEF:
524		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
525			m->mtx_recurse++;
526			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
527			if ((type & MTX_QUIET) == 0)
528				CTR1(KTR_LOCK, "mtx_enter: %p recurse", m);
529			return;
530		}
531		if ((type & MTX_QUIET) == 0)
532			CTR3(KTR_LOCK,
533			    "mtx_enter: %p contested (lock=%p) [%p]",
534			    m, (void *)m->mtx_lock, (void *)RETIP(m));
535
536		/*
537		 * Save our priority.  Even though p_nativepri is protected
538		 * by sched_lock, we don't obtain it here as it can be
539		 * expensive.  Since this is the only place p_nativepri is
540		 * set, and since two CPUs will not be executing the same
541		 * process concurrently, we know that no other CPU is going
542		 * to be messing with this.  Also, p_nativepri is only read
543		 * when we are blocked on a mutex, so that can't be happening
544		 * right now either.
545		 */
546		p->p_nativepri = p->p_priority;
547		while (!_obtain_lock(m, p)) {
548			uintptr_t v;
549			struct proc *p1;
550
551			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
552			/*
553			 * check if the lock has been released while
554			 * waiting for the schedlock.
555			 */
556			if ((v = m->mtx_lock) == MTX_UNOWNED) {
557				mtx_exit(&sched_lock, MTX_SPIN);
558				continue;
559			}
560			/*
561			 * The mutex was marked contested on release. This
562			 * means that there are processes blocked on it.
563			 */
564			if (v == MTX_CONTESTED) {
565				p1 = TAILQ_FIRST(&m->mtx_blocked);
566				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
567				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
568				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
569				if (p1->p_priority < p->p_priority) {
570					SET_PRIO(p, p1->p_priority);
571				}
572				mtx_exit(&sched_lock, MTX_SPIN);
573				return;
574			}
575			/*
576			 * If the mutex isn't already contested and
577			 * a failure occurs setting the contested bit the
578			 * mutex was either release or the
579			 * state of the RECURSION bit changed.
580			 */
581			if ((v & MTX_CONTESTED) == 0 &&
582			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
583				               (void *)(v | MTX_CONTESTED))) {
584				mtx_exit(&sched_lock, MTX_SPIN);
585				continue;
586			}
587
588			/* We definitely have to sleep for this lock */
589			mtx_assert(m, MA_NOTOWNED);
590
591#ifdef notyet
592			/*
593			 * If we're borrowing an interrupted thread's VM
594			 * context must clean up before going to sleep.
595			 */
596			if (p->p_flag & (P_ITHD | P_SITHD)) {
597				ithd_t *it = (ithd_t *)p;
598
599				if (it->it_interrupted) {
600					if ((type & MTX_QUIET) == 0)
601						CTR2(KTR_LOCK,
602					    "mtx_enter: 0x%x interrupted 0x%x",
603						    it, it->it_interrupted);
604					intr_thd_fixup(it);
605				}
606			}
607#endif
608
609			/* Put us on the list of procs blocked on this mutex */
610			if (TAILQ_EMPTY(&m->mtx_blocked)) {
611				p1 = (struct proc *)(m->mtx_lock &
612						     MTX_FLAGMASK);
613				LIST_INSERT_HEAD(&p1->p_contested, m,
614						 mtx_contested);
615				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
616			} else {
617				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
618					if (p1->p_priority > p->p_priority)
619						break;
620				if (p1)
621					TAILQ_INSERT_BEFORE(p1, p, p_procq);
622				else
623					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
624							  p_procq);
625			}
626
627			p->p_blocked = m;	/* Who we're blocked on */
628			p->p_mtxname = m->mtx_description;
629			p->p_stat = SMTX;
630#if 0
631			propagate_priority(p);
632#endif
633			if ((type & MTX_QUIET) == 0)
634				CTR3(KTR_LOCK,
635				    "mtx_enter: p %p blocked on [%p] %s",
636				    p, m, m->mtx_description);
637			mi_switch();
638			if ((type & MTX_QUIET) == 0)
639				CTR3(KTR_LOCK,
640			    "mtx_enter: p %p free from blocked on [%p] %s",
641				    p, m, m->mtx_description);
642			mtx_exit(&sched_lock, MTX_SPIN);
643		}
644		return;
645	case MTX_SPIN:
646	case MTX_SPIN | MTX_FIRST:
647	case MTX_SPIN | MTX_TOPHALF:
648	    {
649		int i = 0;
650
651		if (m->mtx_lock == (uintptr_t)p) {
652			m->mtx_recurse++;
653			return;
654		}
655		if ((type & MTX_QUIET) == 0)
656			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
657		for (;;) {
658			if (_obtain_lock(m, p))
659				break;
660			while (m->mtx_lock != MTX_UNOWNED) {
661				if (i++ < 1000000)
662					continue;
663				if (i++ < 6000000)
664					DELAY (1);
665#ifdef DDB
666				else if (!db_active)
667#else
668				else
669#endif
670					panic(
671				"spin lock %s held by %p for > 5 seconds",
672					    m->mtx_description,
673					    (void *)m->mtx_lock);
674			}
675		}
676
677#ifdef MUTEX_DEBUG
678		if (type != MTX_SPIN)
679			m->mtx_saveintr = 0xbeefface;
680		else
681#endif
682			m->mtx_saveintr = saveintr;
683		if ((type & MTX_QUIET) == 0)
684			CTR1(KTR_LOCK, "mtx_enter: %p spin done", m);
685		return;
686	    }
687	}
688}
689
690void
691mtx_exit_hard(struct mtx *m, int type)
692{
693	struct proc *p, *p1;
694	struct mtx *m1;
695	int pri;
696
697	p = CURPROC;
698	switch (type) {
699	case MTX_DEF:
700	case MTX_DEF | MTX_NOSWITCH:
701		if (mtx_recursed(m)) {
702			if (--(m->mtx_recurse) == 0)
703				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
704			if ((type & MTX_QUIET) == 0)
705				CTR1(KTR_LOCK, "mtx_exit: %p unrecurse", m);
706			return;
707		}
708		mtx_enter(&sched_lock, MTX_SPIN);
709		if ((type & MTX_QUIET) == 0)
710			CTR1(KTR_LOCK, "mtx_exit: %p contested", m);
711		p1 = TAILQ_FIRST(&m->mtx_blocked);
712		MPASS(p->p_magic == P_MAGIC);
713		MPASS(p1->p_magic == P_MAGIC);
714		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
715		if (TAILQ_EMPTY(&m->mtx_blocked)) {
716			LIST_REMOVE(m, mtx_contested);
717			_release_lock_quick(m);
718			if ((type & MTX_QUIET) == 0)
719				CTR1(KTR_LOCK, "mtx_exit: %p not held", m);
720		} else
721			atomic_store_rel_ptr(&m->mtx_lock,
722			    (void *)MTX_CONTESTED);
723		pri = MAXPRI;
724		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
725			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
726			if (cp < pri)
727				pri = cp;
728		}
729		if (pri > p->p_nativepri)
730			pri = p->p_nativepri;
731		SET_PRIO(p, pri);
732		if ((type & MTX_QUIET) == 0)
733			CTR2(KTR_LOCK,
734			    "mtx_exit: %p contested setrunqueue %p", m, p1);
735		p1->p_blocked = NULL;
736		p1->p_mtxname = NULL;
737		p1->p_stat = SRUN;
738		setrunqueue(p1);
739		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
740#ifdef notyet
741			if (p->p_flag & (P_ITHD | P_SITHD)) {
742				ithd_t *it = (ithd_t *)p;
743
744				if (it->it_interrupted) {
745					if ((type & MTX_QUIET) == 0)
746						CTR2(KTR_LOCK,
747					    "mtx_exit: 0x%x interruped 0x%x",
748						    it, it->it_interrupted);
749					intr_thd_fixup(it);
750				}
751			}
752#endif
753			setrunqueue(p);
754			if ((type & MTX_QUIET) == 0)
755				CTR2(KTR_LOCK,
756				    "mtx_exit: %p switching out lock=%p",
757				    m, (void *)m->mtx_lock);
758			mi_switch();
759			if ((type & MTX_QUIET) == 0)
760				CTR2(KTR_LOCK,
761				    "mtx_exit: %p resuming lock=%p",
762				    m, (void *)m->mtx_lock);
763		}
764		mtx_exit(&sched_lock, MTX_SPIN);
765		break;
766	case MTX_SPIN:
767	case MTX_SPIN | MTX_FIRST:
768		if (mtx_recursed(m)) {
769			m->mtx_recurse--;
770			return;
771		}
772		MPASS(mtx_owned(m));
773		_release_lock_quick(m);
774		if (type & MTX_FIRST)
775			enable_intr();	/* XXX is this kosher? */
776		else {
777			MPASS(m->mtx_saveintr != 0xbeefface);
778			restore_intr(m->mtx_saveintr);
779		}
780		break;
781	case MTX_SPIN | MTX_TOPHALF:
782		if (mtx_recursed(m)) {
783			m->mtx_recurse--;
784			return;
785		}
786		MPASS(mtx_owned(m));
787		_release_lock_quick(m);
788		break;
789	default:
790		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
791	}
792}
793
794#ifdef INVARIANTS
795void
796_mtx_assert(struct mtx *m, int what, const char *file, int line)
797{
798	switch ((what)) {
799	case MA_OWNED:
800	case MA_OWNED | MA_RECURSED:
801	case MA_OWNED | MA_NOTRECURSED:
802		if (!mtx_owned((m)))
803			panic("mutex %s not owned at %s:%d",
804			    (m)->mtx_description, file, line);
805		if (mtx_recursed((m))) {
806			if (((what) & MA_NOTRECURSED) != 0)
807				panic("mutex %s recursed at %s:%d",
808				    (m)->mtx_description, file, line);
809		} else if (((what) & MA_RECURSED) != 0) {
810			panic("mutex %s unrecursed at %s:%d",
811			    (m)->mtx_description, file, line);
812		}
813		break;
814	case MA_NOTOWNED:
815		if (mtx_owned((m)))
816			panic("mutex %s owned at %s:%d",
817			    (m)->mtx_description, file, line);
818		break;
819	default:
820		panic("unknown mtx_assert at %s:%d", file, line);
821	}
822}
823#endif
824
825#define MV_DESTROY	0	/* validate before destory */
826#define MV_INIT		1	/* validate before init */
827
828#ifdef MUTEX_DEBUG
829
830int mtx_validate __P((struct mtx *, int));
831
832int
833mtx_validate(struct mtx *m, int when)
834{
835	struct mtx *mp;
836	int i;
837	int retval = 0;
838
839#ifdef WITNESS
840	if (witness_cold)
841		return 0;
842#endif
843	if (m == &all_mtx || cold)
844		return 0;
845
846	mtx_enter(&all_mtx, MTX_DEF);
847/*
848 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
849 * we can re-enable the kernacc() checks.
850 */
851#ifndef __alpha__
852	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
853	    VM_PROT_READ) == 1);
854#endif
855	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
856	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
857#ifndef __alpha__
858		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
859		    VM_PROT_READ) != 1) {
860			panic("mtx_validate: mp=%p mp->mtx_next=%p",
861			    mp, mp->mtx_next);
862		}
863#endif
864		i++;
865		if (i > mtx_cur_cnt) {
866			panic("mtx_validate: too many in chain, known=%d\n",
867			    mtx_cur_cnt);
868		}
869	}
870	MPASS(i == mtx_cur_cnt);
871	switch (when) {
872	case MV_DESTROY:
873		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
874			if (mp == m)
875				break;
876		MPASS(mp == m);
877		break;
878	case MV_INIT:
879		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
880		if (mp == m) {
881			/*
882			 * Not good. This mutex already exists.
883			 */
884			printf("re-initing existing mutex %s\n",
885			    m->mtx_description);
886			MPASS(m->mtx_lock == MTX_UNOWNED);
887			retval = 1;
888		}
889	}
890	mtx_exit(&all_mtx, MTX_DEF);
891	return (retval);
892}
893#endif
894
895void
896mtx_init(struct mtx *m, const char *t, int flag)
897{
898	if ((flag & MTX_QUIET) == 0)
899		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, t);
900#ifdef MUTEX_DEBUG
901	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
902		return;
903#endif
904
905	bzero((void *)m, sizeof *m);
906	TAILQ_INIT(&m->mtx_blocked);
907#ifdef WITNESS
908	if (!witness_cold) {
909		/* XXX - should not use DEVBUF */
910		m->mtx_debug = malloc(sizeof(struct mtx_debug),
911		    M_DEVBUF, M_NOWAIT | M_ZERO);
912		MPASS(m->mtx_debug != NULL);
913	}
914#endif
915	m->mtx_description = t;
916
917	m->mtx_flags = flag;
918	m->mtx_lock = MTX_UNOWNED;
919	/* Put on all mutex queue */
920	mtx_enter(&all_mtx, MTX_DEF);
921	m->mtx_next = &all_mtx;
922	m->mtx_prev = all_mtx.mtx_prev;
923	m->mtx_prev->mtx_next = m;
924	all_mtx.mtx_prev = m;
925	if (++mtx_cur_cnt > mtx_max_cnt)
926		mtx_max_cnt = mtx_cur_cnt;
927	mtx_exit(&all_mtx, MTX_DEF);
928#ifdef WITNESS
929	if (!witness_cold)
930		witness_init(m, flag);
931#endif
932}
933
934void
935mtx_destroy(struct mtx *m)
936{
937
938#ifdef WITNESS
939	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
940	    __FUNCTION__));
941#endif
942	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
943#ifdef MUTEX_DEBUG
944	if (m->mtx_next == NULL)
945		panic("mtx_destroy: %p (%s) already destroyed",
946		    m, m->mtx_description);
947
948	if (!mtx_owned(m)) {
949		MPASS(m->mtx_lock == MTX_UNOWNED);
950	} else {
951		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
952	}
953	mtx_validate(m, MV_DESTROY);		/* diagnostic */
954#endif
955
956#ifdef WITNESS
957	if (m->mtx_witness)
958		witness_destroy(m);
959#endif /* WITNESS */
960
961	/* Remove from the all mutex queue */
962	mtx_enter(&all_mtx, MTX_DEF);
963	m->mtx_next->mtx_prev = m->mtx_prev;
964	m->mtx_prev->mtx_next = m->mtx_next;
965#ifdef MUTEX_DEBUG
966	m->mtx_next = m->mtx_prev = NULL;
967#endif
968#ifdef WITNESS
969	free(m->mtx_debug, M_DEVBUF);
970	m->mtx_debug = NULL;
971#endif
972	mtx_cur_cnt--;
973	mtx_exit(&all_mtx, MTX_DEF);
974}
975
976/*
977 * The non-inlined versions of the mtx_*() functions are always built (above),
978 * but the witness code depends on the WITNESS kernel option being specified.
979 */
980
981#ifdef WITNESS
982static void
983witness_fixup(void *dummy __unused)
984{
985	struct mtx *mp;
986
987	/*
988	 * We have to release Giant before initializing its witness
989	 * structure so that WITNESS doesn't get confused.
990	 */
991	mtx_exit(&Giant, MTX_DEF);
992	mtx_assert(&Giant, MA_NOTOWNED);
993	mtx_enter(&all_mtx, MTX_DEF);
994
995	/* Iterate through all mutexes and finish up mutex initialization. */
996	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
997
998		/* XXX - should not use DEVBUF */
999		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
1000		    M_DEVBUF, M_NOWAIT | M_ZERO);
1001		MPASS(mp->mtx_debug != NULL);
1002
1003		witness_init(mp, mp->mtx_flags);
1004	}
1005	mtx_exit(&all_mtx, MTX_DEF);
1006
1007	/* Mark the witness code as being ready for use. */
1008	atomic_store_rel_int(&witness_cold, 0);
1009
1010	mtx_enter(&Giant, MTX_DEF);
1011}
1012SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
1013
1014#define WITNESS_COUNT 200
1015#define	WITNESS_NCHILDREN 2
1016
1017int witness_watch = 1;
1018
1019struct witness {
1020	struct witness	*w_next;
1021	const char	*w_description;
1022	const char	*w_file;
1023	int		 w_line;
1024	struct witness	*w_morechildren;
1025	u_char		 w_childcnt;
1026	u_char		 w_Giant_squawked:1;
1027	u_char		 w_other_squawked:1;
1028	u_char		 w_same_squawked:1;
1029	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
1030	u_int		 w_level;
1031	struct witness	*w_children[WITNESS_NCHILDREN];
1032};
1033
1034struct witness_blessed {
1035	char 	*b_lock1;
1036	char	*b_lock2;
1037};
1038
1039#ifdef DDB
1040/*
1041 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
1042 * drop into kdebug() when:
1043 *	- a lock heirarchy violation occurs
1044 *	- locks are held when going to sleep.
1045 */
1046int	witness_ddb;
1047#ifdef WITNESS_DDB
1048TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
1049#else
1050TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
1051#endif
1052SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
1053#endif /* DDB */
1054
1055int	witness_skipspin;
1056#ifdef WITNESS_SKIPSPIN
1057TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
1058#else
1059TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
1060#endif
1061SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
1062    "");
1063
1064static struct mtx	w_mtx;
1065static struct witness	*w_free;
1066static struct witness	*w_all;
1067static int		 w_inited;
1068static int		 witness_dead;	/* fatal error, probably no memory */
1069
1070static struct witness	 w_data[WITNESS_COUNT];
1071
1072static struct witness	 *enroll __P((const char *description, int flag));
1073static int itismychild __P((struct witness *parent, struct witness *child));
1074static void removechild __P((struct witness *parent, struct witness *child));
1075static int isitmychild __P((struct witness *parent, struct witness *child));
1076static int isitmydescendant __P((struct witness *parent, struct witness *child));
1077static int dup_ok __P((struct witness *));
1078static int blessed __P((struct witness *, struct witness *));
1079static void witness_displaydescendants
1080    __P((void(*)(const char *fmt, ...), struct witness *));
1081static void witness_leveldescendents __P((struct witness *parent, int level));
1082static void witness_levelall __P((void));
1083static struct witness * witness_get __P((void));
1084static void witness_free __P((struct witness *m));
1085
1086
1087static char *ignore_list[] = {
1088	"witness lock",
1089	NULL
1090};
1091
1092static char *spin_order_list[] = {
1093	"sio",
1094	"sched lock",
1095#ifdef __i386__
1096	"clk",
1097#endif
1098	"callout",
1099	/*
1100	 * leaf locks
1101	 */
1102	NULL
1103};
1104
1105static char *order_list[] = {
1106	"Giant", "uidinfo hash", "uidinfo struct", NULL,
1107	"Giant", "proctree", "allproc", "process lock", NULL,
1108	NULL
1109};
1110
1111static char *dup_list[] = {
1112	NULL
1113};
1114
1115static char *sleep_list[] = {
1116	"Giant",
1117	NULL
1118};
1119
1120/*
1121 * Pairs of locks which have been blessed
1122 * Don't complain about order problems with blessed locks
1123 */
1124static struct witness_blessed blessed_list[] = {
1125};
1126static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
1127
1128static void
1129witness_init(struct mtx *m, int flag)
1130{
1131	m->mtx_witness = enroll(m->mtx_description, flag);
1132}
1133
1134static void
1135witness_destroy(struct mtx *m)
1136{
1137	struct mtx *m1;
1138	struct proc *p;
1139	p = CURPROC;
1140	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
1141		m1 = LIST_NEXT(m1, mtx_held)) {
1142		if (m1 == m) {
1143			LIST_REMOVE(m, mtx_held);
1144			break;
1145		}
1146	}
1147	return;
1148
1149}
1150
1151static void
1152witness_display(void(*prnt)(const char *fmt, ...))
1153{
1154	struct witness *w, *w1;
1155
1156	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1157	witness_levelall();
1158
1159	for (w = w_all; w; w = w->w_next) {
1160		if (w->w_file == NULL)
1161			continue;
1162		for (w1 = w_all; w1; w1 = w1->w_next) {
1163			if (isitmychild(w1, w))
1164				break;
1165		}
1166		if (w1 != NULL)
1167			continue;
1168		/*
1169		 * This lock has no anscestors, display its descendants.
1170		 */
1171		witness_displaydescendants(prnt, w);
1172	}
1173	prnt("\nMutex which were never acquired\n");
1174	for (w = w_all; w; w = w->w_next) {
1175		if (w->w_file != NULL)
1176			continue;
1177		prnt("%s\n", w->w_description);
1178	}
1179}
1180
1181void
1182witness_enter(struct mtx *m, int flags, const char *file, int line)
1183{
1184	struct witness *w, *w1;
1185	struct mtx *m1;
1186	struct proc *p;
1187	int i;
1188#ifdef DDB
1189	int go_into_ddb = 0;
1190#endif /* DDB */
1191
1192	if (witness_cold || m->mtx_witness == NULL || panicstr)
1193		return;
1194	w = m->mtx_witness;
1195	p = CURPROC;
1196
1197	if (flags & MTX_SPIN) {
1198		if ((m->mtx_flags & MTX_SPIN) == 0)
1199			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1200			    " %s:%d", m->mtx_description, file, line);
1201		if (mtx_recursed(m)) {
1202			if ((m->mtx_flags & MTX_RECURSE) == 0)
1203				panic("mutex_enter: recursion on non-recursive"
1204				    " mutex %s @ %s:%d", m->mtx_description,
1205				    file, line);
1206			return;
1207		}
1208		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1209		i = PCPU_GET(witness_spin_check);
1210		if (i != 0 && w->w_level < i) {
1211			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1212			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1213			    " %s:%d already holding %s:%x",
1214			    m->mtx_description, w->w_level, file, line,
1215			    spin_order_list[ffs(i)-1], i);
1216		}
1217		PCPU_SET(witness_spin_check, i | w->w_level);
1218		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1219		w->w_file = file;
1220		w->w_line = line;
1221		m->mtx_line = line;
1222		m->mtx_file = file;
1223		return;
1224	}
1225	if ((m->mtx_flags & MTX_SPIN) != 0)
1226		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1227		    m->mtx_description, file, line);
1228
1229	if (mtx_recursed(m)) {
1230		if ((m->mtx_flags & MTX_RECURSE) == 0)
1231			panic("mutex_enter: recursion on non-recursive"
1232			    " mutex %s @ %s:%d", m->mtx_description,
1233			    file, line);
1234		return;
1235	}
1236	if (witness_dead)
1237		goto out;
1238	if (cold)
1239		goto out;
1240
1241	if (!mtx_legal2block())
1242		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
1243			    m->mtx_description, file, line);
1244	/*
1245	 * Is this the first mutex acquired
1246	 */
1247	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1248		goto out;
1249
1250	if ((w1 = m1->mtx_witness) == w) {
1251		if (w->w_same_squawked || dup_ok(w))
1252			goto out;
1253		w->w_same_squawked = 1;
1254		printf("acquring duplicate lock of same type: \"%s\"\n",
1255			m->mtx_description);
1256		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1257		printf(" 2nd @ %s:%d\n", file, line);
1258#ifdef DDB
1259		go_into_ddb = 1;
1260#endif /* DDB */
1261		goto out;
1262	}
1263	MPASS(!mtx_owned(&w_mtx));
1264	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1265	/*
1266	 * If we have a known higher number just say ok
1267	 */
1268	if (witness_watch > 1 && w->w_level > w1->w_level) {
1269		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1270		goto out;
1271	}
1272	if (isitmydescendant(m1->mtx_witness, w)) {
1273		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1274		goto out;
1275	}
1276	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1277
1278		MPASS(i < 200);
1279		w1 = m1->mtx_witness;
1280		if (isitmydescendant(w, w1)) {
1281			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1282			if (blessed(w, w1))
1283				goto out;
1284			if (m1 == &Giant) {
1285				if (w1->w_Giant_squawked)
1286					goto out;
1287				else
1288					w1->w_Giant_squawked = 1;
1289			} else {
1290				if (w1->w_other_squawked)
1291					goto out;
1292				else
1293					w1->w_other_squawked = 1;
1294			}
1295			printf("lock order reversal\n");
1296			printf(" 1st %s last acquired @ %s:%d\n",
1297			    w->w_description, w->w_file, w->w_line);
1298			printf(" 2nd %p %s @ %s:%d\n",
1299			    m1, w1->w_description, w1->w_file, w1->w_line);
1300			printf(" 3rd %p %s @ %s:%d\n",
1301			    m, w->w_description, file, line);
1302#ifdef DDB
1303			go_into_ddb = 1;
1304#endif /* DDB */
1305			goto out;
1306		}
1307	}
1308	m1 = LIST_FIRST(&p->p_heldmtx);
1309	if (!itismychild(m1->mtx_witness, w))
1310		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1311
1312out:
1313#ifdef DDB
1314	if (witness_ddb && go_into_ddb)
1315		Debugger("witness_enter");
1316#endif /* DDB */
1317	w->w_file = file;
1318	w->w_line = line;
1319	m->mtx_line = line;
1320	m->mtx_file = file;
1321
1322	/*
1323	 * If this pays off it likely means that a mutex being witnessed
1324	 * is acquired in hardclock. Put it in the ignore list. It is
1325	 * likely not the mutex this assert fails on.
1326	 */
1327	MPASS(m->mtx_held.le_prev == NULL);
1328	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1329}
1330
1331void
1332witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1333{
1334	struct proc *p;
1335	struct witness *w = m->mtx_witness;
1336
1337	if (witness_cold)
1338		return;
1339	if (panicstr)
1340		return;
1341	if (flags & MTX_SPIN) {
1342		if ((m->mtx_flags & MTX_SPIN) == 0)
1343			panic("mutex_try_enter: "
1344			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1345			    m->mtx_description, file, line);
1346		if (mtx_recursed(m)) {
1347			if ((m->mtx_flags & MTX_RECURSE) == 0)
1348				panic("mutex_try_enter: recursion on"
1349				    " non-recursive mutex %s @ %s:%d",
1350				    m->mtx_description, file, line);
1351			return;
1352		}
1353		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1354		PCPU_SET(witness_spin_check,
1355		    PCPU_GET(witness_spin_check) | w->w_level);
1356		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1357		w->w_file = file;
1358		w->w_line = line;
1359		m->mtx_line = line;
1360		m->mtx_file = file;
1361		return;
1362	}
1363
1364	if ((m->mtx_flags & MTX_SPIN) != 0)
1365		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1366		    m->mtx_description, file, line);
1367
1368	if (mtx_recursed(m)) {
1369		if ((m->mtx_flags & MTX_RECURSE) == 0)
1370			panic("mutex_try_enter: recursion on non-recursive"
1371			    " mutex %s @ %s:%d", m->mtx_description, file,
1372			    line);
1373		return;
1374	}
1375	w->w_file = file;
1376	w->w_line = line;
1377	m->mtx_line = line;
1378	m->mtx_file = file;
1379	p = CURPROC;
1380	MPASS(m->mtx_held.le_prev == NULL);
1381	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1382}
1383
1384void
1385witness_exit(struct mtx *m, int flags, const char *file, int line)
1386{
1387	struct witness *w;
1388
1389	if (witness_cold || m->mtx_witness == NULL || panicstr)
1390		return;
1391	w = m->mtx_witness;
1392
1393	if (flags & MTX_SPIN) {
1394		if ((m->mtx_flags & MTX_SPIN) == 0)
1395			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1396			    " %s:%d", m->mtx_description, file, line);
1397		if (mtx_recursed(m)) {
1398			if ((m->mtx_flags & MTX_RECURSE) == 0)
1399				panic("mutex_exit: recursion on non-recursive"
1400				    " mutex %s @ %s:%d", m->mtx_description,
1401				    file, line);
1402			return;
1403		}
1404		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1405		PCPU_SET(witness_spin_check,
1406		    PCPU_GET(witness_spin_check) & ~w->w_level);
1407		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1408		return;
1409	}
1410	if ((m->mtx_flags & MTX_SPIN) != 0)
1411		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1412		    m->mtx_description, file, line);
1413
1414	if (mtx_recursed(m)) {
1415		if ((m->mtx_flags & MTX_RECURSE) == 0)
1416			panic("mutex_exit: recursion on non-recursive"
1417			    " mutex %s @ %s:%d", m->mtx_description,
1418			    file, line);
1419		return;
1420	}
1421
1422	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1423		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
1424			    m->mtx_description, file, line);
1425	LIST_REMOVE(m, mtx_held);
1426	m->mtx_held.le_prev = NULL;
1427}
1428
1429int
1430witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1431{
1432	struct mtx *m;
1433	struct proc *p;
1434	char **sleep;
1435	int n = 0;
1436
1437	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1438	p = CURPROC;
1439	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1440	    m = LIST_NEXT(m, mtx_held)) {
1441		if (m == mtx)
1442			continue;
1443		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1444			if (strcmp(m->mtx_description, *sleep) == 0)
1445				goto next;
1446		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1447			file, line, check_only ? "could sleep" : "sleeping",
1448			m->mtx_description,
1449			m->mtx_witness->w_file, m->mtx_witness->w_line);
1450		n++;
1451	next:
1452	}
1453#ifdef DDB
1454	if (witness_ddb && n)
1455		Debugger("witness_sleep");
1456#endif /* DDB */
1457	return (n);
1458}
1459
1460static struct witness *
1461enroll(const char *description, int flag)
1462{
1463	int i;
1464	struct witness *w, *w1;
1465	char **ignore;
1466	char **order;
1467
1468	if (!witness_watch)
1469		return (NULL);
1470	for (ignore = ignore_list; *ignore != NULL; ignore++)
1471		if (strcmp(description, *ignore) == 0)
1472			return (NULL);
1473
1474	if (w_inited == 0) {
1475		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1476		for (i = 0; i < WITNESS_COUNT; i++) {
1477			w = &w_data[i];
1478			witness_free(w);
1479		}
1480		w_inited = 1;
1481		for (order = order_list; *order != NULL; order++) {
1482			w = enroll(*order, MTX_DEF);
1483			w->w_file = "order list";
1484			for (order++; *order != NULL; order++) {
1485				w1 = enroll(*order, MTX_DEF);
1486				w1->w_file = "order list";
1487				itismychild(w, w1);
1488				w = w1;
1489    	    	    	}
1490		}
1491	}
1492	if ((flag & MTX_SPIN) && witness_skipspin)
1493		return (NULL);
1494	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1495	for (w = w_all; w; w = w->w_next) {
1496		if (strcmp(description, w->w_description) == 0) {
1497			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1498			return (w);
1499		}
1500	}
1501	if ((w = witness_get()) == NULL)
1502		return (NULL);
1503	w->w_next = w_all;
1504	w_all = w;
1505	w->w_description = description;
1506	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1507	if (flag & MTX_SPIN) {
1508		w->w_spin = 1;
1509
1510		i = 1;
1511		for (order = spin_order_list; *order != NULL; order++) {
1512			if (strcmp(description, *order) == 0)
1513				break;
1514			i <<= 1;
1515		}
1516		if (*order == NULL)
1517			panic("spin lock %s not in order list", description);
1518		w->w_level = i;
1519	}
1520
1521	return (w);
1522}
1523
1524static int
1525itismychild(struct witness *parent, struct witness *child)
1526{
1527	static int recursed;
1528
1529	/*
1530	 * Insert "child" after "parent"
1531	 */
1532	while (parent->w_morechildren)
1533		parent = parent->w_morechildren;
1534
1535	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1536		if ((parent->w_morechildren = witness_get()) == NULL)
1537			return (1);
1538		parent = parent->w_morechildren;
1539	}
1540	MPASS(child != NULL);
1541	parent->w_children[parent->w_childcnt++] = child;
1542	/*
1543	 * now prune whole tree
1544	 */
1545	if (recursed)
1546		return (0);
1547	recursed = 1;
1548	for (child = w_all; child != NULL; child = child->w_next) {
1549		for (parent = w_all; parent != NULL;
1550		    parent = parent->w_next) {
1551			if (!isitmychild(parent, child))
1552				continue;
1553			removechild(parent, child);
1554			if (isitmydescendant(parent, child))
1555				continue;
1556			itismychild(parent, child);
1557		}
1558	}
1559	recursed = 0;
1560	witness_levelall();
1561	return (0);
1562}
1563
1564static void
1565removechild(struct witness *parent, struct witness *child)
1566{
1567	struct witness *w, *w1;
1568	int i;
1569
1570	for (w = parent; w != NULL; w = w->w_morechildren)
1571		for (i = 0; i < w->w_childcnt; i++)
1572			if (w->w_children[i] == child)
1573				goto found;
1574	return;
1575found:
1576	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1577		continue;
1578	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1579	MPASS(w->w_children[i] != NULL);
1580
1581	if (w1->w_childcnt != 0)
1582		return;
1583
1584	if (w1 == parent)
1585		return;
1586	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1587		continue;
1588	w->w_morechildren = 0;
1589	witness_free(w1);
1590}
1591
1592static int
1593isitmychild(struct witness *parent, struct witness *child)
1594{
1595	struct witness *w;
1596	int i;
1597
1598	for (w = parent; w != NULL; w = w->w_morechildren) {
1599		for (i = 0; i < w->w_childcnt; i++) {
1600			if (w->w_children[i] == child)
1601				return (1);
1602		}
1603	}
1604	return (0);
1605}
1606
1607static int
1608isitmydescendant(struct witness *parent, struct witness *child)
1609{
1610	struct witness *w;
1611	int i;
1612	int j;
1613
1614	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1615		MPASS(j < 1000);
1616		for (i = 0; i < w->w_childcnt; i++) {
1617			if (w->w_children[i] == child)
1618				return (1);
1619		}
1620		for (i = 0; i < w->w_childcnt; i++) {
1621			if (isitmydescendant(w->w_children[i], child))
1622				return (1);
1623		}
1624	}
1625	return (0);
1626}
1627
1628void
1629witness_levelall (void)
1630{
1631	struct witness *w, *w1;
1632
1633	for (w = w_all; w; w = w->w_next)
1634		if (!(w->w_spin))
1635			w->w_level = 0;
1636	for (w = w_all; w; w = w->w_next) {
1637		if (w->w_spin)
1638			continue;
1639		for (w1 = w_all; w1; w1 = w1->w_next) {
1640			if (isitmychild(w1, w))
1641				break;
1642		}
1643		if (w1 != NULL)
1644			continue;
1645		witness_leveldescendents(w, 0);
1646	}
1647}
1648
1649static void
1650witness_leveldescendents(struct witness *parent, int level)
1651{
1652	int i;
1653	struct witness *w;
1654
1655	if (parent->w_level < level)
1656		parent->w_level = level;
1657	level++;
1658	for (w = parent; w != NULL; w = w->w_morechildren)
1659		for (i = 0; i < w->w_childcnt; i++)
1660			witness_leveldescendents(w->w_children[i], level);
1661}
1662
1663static void
1664witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1665			   struct witness *parent)
1666{
1667	struct witness *w;
1668	int i;
1669	int level = parent->w_level;
1670
1671	prnt("%d", level);
1672	if (level < 10)
1673		prnt(" ");
1674	for (i = 0; i < level; i++)
1675		prnt(" ");
1676	prnt("%s", parent->w_description);
1677	if (parent->w_file != NULL) {
1678		prnt(" -- last acquired @ %s", parent->w_file);
1679#ifndef W_USE_WHERE
1680		prnt(":%d", parent->w_line);
1681#endif
1682		prnt("\n");
1683	}
1684
1685	for (w = parent; w != NULL; w = w->w_morechildren)
1686		for (i = 0; i < w->w_childcnt; i++)
1687			    witness_displaydescendants(prnt, w->w_children[i]);
1688    }
1689
1690static int
1691dup_ok(struct witness *w)
1692{
1693	char **dup;
1694
1695	for (dup = dup_list; *dup!= NULL; dup++)
1696		if (strcmp(w->w_description, *dup) == 0)
1697			return (1);
1698	return (0);
1699}
1700
1701static int
1702blessed(struct witness *w1, struct witness *w2)
1703{
1704	int i;
1705	struct witness_blessed *b;
1706
1707	for (i = 0; i < blessed_count; i++) {
1708		b = &blessed_list[i];
1709		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1710			if (strcmp(w2->w_description, b->b_lock2) == 0)
1711				return (1);
1712			continue;
1713		}
1714		if (strcmp(w1->w_description, b->b_lock2) == 0)
1715			if (strcmp(w2->w_description, b->b_lock1) == 0)
1716				return (1);
1717	}
1718	return (0);
1719}
1720
1721static struct witness *
1722witness_get()
1723{
1724	struct witness *w;
1725
1726	if ((w = w_free) == NULL) {
1727		witness_dead = 1;
1728		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1729		printf("witness exhausted\n");
1730		return (NULL);
1731	}
1732	w_free = w->w_next;
1733	bzero(w, sizeof(*w));
1734	return (w);
1735}
1736
1737static void
1738witness_free(struct witness *w)
1739{
1740	w->w_next = w_free;
1741	w_free = w;
1742}
1743
1744int
1745witness_list(struct proc *p)
1746{
1747	struct mtx *m;
1748	int nheld;
1749
1750	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1751	nheld = 0;
1752	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1753	    m = LIST_NEXT(m, mtx_held)) {
1754		printf("\t\"%s\" (%p) locked at %s:%d\n",
1755		    m->mtx_description, m,
1756		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1757		nheld++;
1758	}
1759
1760	return (nheld);
1761}
1762
1763void
1764witness_save(struct mtx *m, const char **filep, int *linep)
1765{
1766
1767	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1768	if (m->mtx_witness == NULL)
1769		return;
1770
1771	*filep = m->mtx_witness->w_file;
1772	*linep = m->mtx_witness->w_line;
1773}
1774
1775void
1776witness_restore(struct mtx *m, const char *file, int line)
1777{
1778
1779	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1780	if (m->mtx_witness == NULL)
1781		return;
1782
1783	m->mtx_witness->w_file = file;
1784	m->mtx_witness->w_line = line;
1785}
1786
1787#endif	/* WITNESS */
1788