subr_turnstile.c revision 71352
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_turnstile.c 71352 2001-01-21 22:34:43Z jasone $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56/*
57 * Cause non-inlined mtx_*() to be compiled.
58 * Must be defined early because other system headers may include mutex.h.
59 */
60#define _KERN_MUTEX_C_
61
62#include <sys/param.h>
63#include <sys/bus.h>
64#include <sys/kernel.h>
65#include <sys/malloc.h>
66#include <sys/proc.h>
67#include <sys/sysctl.h>
68#include <sys/systm.h>
69#include <sys/vmmeter.h>
70#include <sys/ktr.h>
71
72#include <machine/atomic.h>
73#include <machine/bus.h>
74#include <machine/clock.h>
75#include <machine/cpu.h>
76
77#include <ddb/ddb.h>
78
79#include <vm/vm.h>
80#include <vm/vm_extern.h>
81
82#include <sys/mutex.h>
83
84/*
85 * Machine independent bits of the mutex implementation
86 */
87
88#ifdef WITNESS
89struct mtx_debug {
90	struct witness	*mtxd_witness;
91	LIST_ENTRY(mtx)	mtxd_held;
92	const char	*mtxd_file;
93	int		mtxd_line;
94	const char	*mtxd_description;
95};
96
97#define mtx_description	mtx_union.mtxu_debug->mtxd_description
98#define mtx_held	mtx_union.mtxu_debug->mtxd_held
99#define	mtx_file	mtx_union.mtxu_debug->mtxd_file
100#define	mtx_line	mtx_union.mtxu_debug->mtxd_line
101#define	mtx_witness	mtx_union.mtxu_debug->mtxd_witness
102#else	/* WITNESS */
103#define mtx_description	mtx_union.mtxu_description
104#endif	/* WITNESS */
105
106/*
107 * Assembly macros
108 *------------------------------------------------------------------------------
109 */
110
111#define	_V(x)	__STRING(x)
112
113/*
114 * Default, unoptimized mutex micro-operations
115 */
116
117#ifndef _obtain_lock
118/* Actually obtain mtx_lock */
119#define _obtain_lock(mp, tid)						\
120	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED, (tid))
121#endif
122
123#ifndef _release_lock
124/* Actually release mtx_lock */
125#define _release_lock(mp, tid)		       				\
126	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), (void *)MTX_UNOWNED)
127#endif
128
129#ifndef _release_lock_quick
130/* Actually release mtx_lock quickly assuming that we own it */
131#define	_release_lock_quick(mp) 					\
132	atomic_store_rel_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED)
133#endif
134
135#ifndef _getlock_sleep
136/* Get a sleep lock, deal with recursion inline. */
137#define	_getlock_sleep(mp, tid, type) do {				\
138	if (!_obtain_lock(mp, tid)) {					\
139		if (((mp)->mtx_lock & MTX_FLAGMASK) != ((uintptr_t)(tid)))\
140			mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0);	\
141		else {							\
142			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSED);	\
143			(mp)->mtx_recurse++;				\
144		}							\
145	}								\
146} while (0)
147#endif
148
149#ifndef _getlock_spin_block
150/* Get a spin lock, handle recursion inline (as the less common case) */
151#define	_getlock_spin_block(mp, tid, type) do {				\
152	u_int _mtx_intr = save_intr();					\
153	disable_intr();							\
154	if (!_obtain_lock(mp, tid))					\
155		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_intr);	\
156	else								\
157		(mp)->mtx_saveintr = _mtx_intr;				\
158} while (0)
159#endif
160
161#ifndef _getlock_norecurse
162/*
163 * Get a lock without any recursion handling. Calls the hard enter function if
164 * we can't get it inline.
165 */
166#define	_getlock_norecurse(mp, tid, type) do {				\
167	if (!_obtain_lock(mp, tid))					\
168		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);		\
169} while (0)
170#endif
171
172#ifndef _exitlock_norecurse
173/*
174 * Release a sleep lock assuming we haven't recursed on it, recursion is handled
175 * in the hard function.
176 */
177#define	_exitlock_norecurse(mp, tid, type) do {				\
178	if (!_release_lock(mp, tid))					\
179		mtx_exit_hard((mp), (type) & MTX_HARDOPTS);		\
180} while (0)
181#endif
182
183#ifndef _exitlock
184/*
185 * Release a sleep lock when its likely we recursed (the code to
186 * deal with simple recursion is inline).
187 */
188#define	_exitlock(mp, tid, type) do {					\
189	if (!_release_lock(mp, tid)) {					\
190		if ((mp)->mtx_lock & MTX_RECURSED) {			\
191			if (--((mp)->mtx_recurse) == 0)			\
192				atomic_clear_ptr(&(mp)->mtx_lock,	\
193				    MTX_RECURSED);			\
194		} else {						\
195			mtx_exit_hard((mp), (type) & MTX_HARDOPTS);	\
196		}							\
197	}								\
198} while (0)
199#endif
200
201#ifndef _exitlock_spin
202/* Release a spin lock (with possible recursion). */
203#define	_exitlock_spin(mp) do {						\
204	if (!mtx_recursed((mp))) {					\
205		int _mtx_intr = (mp)->mtx_saveintr;			\
206									\
207		_release_lock_quick(mp);				\
208		restore_intr(_mtx_intr);				\
209	} else {							\
210		(mp)->mtx_recurse--;					\
211	}								\
212} while (0)
213#endif
214
215#ifdef WITNESS
216static void	witness_init(struct mtx *, int flag);
217static void	witness_destroy(struct mtx *);
218static void	witness_display(void(*)(const char *fmt, ...));
219
220/* All mutexes in system (used for debug/panic) */
221static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
222	"All mutexes queue head" };
223static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, {&all_mtx_debug},
224	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
225	{ NULL, NULL }, &all_mtx, &all_mtx };
226/*
227 * Set to 0 once mutexes have been fully initialized so that witness code can be
228 * safely executed.
229 */
230static int witness_cold = 1;
231#else	/* WITNESS */
232/* All mutexes in system (used for debug/panic) */
233static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, {"All mutexes queue head"},
234	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
235	{ NULL, NULL }, &all_mtx, &all_mtx };
236
237/*
238 * flag++ is slezoid way of shutting up unused parameter warning
239 * in mtx_init()
240 */
241#define witness_init(m, flag) flag++
242#define witness_destroy(m)
243#define witness_try_enter(m, t, f, l)
244#endif	/* WITNESS */
245
246static int	mtx_cur_cnt;
247static int	mtx_max_cnt;
248
249static void	propagate_priority(struct proc *);
250static void	mtx_enter_hard(struct mtx *, int type, int saveintr);
251static void	mtx_exit_hard(struct mtx *, int type);
252
253#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
254#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
255			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
256
257#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
258#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
259
260static void
261propagate_priority(struct proc *p)
262{
263	int pri = p->p_priority;
264	struct mtx *m = p->p_blocked;
265
266	mtx_assert(&sched_lock, MA_OWNED);
267	for (;;) {
268		struct proc *p1;
269
270		p = mtx_owner(m);
271
272		if (p == NULL) {
273			/*
274			 * This really isn't quite right. Really
275			 * ought to bump priority of process that
276			 * next acquires the mutex.
277			 */
278			MPASS(m->mtx_lock == MTX_CONTESTED);
279			return;
280		}
281		MPASS(p->p_magic == P_MAGIC);
282		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
283		if (p->p_priority <= pri)
284			return;
285
286		/*
287		 * Bump this process' priority.
288		 */
289		SET_PRIO(p, pri);
290
291		/*
292		 * If lock holder is actually running, just bump priority.
293		 */
294#ifdef SMP
295		/*
296		 * For SMP, we can check the p_oncpu field to see if we are
297		 * running.
298		 */
299		if (p->p_oncpu != 0xff) {
300			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
301			return;
302		}
303#else
304		/*
305		 * For UP, we check to see if p is curproc (this shouldn't
306		 * ever happen however as it would mean we are in a deadlock.)
307		 */
308		if (p == curproc) {
309			panic("Deadlock detected");
310			return;
311		}
312#endif
313		/*
314		 * If on run queue move to new run queue, and
315		 * quit.
316		 */
317		if (p->p_stat == SRUN) {
318			printf("XXX: moving process %d(%s) to a new run queue\n",
319			       p->p_pid, p->p_comm);
320			MPASS(p->p_blocked == NULL);
321			remrunqueue(p);
322			setrunqueue(p);
323			return;
324		}
325
326		/*
327		 * If we aren't blocked on a mutex, we should be.
328		 */
329		KASSERT(p->p_stat == SMTX, (
330		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
331		    p->p_pid, p->p_comm, p->p_stat,
332		    m->mtx_description));
333
334		/*
335		 * Pick up the mutex that p is blocked on.
336		 */
337		m = p->p_blocked;
338		MPASS(m != NULL);
339
340		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
341		    p->p_comm, m->mtx_description);
342		/*
343		 * Check if the proc needs to be moved up on
344		 * the blocked chain
345		 */
346		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
347			printf("XXX: process at head of run queue\n");
348			continue;
349		}
350		p1 = TAILQ_PREV(p, rq, p_procq);
351		if (p1->p_priority <= pri) {
352			printf(
353	"XXX: previous process %d(%s) has higher priority\n",
354	                    p->p_pid, p->p_comm);
355			continue;
356		}
357
358		/*
359		 * Remove proc from blocked chain and determine where
360		 * it should be moved up to.  Since we know that p1 has
361		 * a lower priority than p, we know that at least one
362		 * process in the chain has a lower priority and that
363		 * p1 will thus not be NULL after the loop.
364		 */
365		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
366		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
367			MPASS(p1->p_magic == P_MAGIC);
368			if (p1->p_priority > pri)
369				break;
370		}
371		MPASS(p1 != NULL);
372		TAILQ_INSERT_BEFORE(p1, p, p_procq);
373		CTR4(KTR_LOCK,
374		    "propagate_priority: p 0x%p moved before 0x%p on [0x%p] %s",
375		    p, p1, m, m->mtx_description);
376	}
377}
378
379/*
380 * Get lock 'm', the macro handles the easy (and most common cases) and leaves
381 * the slow stuff to the mtx_enter_hard() function.
382 *
383 * Note: since type is usually a constant much of this code is optimized out.
384 */
/*
 * Acquire mutex 'mtxp' of kind 'type'.
 *
 * The MTX_* option bits in 'type' select spin vs. sleep behaviour and which
 * fast paths (recursion, interrupt blocking) are attempted inline; since
 * 'type' is usually a compile-time constant most branches fold away.  The
 * contested cases are punted to mtx_enter_hard() via the _getlock_*()
 * macros above.  'file'/'line' identify the call site for WITNESS and KTR.
 */
void
_mtx_enter(struct mtx *mtxp, int type, const char *file, int line)
{
	struct mtx	*mpp = mtxp;

	/* bits only valid on mtx_exit() */
	MPASS4(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
	    STR_mtx_bad_type, file, line);

	if ((type) & MTX_SPIN) {
		/*
		 * Easy cases of spin locks:
		 *
		 * 1) We already own the lock and will simply recurse on it (if
		 *    RLIKELY)
		 *
		 * 2) The lock is free, we just get it
		 */
		if ((type) & MTX_RLIKELY) {
			/*
			 * Check for recursion, if we already have this
			 * lock we just bump the recursion count.
			 */
			if (mpp->mtx_lock == (uintptr_t)CURTHD) {
				mpp->mtx_recurse++;
				goto done;
			}
		}

		if (((type) & MTX_TOPHALF) == 0) {
			/*
			 * If an interrupt thread uses this we must block
			 * interrupts here.
			 */
			if ((type) & MTX_FIRST) {
				ASS_IEN;
				disable_intr();
				_getlock_norecurse(mpp, CURTHD,
				    (type) & MTX_HARDOPTS);
			} else {
				/* Saves and disables interrupts itself. */
				_getlock_spin_block(mpp, CURTHD,
				    (type) & MTX_HARDOPTS);
			}
		} else
			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
	} else {
		/* Sleep locks */
		if ((type) & MTX_RLIKELY)
			_getlock_sleep(mpp, CURTHD, (type) & MTX_HARDOPTS);
		else
			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
	}
done:
	WITNESS_ENTER(mpp, type, file, line);
	if (((type) & MTX_QUIET) == 0)
		CTR5(KTR_LOCK, STR_mtx_enter_fmt,
		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);

}
444
445/*
446 * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
447 *
448 * XXX DOES NOT HANDLE RECURSION
449 */
int
_mtx_try_enter(struct mtx *mtxp, int type, const char *file, int line)
{
	struct mtx	*const mpp = mtxp;
	int	rval;

	/* One-shot attempt; non-zero rval means we now own the lock. */
	rval = _obtain_lock(mpp, CURTHD);
#ifdef WITNESS
	/* Only inform witness when the acquire actually succeeded. */
	if (rval && mpp->mtx_witness != NULL) {
		MPASS(mpp->mtx_recurse == 0);
		witness_try_enter(mpp, type, file, line);
	}
#endif	/* WITNESS */
	if (((type) & MTX_QUIET) == 0)
		CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
		    mpp->mtx_description, mpp, rval, file, line);

	return rval;
}
469
470/*
471 * Release lock m.
472 */
/*
 * Release mutex 'mtxp' of kind 'type'.  Mirrors _mtx_enter(): the easy
 * unlock paths are handled inline by the _exitlock*() macros, with
 * contested sleep locks deferred to mtx_exit_hard().  For spin locks the
 * saved interrupt state is restored (or interrupts re-enabled for
 * MTX_FIRST) once the lock is dropped.
 */
void
_mtx_exit(struct mtx *mtxp, int type, const char *file, int line)
{
	struct mtx	*const mpp = mtxp;

	MPASS4(mtx_owned(mpp), STR_mtx_owned, file, line);
	WITNESS_EXIT(mpp, type, file, line);
	if (((type) & MTX_QUIET) == 0)
		CTR5(KTR_LOCK, STR_mtx_exit_fmt,
		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
	if ((type) & MTX_SPIN) {
		if ((type) & MTX_NORECURSE) {
			/* Snapshot saved state before dropping the lock. */
			int mtx_intr = mpp->mtx_saveintr;

			MPASS4(mpp->mtx_recurse == 0, STR_mtx_recurse,
			    file, line);
			_release_lock_quick(mpp);
			if (((type) & MTX_TOPHALF) == 0) {
				if ((type) & MTX_FIRST) {
					ASS_IDIS;
					enable_intr();
				} else
					restore_intr(mtx_intr);
			}
		} else {
			if (((type & MTX_TOPHALF) == 0) &&
			    (type & MTX_FIRST)) {
				ASS_IDIS;
				ASS_SIEN(mpp);
			}
			/* Handles recursion and interrupt restore itself. */
			_exitlock_spin(mpp);
		}
	} else {
		/* Handle sleep locks */
		if ((type) & MTX_RLIKELY)
			_exitlock(mpp, CURTHD, (type) & MTX_HARDOPTS);
		else {
			_exitlock_norecurse(mpp, CURTHD,
			    (type) & MTX_HARDOPTS);
		}
	}
}
515
/*
 * Slow path for mutex acquisition, called from the _getlock_*() macros
 * when the inline compare-and-set failed.
 *
 * MTX_DEF: handle owner-recursion, then block the current process on the
 * mutex's blocked queue (sorted by priority) under sched_lock until the
 * lock can be taken.  MTX_SPIN variants: spin (with a panic timeout) until
 * the lock is free, then record 'saveintr' for the eventual release.
 */
void
mtx_enter_hard(struct mtx *m, int type, int saveintr)
{
	struct proc *p = CURPROC;

	KASSERT(p != NULL, ("curproc is NULL in mutex"));

	switch (type) {
	case MTX_DEF:
		/* We already own it: just mark and count the recursion. */
		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
			m->mtx_recurse++;
			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
			if ((type & MTX_QUIET) == 0)
				CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
			return;
		}
		if ((type & MTX_QUIET) == 0)
			CTR3(KTR_LOCK,
			    "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
			    m, (void *)m->mtx_lock, (void *)RETIP(m));

		/*
		 * Save our priority.  Even though p_nativepri is protected
		 * by sched_lock, we don't obtain it here as it can be
		 * expensive.  Since this is the only place p_nativepri is
		 * set, and since two CPUs will not be executing the same
		 * process concurrently, we know that no other CPU is going
		 * to be messing with this.  Also, p_nativepri is only read
		 * when we are blocked on a mutex, so that can't be happening
		 * right now either.
		 */
		p->p_nativepri = p->p_priority;
		while (!_obtain_lock(m, p)) {
			uintptr_t v;
			struct proc *p1;

			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
			/*
			 * check if the lock has been released while
			 * waiting for the schedlock.
			 */
			if ((v = m->mtx_lock) == MTX_UNOWNED) {
				mtx_exit(&sched_lock, MTX_SPIN);
				continue;
			}
			/*
			 * The mutex was marked contested on release. This
			 * means that there are processes blocked on it.
			 * Take ownership directly, keeping the contested
			 * bit, and inherit the top waiter's priority.
			 */
			if (v == MTX_CONTESTED) {
				p1 = TAILQ_FIRST(&m->mtx_blocked);
				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
				if (p1->p_priority < p->p_priority) {
					SET_PRIO(p, p1->p_priority);
				}
				mtx_exit(&sched_lock, MTX_SPIN);
				return;
			}
			/*
			 * If the mutex isn't already contested and
			 * a failure occurs setting the contested bit the
			 * mutex was either release or the
			 * state of the RECURSION bit changed.
			 */
			if ((v & MTX_CONTESTED) == 0 &&
			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
				               (void *)(v | MTX_CONTESTED))) {
				mtx_exit(&sched_lock, MTX_SPIN);
				continue;
			}

			/* We definitely have to sleep for this lock */
			mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
			/*
			 * If we're borrowing an interrupted thread's VM
			 * context must clean up before going to sleep.
			 */
			if (p->p_flag & (P_ITHD | P_SITHD)) {
				ithd_t *it = (ithd_t *)p;

				if (it->it_interrupted) {
					if ((type & MTX_QUIET) == 0)
						CTR2(KTR_LOCK,
					    "mtx_enter: 0x%x interrupted 0x%x",
						    it, it->it_interrupted);
					intr_thd_fixup(it);
				}
			}
#endif

			/*
			 * Put us on the list of procs blocked on this mutex;
			 * the list is kept sorted by descending priority
			 * (lowest p_priority value first).
			 */
			if (TAILQ_EMPTY(&m->mtx_blocked)) {
				/* First waiter: link m onto the owner's
				 * contested-mutex list as well. */
				p1 = (struct proc *)(m->mtx_lock &
						     MTX_FLAGMASK);
				LIST_INSERT_HEAD(&p1->p_contested, m,
						 mtx_contested);
				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
			} else {
				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
					if (p1->p_priority > p->p_priority)
						break;
				if (p1)
					TAILQ_INSERT_BEFORE(p1, p, p_procq);
				else
					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
							  p_procq);
			}

			p->p_blocked = m;	/* Who we're blocked on */
			p->p_mtxname = m->mtx_description;
			p->p_stat = SMTX;
#if 0
			propagate_priority(p);
#endif
			if ((type & MTX_QUIET) == 0)
				CTR3(KTR_LOCK,
				    "mtx_enter: p 0x%p blocked on [0x%p] %s",
				    p, m, m->mtx_description);
			/* Switch away holding sched_lock; we come back here
			 * when mtx_exit_hard() puts us back on a run queue. */
			mi_switch();
			if ((type & MTX_QUIET) == 0)
				CTR3(KTR_LOCK,
			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
				    p, m, m->mtx_description);
			mtx_exit(&sched_lock, MTX_SPIN);
		}
		return;
	case MTX_SPIN:
	case MTX_SPIN | MTX_FIRST:
	case MTX_SPIN | MTX_TOPHALF:
	    {
		int i = 0;

		if (m->mtx_lock == (uintptr_t)p) {
			m->mtx_recurse++;
			return;
		}
		if ((type & MTX_QUIET) == 0)
			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
		for (;;) {
			if (_obtain_lock(m, p))
				break;
			/* Spin-read until free, backing off with DELAY()
			 * and eventually panicking on an apparent hang. */
			while (m->mtx_lock != MTX_UNOWNED) {
				if (i++ < 1000000)
					continue;
				if (i++ < 6000000)
					DELAY (1);
#ifdef DDB
				else if (!db_active)
#else
				else
#endif
					panic(
				"spin lock %s held by 0x%p for > 5 seconds",
					    m->mtx_description,
					    (void *)m->mtx_lock);
			}
		}

#ifdef MUTEX_DEBUG
		if (type != MTX_SPIN)
			m->mtx_saveintr = 0xbeefface;
		else
#endif
			m->mtx_saveintr = saveintr;
		if ((type & MTX_QUIET) == 0)
			CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
		return;
	    }
	}
}
690
/*
 * Slow path for mutex release, called when the inline release macros
 * could not drop the lock (contested or recursed).
 *
 * MTX_DEF: pop recursion, or wake the highest-priority waiter, restore
 * this process's own (possibly lent) priority, and optionally switch to
 * the woken process if it outranks us.  MTX_SPIN variants: pop recursion
 * or release and restore the saved interrupt state.
 */
void
mtx_exit_hard(struct mtx *m, int type)
{
	struct proc *p, *p1;
	struct mtx *m1;
	int pri;

	p = CURPROC;
	switch (type) {
	case MTX_DEF:
	case MTX_DEF | MTX_NOSWITCH:
		if (mtx_recursed(m)) {
			/* Just unwind one level of recursion. */
			if (--(m->mtx_recurse) == 0)
				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
			if ((type & MTX_QUIET) == 0)
				CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
			return;
		}
		mtx_enter(&sched_lock, MTX_SPIN);
		if ((type & MTX_QUIET) == 0)
			CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
		/* Head of the blocked queue is the highest-priority waiter. */
		p1 = TAILQ_FIRST(&m->mtx_blocked);
		MPASS(p->p_magic == P_MAGIC);
		MPASS(p1->p_magic == P_MAGIC);
		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			/* Last waiter: fully release the lock. */
			LIST_REMOVE(m, mtx_contested);
			_release_lock_quick(m);
			if ((type & MTX_QUIET) == 0)
				CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
		} else
			/* More waiters: leave the lock marked contested so
			 * the next owner takes the handoff path. */
			atomic_store_rel_ptr(&m->mtx_lock,
			    (void *)MTX_CONTESTED);
		/* Recompute our priority: the max of what the mutexes we
		 * still hold contested lend us, capped at our native one. */
		pri = MAXPRI;
		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
			if (cp < pri)
				pri = cp;
		}
		if (pri > p->p_nativepri)
			pri = p->p_nativepri;
		SET_PRIO(p, pri);
		if ((type & MTX_QUIET) == 0)
			CTR2(KTR_LOCK,
			    "mtx_exit: 0x%p contested setrunqueue 0x%p", m, p1);
		p1->p_blocked = NULL;
		p1->p_mtxname = NULL;
		p1->p_stat = SRUN;
		setrunqueue(p1);
		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
#ifdef notyet
			if (p->p_flag & (P_ITHD | P_SITHD)) {
				ithd_t *it = (ithd_t *)p;

				if (it->it_interrupted) {
					if ((type & MTX_QUIET) == 0)
						CTR2(KTR_LOCK,
					    "mtx_exit: 0x%x interruped 0x%x",
						    it, it->it_interrupted);
					intr_thd_fixup(it);
				}
			}
#endif
			/* The woken process outranks us: yield to it now. */
			setrunqueue(p);
			if ((type & MTX_QUIET) == 0)
				CTR2(KTR_LOCK,
				    "mtx_exit: 0x%p switching out lock=0x%p",
				    m, (void *)m->mtx_lock);
			mi_switch();
			if ((type & MTX_QUIET) == 0)
				CTR2(KTR_LOCK,
				    "mtx_exit: 0x%p resuming lock=0x%p",
				    m, (void *)m->mtx_lock);
		}
		mtx_exit(&sched_lock, MTX_SPIN);
		break;
	case MTX_SPIN:
	case MTX_SPIN | MTX_FIRST:
		if (mtx_recursed(m)) {
			m->mtx_recurse--;
			return;
		}
		MPASS(mtx_owned(m));
		_release_lock_quick(m);
		if (type & MTX_FIRST)
			enable_intr();	/* XXX is this kosher? */
		else {
			MPASS(m->mtx_saveintr != 0xbeefface);
			restore_intr(m->mtx_saveintr);
		}
		break;
	case MTX_SPIN | MTX_TOPHALF:
		if (mtx_recursed(m)) {
			m->mtx_recurse--;
			return;
		}
		MPASS(mtx_owned(m));
		_release_lock_quick(m);
		break;
	default:
		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
	}
}
794
#ifdef INVARIANTS
/*
 * Panic unless mutex 'm' is in the state described by 'what' (MA_OWNED,
 * optionally or'd with MA_RECURSED/MA_NOTRECURSED, or MA_NOTOWNED).
 *
 * NOTE(review): the panic messages report __FILE__/__LINE__ of this file,
 * not of the caller, so they always point here rather than at the
 * offending call site.
 */
void
mtx_assert(struct mtx *m, int what)
{
	switch ((what)) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned((m)))
			panic("mutex %s not owned at %s:%d",
			    (m)->mtx_description, __FILE__, __LINE__);
		if (mtx_recursed((m))) {
			if (((what) & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    (m)->mtx_description, __FILE__, __LINE__);
		} else if (((what) & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    (m)->mtx_description, __FILE__, __LINE__);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned((m)))
			panic("mutex %s owned at %s:%d",
			    (m)->mtx_description, __FILE__, __LINE__);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__);
	}
}
#endif
825
826#define MV_DESTROY	0	/* validate before destory */
827#define MV_INIT		1	/* validate before init */
828
829#ifdef MUTEX_DEBUG
830
831int mtx_validate __P((struct mtx *, int));
832
/*
 * Debugging consistency check on the global all-mutex list.
 *
 * Walks the doubly-linked list rooted at all_mtx, verifying readability
 * of the links and that the count matches mtx_cur_cnt, then checks 'm'
 * against the list: for MV_DESTROY, m must be present; for MV_INIT,
 * returns 1 (caller should skip re-initialization) if m is already on
 * the list.  Returns 0 otherwise, and immediately when called too early
 * (cold / witness_cold) or on all_mtx itself.
 */
int
mtx_validate(struct mtx *m, int when)
{
	struct mtx *mp;
	int i;
	int retval = 0;

#ifdef WITNESS
	if (witness_cold)
		return 0;
#endif
	if (m == &all_mtx || cold)
		return 0;

	mtx_enter(&all_mtx, MTX_DEF);
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
	    VM_PROT_READ) == 1);
#endif
	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
#ifndef __alpha__
		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
		    VM_PROT_READ) != 1) {
			panic("mtx_validate: mp=%p mp->mtx_next=%p",
			    mp, mp->mtx_next);
		}
#endif
		i++;
		if (i > mtx_cur_cnt) {
			panic("mtx_validate: too many in chain, known=%d\n",
			    mtx_cur_cnt);
		}
	}
	MPASS(i == mtx_cur_cnt);
	switch (when) {
	case MV_DESTROY:
		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
			if (mp == m)
				break;
		MPASS(mp == m);
		break;
	case MV_INIT:
		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
		if (mp == m) {
			/*
			 * Not good. This mutex already exists.
			 */
			printf("re-initing existing mutex %s\n",
			    m->mtx_description);
			MPASS(m->mtx_lock == MTX_UNOWNED);
			retval = 1;
		}
	}
	mtx_exit(&all_mtx, MTX_DEF);
	return (retval);
}
894#endif
895
/*
 * Initialize mutex 'm' with description 't' and MTX_* option bits 'flag',
 * and link it onto the global all-mutex list.  If WITNESS is compiled in
 * but still cold, only the description pointer is stashed; witness_fixup()
 * finishes the job at SI_SUB_MUTEX time.
 */
void
mtx_init(struct mtx *m, const char *t, int flag)
{
	if ((flag & MTX_QUIET) == 0)
		CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
#ifdef MUTEX_DEBUG
	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
		return;
#endif

	bzero((void *)m, sizeof *m);
	TAILQ_INIT(&m->mtx_blocked);
#ifdef WITNESS
	if (!witness_cold) {
		/* XXX - should not use DEVBUF */
		m->mtx_union.mtxu_debug = malloc(sizeof(struct mtx_debug),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		MPASS(m->mtx_union.mtxu_debug != NULL);

		m->mtx_description = t;
	} else {
		/*
		 * Save a pointer to the description so that witness_fixup()
		 * can properly initialize this mutex later on.
		 */
		m->mtx_union.mtxu_description = t;
	}
#else
	m->mtx_description = t;
#endif

	m->mtx_flags = flag;
	m->mtx_lock = MTX_UNOWNED;
	/* Put on all mutex queue */
	mtx_enter(&all_mtx, MTX_DEF);
	m->mtx_next = &all_mtx;
	m->mtx_prev = all_mtx.mtx_prev;
	m->mtx_prev->mtx_next = m;
	all_mtx.mtx_prev = m;
	if (++mtx_cur_cnt > mtx_max_cnt)
		mtx_max_cnt = mtx_cur_cnt;
	mtx_exit(&all_mtx, MTX_DEF);
#ifdef WITNESS
	if (!witness_cold)
		witness_init(m, flag);
#endif
}
943
/*
 * Tear down mutex 'm': sanity-check its state (MUTEX_DEBUG), detach it
 * from witness tracking, unlink it from the global all-mutex list, and
 * free its debug storage (WITNESS).  The mutex must be either unowned or
 * owned un-recursed and un-contested by the caller.
 */
void
mtx_destroy(struct mtx *m)
{

#ifdef WITNESS
	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
	    __FUNCTION__));
#endif
	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
#ifdef MUTEX_DEBUG
	if (m->mtx_next == NULL)
		panic("mtx_destroy: %p (%s) already destroyed",
		    m, m->mtx_description);

	if (!mtx_owned(m)) {
		MPASS(m->mtx_lock == MTX_UNOWNED);
	} else {
		/* Owned is OK, but not recursed or contested. */
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
	}
	mtx_validate(m, MV_DESTROY);		/* diagnostic */
#endif

#ifdef WITNESS
	if (m->mtx_witness)
		witness_destroy(m);
#endif /* WITNESS */

	/* Remove from the all mutex queue */
	mtx_enter(&all_mtx, MTX_DEF);
	m->mtx_next->mtx_prev = m->mtx_prev;
	m->mtx_prev->mtx_next = m->mtx_next;
#ifdef MUTEX_DEBUG
	/* Make a later double-destroy detectable. */
	m->mtx_next = m->mtx_prev = NULL;
#endif
#ifdef WITNESS
	free(m->mtx_union.mtxu_debug, M_DEVBUF);
	m->mtx_union.mtxu_debug = NULL;
#endif
	mtx_cur_cnt--;
	mtx_exit(&all_mtx, MTX_DEF);
}
985
/*
 * SYSINIT hook (SI_SUB_MUTEX): finish initializing every mutex created
 * while witness_cold was set.  Such mutexes only stashed their description
 * pointer in mtx_init(); here we allocate their mtx_debug storage, restore
 * the description, enroll them with witness, and then clear witness_cold.
 */
static void
witness_fixup(void *dummy __unused)
{
#ifdef WITNESS
	struct mtx *mp;
	const char *description;

	/* Iterate through all mutexes and finish up mutex initialization. */
	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
		description = mp->mtx_union.mtxu_description;

		/* XXX - should not use DEVBUF */
		mp->mtx_union.mtxu_debug = malloc(sizeof(struct mtx_debug),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		MPASS(mp->mtx_union.mtxu_debug != NULL);

		mp->mtx_description = description;

		witness_init(mp, mp->mtx_flags);
	}

	/* Mark the witness code as being ready for use. */
	atomic_store_rel_int(&witness_cold, 0);
#endif
}
SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
1012
1013/*
1014 * The non-inlined versions of the mtx_*() functions are always built (above),
1015 * but the witness code depends on the WITNESS kernel option being specified.
1016 */
1017#ifdef WITNESS
1018
1019#define WITNESS_COUNT 200
1020#define	WITNESS_NCHILDREN 2
1021
1022int witness_watch = 1;
1023
1024struct witness {
1025	struct witness	*w_next;
1026	const char	*w_description;
1027	const char	*w_file;
1028	int		 w_line;
1029	struct witness	*w_morechildren;
1030	u_char		 w_childcnt;
1031	u_char		 w_Giant_squawked:1;
1032	u_char		 w_other_squawked:1;
1033	u_char		 w_same_squawked:1;
1034	u_char		 w_sleep:1;	/* MTX_DEF type mutex. */
1035	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
1036	u_char		 w_recurse:1;	/* MTX_RECURSE mutex option. */
1037	u_int		 w_level;
1038	struct witness	*w_children[WITNESS_NCHILDREN];
1039};
1040
1041struct witness_blessed {
1042	char 	*b_lock1;
1043	char	*b_lock2;
1044};
1045
1046#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
1053#ifdef WITNESS_DDB
1054int	witness_ddb = 1;
1055#else
1056int	witness_ddb = 0;
1057#endif
1058SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
1059#endif /* DDB */
1060
1061#ifdef WITNESS_SKIPSPIN
1062int	witness_skipspin = 1;
1063#else
1064int	witness_skipspin = 0;
1065#endif
1066SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
1067    "");
1068
1069static struct mtx	w_mtx;
1070static struct witness	*w_free;
1071static struct witness	*w_all;
1072static int		 w_inited;
1073static int		 witness_dead;	/* fatal error, probably no memory */
1074
1075static struct witness	 w_data[WITNESS_COUNT];
1076
1077static struct witness	 *enroll __P((const char *description, int flag));
1078static int itismychild __P((struct witness *parent, struct witness *child));
1079static void removechild __P((struct witness *parent, struct witness *child));
1080static int isitmychild __P((struct witness *parent, struct witness *child));
1081static int isitmydescendant __P((struct witness *parent, struct witness *child));
1082static int dup_ok __P((struct witness *));
1083static int blessed __P((struct witness *, struct witness *));
1084static void witness_displaydescendants
1085    __P((void(*)(const char *fmt, ...), struct witness *));
1086static void witness_leveldescendents __P((struct witness *parent, int level));
1087static void witness_levelall __P((void));
1088static struct witness * witness_get __P((void));
1089static void witness_free __P((struct witness *m));
1090
1091
/*
 * Mutex names that are never enrolled with witness; the witness lock
 * itself must be exempt to avoid recursing into witness code.
 */
static char *ignore_list[] = {
	"witness lock",
	NULL
};

/*
 * Required acquisition order for spin mutexes.  A spin mutex whose
 * name is not listed here panics in enroll(); a lock's position in
 * this list determines its level bit (1 << index), which
 * witness_enter() checks against the per-CPU mask of held spin locks.
 */
static char *spin_order_list[] = {
	"sio",
	"sched lock",
#ifdef __i386__
	"clk",
#endif
	"callout",
	/*
	 * leaf locks
	 */
	NULL
};

/*
 * Pre-declared sleep mutex orderings: each NULL-terminated run of
 * names is turned into a parent->child chain on first enroll().
 */
static char *order_list[] = {
	"uidinfo hash", "uidinfo struct", NULL,
	NULL
};

/* Mutex names for which same-type duplicate acquisition is allowed. */
static char *dup_list[] = {
	NULL
};

/* Mutexes that may legitimately be held while going to sleep. */
static char *sleep_list[] = {
	"Giant",
	NULL
};

/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
1131
/*
 * Attach a witness to a newly initialized mutex, keyed by the mutex's
 * name.  enroll() may return NULL (witness disabled, ignored name,
 * skipped spin lock, or witness structures exhausted); the rest of the
 * witness code treats a NULL mtx_witness as "not tracked".
 */
static void
witness_init(struct mtx *m, int flag)
{
	m->mtx_witness = enroll(m->mtx_description, flag);
}
1137
1138static void
1139witness_destroy(struct mtx *m)
1140{
1141	struct mtx *m1;
1142	struct proc *p;
1143	p = CURPROC;
1144	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
1145		m1 = LIST_NEXT(m1, mtx_held)) {
1146		if (m1 == m) {
1147			LIST_REMOVE(m, mtx_held);
1148			break;
1149		}
1150	}
1151	return;
1152
1153}
1154
/*
 * Dump the witness tree through the given printf-like callback:
 * first every acquired root witness (one with no parent) with its
 * descendants, then the list of witnesses never acquired (w_file
 * still NULL).
 */
static void
witness_display(void(*prnt)(const char *fmt, ...))
{
	struct witness *w, *w1;

	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
	witness_levelall();

	for (w = w_all; w; w = w->w_next) {
		if (w->w_file == NULL)
			continue;
		/* Skip any witness that is some other witness' child. */
		for (w1 = w_all; w1; w1 = w1->w_next) {
			if (isitmychild(w1, w))
				break;
		}
		if (w1 != NULL)
			continue;
		/*
		 * This lock has no ancestors, display its descendants.
		 */
		witness_displaydescendants(prnt, w);
	}
	prnt("\nMutex which were never acquired\n");
	for (w = w_all; w; w = w->w_next) {
		if (w->w_file != NULL)
			continue;
		prnt("%s\n", w->w_description);
	}
}
1184
1185void
1186witness_enter(struct mtx *m, int flags, const char *file, int line)
1187{
1188	struct witness *w, *w1;
1189	struct mtx *m1;
1190	struct proc *p;
1191	int i;
1192#ifdef DDB
1193	int go_into_ddb = 0;
1194#endif /* DDB */
1195
1196	if (witness_cold || m->mtx_witness == NULL || panicstr)
1197		return;
1198	w = m->mtx_witness;
1199	p = CURPROC;
1200
1201	if (flags & MTX_SPIN) {
1202		if (!(w->w_spin))
1203			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1204			    " %s:%d", m->mtx_description, file, line);
1205		if (mtx_recursed(m)) {
1206			if (!(w->w_recurse))
1207				panic("mutex_enter: recursion on non-recursive"
1208				    " mutex %s @ %s:%d", m->mtx_description,
1209				    file, line);
1210			return;
1211		}
1212		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1213		i = PCPU_GET(witness_spin_check);
1214		if (i != 0 && w->w_level < i) {
1215			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1216			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1217			    " %s:%d already holding %s:%x",
1218			    m->mtx_description, w->w_level, file, line,
1219			    spin_order_list[ffs(i)-1], i);
1220		}
1221		PCPU_SET(witness_spin_check, i | w->w_level);
1222		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1223		w->w_file = file;
1224		w->w_line = line;
1225		m->mtx_line = line;
1226		m->mtx_file = file;
1227		return;
1228	}
1229	if (w->w_spin)
1230		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1231		    m->mtx_description, file, line);
1232
1233	if (mtx_recursed(m)) {
1234		if (!(w->w_recurse))
1235			panic("mutex_enter: recursion on non-recursive"
1236			    " mutex %s @ %s:%d", m->mtx_description,
1237			    file, line);
1238		return;
1239	}
1240	if (witness_dead)
1241		goto out;
1242	if (cold)
1243		goto out;
1244
1245	if (!mtx_legal2block())
1246		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
1247			    m->mtx_description, file, line);
1248	/*
1249	 * Is this the first mutex acquired
1250	 */
1251	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1252		goto out;
1253
1254	if ((w1 = m1->mtx_witness) == w) {
1255		if (w->w_same_squawked || dup_ok(w))
1256			goto out;
1257		w->w_same_squawked = 1;
1258		printf("acquring duplicate lock of same type: \"%s\"\n",
1259			m->mtx_description);
1260		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1261		printf(" 2nd @ %s:%d\n", file, line);
1262#ifdef DDB
1263		go_into_ddb = 1;
1264#endif /* DDB */
1265		goto out;
1266	}
1267	MPASS(!mtx_owned(&w_mtx));
1268	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1269	/*
1270	 * If we have a known higher number just say ok
1271	 */
1272	if (witness_watch > 1 && w->w_level > w1->w_level) {
1273		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1274		goto out;
1275	}
1276	if (isitmydescendant(m1->mtx_witness, w)) {
1277		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1278		goto out;
1279	}
1280	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1281
1282		MPASS(i < 200);
1283		w1 = m1->mtx_witness;
1284		if (isitmydescendant(w, w1)) {
1285			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1286			if (blessed(w, w1))
1287				goto out;
1288			if (m1 == &Giant) {
1289				if (w1->w_Giant_squawked)
1290					goto out;
1291				else
1292					w1->w_Giant_squawked = 1;
1293			} else {
1294				if (w1->w_other_squawked)
1295					goto out;
1296				else
1297					w1->w_other_squawked = 1;
1298			}
1299			printf("lock order reversal\n");
1300			printf(" 1st %s last acquired @ %s:%d\n",
1301			    w->w_description, w->w_file, w->w_line);
1302			printf(" 2nd %p %s @ %s:%d\n",
1303			    m1, w1->w_description, w1->w_file, w1->w_line);
1304			printf(" 3rd %p %s @ %s:%d\n",
1305			    m, w->w_description, file, line);
1306#ifdef DDB
1307			go_into_ddb = 1;
1308#endif /* DDB */
1309			goto out;
1310		}
1311	}
1312	m1 = LIST_FIRST(&p->p_heldmtx);
1313	if (!itismychild(m1->mtx_witness, w))
1314		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1315
1316out:
1317#ifdef DDB
1318	if (witness_ddb && go_into_ddb)
1319		Debugger("witness_enter");
1320#endif /* DDB */
1321	w->w_file = file;
1322	w->w_line = line;
1323	m->mtx_line = line;
1324	m->mtx_file = file;
1325
1326	/*
1327	 * If this pays off it likely means that a mutex being witnessed
1328	 * is acquired in hardclock. Put it in the ignore list. It is
1329	 * likely not the mutex this assert fails on.
1330	 */
1331	MPASS(m->mtx_held.le_prev == NULL);
1332	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1333}
1334
1335void
1336witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1337{
1338	struct proc *p;
1339	struct witness *w = m->mtx_witness;
1340
1341	if (witness_cold)
1342		return;
1343	if (panicstr)
1344		return;
1345	if (flags & MTX_SPIN) {
1346		if (!(w->w_spin))
1347			panic("mutex_try_enter: "
1348			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1349			    m->mtx_description, file, line);
1350		if (mtx_recursed(m)) {
1351			if (!(w->w_recurse))
1352				panic("mutex_try_enter: recursion on"
1353				    " non-recursive mutex %s @ %s:%d",
1354				    m->mtx_description, file, line);
1355			return;
1356		}
1357		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1358		PCPU_SET(witness_spin_check,
1359		    PCPU_GET(witness_spin_check) | w->w_level);
1360		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1361		w->w_file = file;
1362		w->w_line = line;
1363		m->mtx_line = line;
1364		m->mtx_file = file;
1365		return;
1366	}
1367
1368	if (w->w_spin)
1369		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1370		    m->mtx_description, file, line);
1371
1372	if (mtx_recursed(m)) {
1373		if (!(w->w_recurse))
1374			panic("mutex_try_enter: recursion on non-recursive"
1375			    " mutex %s @ %s:%d", m->mtx_description, file,
1376			    line);
1377		return;
1378	}
1379	w->w_file = file;
1380	w->w_line = line;
1381	m->mtx_line = line;
1382	m->mtx_file = file;
1383	p = CURPROC;
1384	MPASS(m->mtx_held.le_prev == NULL);
1385	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1386}
1387
/*
 * Witness hook for mutex release: validate the lock class and
 * recursion state, clear the per-CPU spin level bit for spin mutexes,
 * verify a context switch would be legal if one may occur, and unlink
 * the mutex from the process' held-mutex list.
 */
void
witness_exit(struct mtx *m, int flags, const char *file, int line)
{
	struct witness *w;

	if (witness_cold || m->mtx_witness == NULL || panicstr)
		return;
	w = m->mtx_witness;

	if (flags & MTX_SPIN) {
		if (!(w->w_spin))
			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
			    " %s:%d", m->mtx_description, file, line);
		if (mtx_recursed(m)) {
			if (!(w->w_recurse))
				panic("mutex_exit: recursion on non-recursive"
				    " mutex %s @ %s:%d", m->mtx_description,
				    file, line);
			return;
		}
		/* Drop this lock's level bit from the per-CPU spin mask. */
		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
		PCPU_SET(witness_spin_check,
		    PCPU_GET(witness_spin_check) & ~w->w_level);
		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
		return;
	}
	if (w->w_spin)
		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
		    m->mtx_description, file, line);

	if (mtx_recursed(m)) {
		if (!(w->w_recurse))
			panic("mutex_exit: recursion on non-recursive"
			    " mutex %s @ %s:%d", m->mtx_description,
			    file, line);
		return;
	}

	/* Releasing with a switch possible must be legal here. */
	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
			    m->mtx_description, file, line);
	LIST_REMOVE(m, mtx_held);
	m->mtx_held.le_prev = NULL;
}
1432
1433int
1434witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1435{
1436	struct mtx *m;
1437	struct proc *p;
1438	char **sleep;
1439	int n = 0;
1440
1441	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1442	p = CURPROC;
1443	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1444	    m = LIST_NEXT(m, mtx_held)) {
1445		if (m == mtx)
1446			continue;
1447		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1448			if (strcmp(m->mtx_description, *sleep) == 0)
1449				goto next;
1450		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1451			file, line, check_only ? "could sleep" : "sleeping",
1452			m->mtx_description,
1453			m->mtx_witness->w_file, m->mtx_witness->w_line);
1454		n++;
1455	next:
1456	}
1457#ifdef DDB
1458	if (witness_ddb && n)
1459		Debugger("witness_sleep");
1460#endif /* DDB */
1461	return (n);
1462}
1463
/*
 * Look up (or create) the witness for a mutex name.  Returns NULL when
 * witness is disabled, the name is on the ignore list, spin locks are
 * being skipped, or witness structures are exhausted.  The first call
 * also performs one-time initialization: the witness spin lock, the
 * free list, and the pre-declared sleep orderings from order_list
 * (installed via recursive enroll() calls).
 */
static struct witness *
enroll(const char *description, int flag)
{
	int i;
	struct witness *w, *w1;
	char **ignore;
	char **order;

	if (!witness_watch)
		return (NULL);
	for (ignore = ignore_list; *ignore != NULL; ignore++)
		if (strcmp(description, *ignore) == 0)
			return (NULL);

	if (w_inited == 0) {
		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
		for (i = 0; i < WITNESS_COUNT; i++) {
			w = &w_data[i];
			witness_free(w);
		}
		w_inited = 1;
		/*
		 * order_list holds NULL-terminated runs of names; each
		 * run is chained parent->child in order.
		 */
		for (order = order_list; *order != NULL; order++) {
			w = enroll(*order, MTX_DEF);
			w->w_file = "order list";
			for (order++; *order != NULL; order++) {
				w1 = enroll(*order, MTX_DEF);
				w1->w_file = "order list";
				itismychild(w, w1);
				w = w1;
			}
		}
	}
	if ((flag & MTX_SPIN) && witness_skipspin)
		return (NULL);
	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
	/* Reuse an existing witness with the same name. */
	for (w = w_all; w; w = w->w_next) {
		if (strcmp(description, w->w_description) == 0) {
			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
			return (w);
		}
	}
	/* witness_get() releases w_mtx itself on failure. */
	if ((w = witness_get()) == NULL)
		return (NULL);
	w->w_next = w_all;
	w_all = w;
	w->w_description = description;
	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
	if (flag & MTX_SPIN) {
		w->w_spin = 1;

		/* Level bit is 1 << (index in spin_order_list). */
		i = 1;
		for (order = spin_order_list; *order != NULL; order++) {
			if (strcmp(description, *order) == 0)
				break;
			i <<= 1;
		}
		if (*order == NULL)
			panic("spin lock %s not in order list", description);
		w->w_level = i;
	} else
		w->w_sleep = 1;

	if (flag & MTX_RECURSE)
		w->w_recurse = 1;

	return (w);
}
1531
/*
 * Record "child" as a direct child of "parent", growing the parent's
 * overflow chain (w_morechildren) when the child array is full, then
 * re-prune the whole tree and recompute levels.  Returns 1 only when
 * an overflow witness could not be allocated (in which case
 * witness_get() has already released w_mtx); returns 0 otherwise.
 */
static int
itismychild(struct witness *parent, struct witness *child)
{
	static int recursed;	/* suppress re-pruning from within pruning */

	/*
	 * Insert "child" after "parent"
	 */
	while (parent->w_morechildren)
		parent = parent->w_morechildren;

	if (parent->w_childcnt == WITNESS_NCHILDREN) {
		if ((parent->w_morechildren = witness_get()) == NULL)
			return (1);
		parent = parent->w_morechildren;
	}
	MPASS(child != NULL);
	parent->w_children[parent->w_childcnt++] = child;
	/*
	 * now prune whole tree
	 */
	if (recursed)
		return (0);
	recursed = 1;
	/*
	 * For every (parent, child) edge, remove the direct link and
	 * re-add it only if the child is not already reachable as a
	 * deeper descendant.
	 */
	for (child = w_all; child != NULL; child = child->w_next) {
		for (parent = w_all; parent != NULL;
		    parent = parent->w_next) {
			if (!isitmychild(parent, child))
				continue;
			removechild(parent, child);
			if (isitmydescendant(parent, child))
				continue;
			itismychild(parent, child);
		}
	}
	recursed = 0;
	witness_levelall();
	return (0);
}
1571
1572static void
1573removechild(struct witness *parent, struct witness *child)
1574{
1575	struct witness *w, *w1;
1576	int i;
1577
1578	for (w = parent; w != NULL; w = w->w_morechildren)
1579		for (i = 0; i < w->w_childcnt; i++)
1580			if (w->w_children[i] == child)
1581				goto found;
1582	return;
1583found:
1584	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1585		continue;
1586	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1587	MPASS(w->w_children[i] != NULL);
1588
1589	if (w1->w_childcnt != 0)
1590		return;
1591
1592	if (w1 == parent)
1593		return;
1594	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1595		continue;
1596	w->w_morechildren = 0;
1597	witness_free(w1);
1598}
1599
1600static int
1601isitmychild(struct witness *parent, struct witness *child)
1602{
1603	struct witness *w;
1604	int i;
1605
1606	for (w = parent; w != NULL; w = w->w_morechildren) {
1607		for (i = 0; i < w->w_childcnt; i++) {
1608			if (w->w_children[i] == child)
1609				return (1);
1610		}
1611	}
1612	return (0);
1613}
1614
1615static int
1616isitmydescendant(struct witness *parent, struct witness *child)
1617{
1618	struct witness *w;
1619	int i;
1620	int j;
1621
1622	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1623		MPASS(j < 1000);
1624		for (i = 0; i < w->w_childcnt; i++) {
1625			if (w->w_children[i] == child)
1626				return (1);
1627		}
1628		for (i = 0; i < w->w_childcnt; i++) {
1629			if (isitmydescendant(w->w_children[i], child))
1630				return (1);
1631		}
1632	}
1633	return (0);
1634}
1635
1636void
1637witness_levelall (void)
1638{
1639	struct witness *w, *w1;
1640
1641	for (w = w_all; w; w = w->w_next)
1642		if (!(w->w_spin))
1643			w->w_level = 0;
1644	for (w = w_all; w; w = w->w_next) {
1645		if (w->w_spin)
1646			continue;
1647		for (w1 = w_all; w1; w1 = w1->w_next) {
1648			if (isitmychild(w1, w))
1649				break;
1650		}
1651		if (w1 != NULL)
1652			continue;
1653		witness_leveldescendents(w, 0);
1654	}
1655}
1656
1657static void
1658witness_leveldescendents(struct witness *parent, int level)
1659{
1660	int i;
1661	struct witness *w;
1662
1663	if (parent->w_level < level)
1664		parent->w_level = level;
1665	level++;
1666	for (w = parent; w != NULL; w = w->w_morechildren)
1667		for (i = 0; i < w->w_childcnt; i++)
1668			witness_leveldescendents(w->w_children[i], level);
1669}
1670
1671static void
1672witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1673			   struct witness *parent)
1674{
1675	struct witness *w;
1676	int i;
1677	int level = parent->w_level;
1678
1679	prnt("%d", level);
1680	if (level < 10)
1681		prnt(" ");
1682	for (i = 0; i < level; i++)
1683		prnt(" ");
1684	prnt("%s", parent->w_description);
1685	if (parent->w_file != NULL) {
1686		prnt(" -- last acquired @ %s", parent->w_file);
1687#ifndef W_USE_WHERE
1688		prnt(":%d", parent->w_line);
1689#endif
1690		prnt("\n");
1691	}
1692
1693	for (w = parent; w != NULL; w = w->w_morechildren)
1694		for (i = 0; i < w->w_childcnt; i++)
1695			    witness_displaydescendants(prnt, w->w_children[i]);
1696    }
1697
1698static int
1699dup_ok(struct witness *w)
1700{
1701	char **dup;
1702
1703	for (dup = dup_list; *dup!= NULL; dup++)
1704		if (strcmp(w->w_description, *dup) == 0)
1705			return (1);
1706	return (0);
1707}
1708
1709static int
1710blessed(struct witness *w1, struct witness *w2)
1711{
1712	int i;
1713	struct witness_blessed *b;
1714
1715	for (i = 0; i < blessed_count; i++) {
1716		b = &blessed_list[i];
1717		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1718			if (strcmp(w2->w_description, b->b_lock2) == 0)
1719				return (1);
1720			continue;
1721		}
1722		if (strcmp(w1->w_description, b->b_lock2) == 0)
1723			if (strcmp(w2->w_description, b->b_lock1) == 0)
1724				return (1);
1725	}
1726	return (0);
1727}
1728
1729static struct witness *
1730witness_get()
1731{
1732	struct witness *w;
1733
1734	if ((w = w_free) == NULL) {
1735		witness_dead = 1;
1736		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1737		printf("witness exhausted\n");
1738		return (NULL);
1739	}
1740	w_free = w->w_next;
1741	bzero(w, sizeof(*w));
1742	return (w);
1743}
1744
/*
 * Return a witness structure to the head of the free list.
 */
static void
witness_free(struct witness *w)
{
	w->w_next = w_free;
	w_free = w;
}
1751
1752int
1753witness_list(struct proc *p)
1754{
1755	struct mtx *m;
1756	int nheld;
1757
1758	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1759	nheld = 0;
1760	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1761	    m = LIST_NEXT(m, mtx_held)) {
1762		printf("\t\"%s\" (%p) locked at %s:%d\n",
1763		    m->mtx_description, m,
1764		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1765		nheld++;
1766	}
1767
1768	return (nheld);
1769}
1770
/*
 * Save the file/line of the witness' last recorded acquisition so the
 * caller can restore them with witness_restore() (e.g. around a
 * temporary release).  No-op for unwitnessed mutexes.
 */
void
witness_save(struct mtx *m, const char **filep, int *linep)
{

	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
	if (m->mtx_witness == NULL)
		return;

	*filep = m->mtx_witness->w_file;
	*linep = m->mtx_witness->w_line;
}
1782
/*
 * Restore the acquisition file/line previously captured by
 * witness_save().  No-op for unwitnessed mutexes.
 */
void
witness_restore(struct mtx *m, const char *file, int line)
{

	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
	if (m->mtx_witness == NULL)
		return;

	m->mtx_witness->w_file = file;
	m->mtx_witness->w_line = line;
}
1794
1795#endif	/* WITNESS */
1796