subr_witness.c revision 71576
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_witness.c 71576 2001-01-24 12:35:55Z jasone $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56/*
57 * Cause non-inlined mtx_*() to be compiled.
58 * Must be defined early because other system headers may include mutex.h.
59 */
60#define _KERN_MUTEX_C_
61
62#include <sys/param.h>
63#include <sys/bus.h>
64#include <sys/kernel.h>
65#include <sys/malloc.h>
66#include <sys/proc.h>
67#include <sys/sysctl.h>
68#include <sys/systm.h>
69#include <sys/vmmeter.h>
70#include <sys/ktr.h>
71
72#include <machine/atomic.h>
73#include <machine/bus.h>
74#include <machine/clock.h>
75#include <machine/cpu.h>
76
77#include <ddb/ddb.h>
78
79#include <vm/vm.h>
80#include <vm/vm_extern.h>
81
82#include <sys/mutex.h>
83
84/*
85 * Machine independent bits of the mutex implementation
86 */
87
88#ifdef WITNESS
89struct mtx_debug {
90	struct witness	*mtxd_witness;
91	LIST_ENTRY(mtx)	mtxd_held;
92	const char	*mtxd_file;
93	int		mtxd_line;
94};
95
96#define mtx_held	mtx_debug->mtxd_held
97#define	mtx_file	mtx_debug->mtxd_file
98#define	mtx_line	mtx_debug->mtxd_line
99#define	mtx_witness	mtx_debug->mtxd_witness
100#endif	/* WITNESS */
101
102/*
103 * Assembly macros
104 *------------------------------------------------------------------------------
105 */
106
107#define	_V(x)	__STRING(x)
108
109/*
110 * Default, unoptimized mutex micro-operations
111 */
112
113#ifndef _obtain_lock
114/* Actually obtain mtx_lock */
115#define _obtain_lock(mp, tid)						\
116	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED, (tid))
117#endif
118
119#ifndef _release_lock
120/* Actually release mtx_lock */
121#define _release_lock(mp, tid)		       				\
122	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), (void *)MTX_UNOWNED)
123#endif
124
125#ifndef _release_lock_quick
126/* Actually release mtx_lock quickly assuming that we own it */
127#define	_release_lock_quick(mp) 					\
128	atomic_store_rel_ptr(&(mp)->mtx_lock, (void *)MTX_UNOWNED)
129#endif
130
131#ifndef _getlock_sleep
132/* Get a sleep lock, deal with recursion inline. */
133#define	_getlock_sleep(mp, tid, type) do {				\
134	if (!_obtain_lock(mp, tid)) {					\
135		if (((mp)->mtx_lock & MTX_FLAGMASK) != ((uintptr_t)(tid)))\
136			mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0);	\
137		else {							\
138			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSED);	\
139			(mp)->mtx_recurse++;				\
140		}							\
141	}								\
142} while (0)
143#endif
144
145#ifndef _getlock_spin_block
146/* Get a spin lock, handle recursion inline (as the less common case) */
147#define	_getlock_spin_block(mp, tid, type) do {				\
148	u_int _mtx_intr = save_intr();					\
149	disable_intr();							\
150	if (!_obtain_lock(mp, tid))					\
151		mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_intr);	\
152	else								\
153		(mp)->mtx_saveintr = _mtx_intr;				\
154} while (0)
155#endif
156
157#ifndef _getlock_norecurse
158/*
159 * Get a lock without any recursion handling. Calls the hard enter function if
160 * we can't get it inline.
161 */
162#define	_getlock_norecurse(mp, tid, type) do {				\
163	if (!_obtain_lock(mp, tid))					\
164		mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0);		\
165} while (0)
166#endif
167
168#ifndef _exitlock_norecurse
169/*
170 * Release a sleep lock assuming we haven't recursed on it, recursion is handled
171 * in the hard function.
172 */
173#define	_exitlock_norecurse(mp, tid, type) do {				\
174	if (!_release_lock(mp, tid))					\
175		mtx_exit_hard((mp), (type) & MTX_HARDOPTS);		\
176} while (0)
177#endif
178
179#ifndef _exitlock
180/*
181 * Release a sleep lock when its likely we recursed (the code to
182 * deal with simple recursion is inline).
183 */
184#define	_exitlock(mp, tid, type) do {					\
185	if (!_release_lock(mp, tid)) {					\
186		if ((mp)->mtx_lock & MTX_RECURSED) {			\
187			if (--((mp)->mtx_recurse) == 0)			\
188				atomic_clear_ptr(&(mp)->mtx_lock,	\
189				    MTX_RECURSED);			\
190		} else {						\
191			mtx_exit_hard((mp), (type) & MTX_HARDOPTS);	\
192		}							\
193	}								\
194} while (0)
195#endif
196
197#ifndef _exitlock_spin
198/* Release a spin lock (with possible recursion). */
199#define	_exitlock_spin(mp) do {						\
200	if (!mtx_recursed((mp))) {					\
201		int _mtx_intr = (mp)->mtx_saveintr;			\
202									\
203		_release_lock_quick(mp);				\
204		restore_intr(_mtx_intr);				\
205	} else {							\
206		(mp)->mtx_recurse--;					\
207	}								\
208} while (0)
209#endif
210
211#ifdef WITNESS
212static void	witness_init(struct mtx *, int flag);
213static void	witness_destroy(struct mtx *);
214static void	witness_display(void(*)(const char *fmt, ...));
215
216/* All mutexes in system (used for debug/panic) */
217static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
218/*
219 * Set to 0 once mutexes have been fully initialized so that witness code can be
220 * safely executed.
221 */
222static int witness_cold = 1;
223#else	/* WITNESS */
224
225/*
226 * flag++ is slezoid way of shutting up unused parameter warning
227 * in mtx_init()
228 */
229#define witness_init(m, flag) flag++
230#define witness_destroy(m)
231#define witness_try_enter(m, t, f, l)
232#endif	/* WITNESS */
233
234/* All mutexes in system (used for debug/panic) */
235static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
236	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
237	{ NULL, NULL }, &all_mtx, &all_mtx,
238#ifdef WITNESS
239	&all_mtx_debug
240#else
241	NULL
242#endif
243	 };
244
245static int	mtx_cur_cnt;
246static int	mtx_max_cnt;
247
248static void	propagate_priority(struct proc *);
249static void	mtx_enter_hard(struct mtx *, int type, int saveintr);
250static void	mtx_exit_hard(struct mtx *, int type);
251
252#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
253#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
254			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
255
256#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
257#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
258
259static void
260propagate_priority(struct proc *p)
261{
262	int pri = p->p_priority;
263	struct mtx *m = p->p_blocked;
264
265	mtx_assert(&sched_lock, MA_OWNED);
266	for (;;) {
267		struct proc *p1;
268
269		p = mtx_owner(m);
270
271		if (p == NULL) {
272			/*
273			 * This really isn't quite right. Really
274			 * ought to bump priority of process that
275			 * next acquires the mutex.
276			 */
277			MPASS(m->mtx_lock == MTX_CONTESTED);
278			return;
279		}
280		MPASS(p->p_magic == P_MAGIC);
281		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
282		if (p->p_priority <= pri)
283			return;
284
285		/*
286		 * Bump this process' priority.
287		 */
288		SET_PRIO(p, pri);
289
290		/*
291		 * If lock holder is actually running, just bump priority.
292		 */
293#ifdef SMP
294		/*
295		 * For SMP, we can check the p_oncpu field to see if we are
296		 * running.
297		 */
298		if (p->p_oncpu != 0xff) {
299			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
300			return;
301		}
302#else
303		/*
304		 * For UP, we check to see if p is curproc (this shouldn't
305		 * ever happen however as it would mean we are in a deadlock.)
306		 */
307		if (p == curproc) {
308			panic("Deadlock detected");
309			return;
310		}
311#endif
312		/*
313		 * If on run queue move to new run queue, and
314		 * quit.
315		 */
316		if (p->p_stat == SRUN) {
317			printf("XXX: moving process %d(%s) to a new run queue\n",
318			       p->p_pid, p->p_comm);
319			MPASS(p->p_blocked == NULL);
320			remrunqueue(p);
321			setrunqueue(p);
322			return;
323		}
324
325		/*
326		 * If we aren't blocked on a mutex, we should be.
327		 */
328		KASSERT(p->p_stat == SMTX, (
329		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
330		    p->p_pid, p->p_comm, p->p_stat,
331		    m->mtx_description));
332
333		/*
334		 * Pick up the mutex that p is blocked on.
335		 */
336		m = p->p_blocked;
337		MPASS(m != NULL);
338
339		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
340		    p->p_comm, m->mtx_description);
341		/*
342		 * Check if the proc needs to be moved up on
343		 * the blocked chain
344		 */
345		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
346			printf("XXX: process at head of run queue\n");
347			continue;
348		}
349		p1 = TAILQ_PREV(p, rq, p_procq);
350		if (p1->p_priority <= pri) {
351			printf(
352	"XXX: previous process %d(%s) has higher priority\n",
353	                    p->p_pid, p->p_comm);
354			continue;
355		}
356
357		/*
358		 * Remove proc from blocked chain and determine where
359		 * it should be moved up to.  Since we know that p1 has
360		 * a lower priority than p, we know that at least one
361		 * process in the chain has a lower priority and that
362		 * p1 will thus not be NULL after the loop.
363		 */
364		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
365		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
366			MPASS(p1->p_magic == P_MAGIC);
367			if (p1->p_priority > pri)
368				break;
369		}
370		MPASS(p1 != NULL);
371		TAILQ_INSERT_BEFORE(p1, p, p_procq);
372		CTR4(KTR_LOCK,
373		    "propagate_priority: p %p moved before %p on [%p] %s",
374		    p, p1, m, m->mtx_description);
375	}
376}
377
378/*
379 * Get lock 'm', the macro handles the easy (and most common cases) and leaves
380 * the slow stuff to the mtx_enter_hard() function.
381 *
382 * Note: since type is usually a constant much of this code is optimized out.
383 */
384void
385_mtx_enter(struct mtx *mtxp, int type, const char *file, int line)
386{
387	struct mtx	*mpp = mtxp;
388
389	/* bits only valid on mtx_exit() */
390	MPASS4(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
391	    STR_mtx_bad_type, file, line);
392
393	if ((type) & MTX_SPIN) {
394		/*
395		 * Easy cases of spin locks:
396		 *
397		 * 1) We already own the lock and will simply recurse on it (if
398		 *    RLIKELY)
399		 *
400		 * 2) The lock is free, we just get it
401		 */
402		if ((type) & MTX_RLIKELY) {
403			/*
404			 * Check for recursion, if we already have this
405			 * lock we just bump the recursion count.
406			 */
407			if (mpp->mtx_lock == (uintptr_t)CURTHD) {
408				mpp->mtx_recurse++;
409				goto done;
410			}
411		}
412
413		if (((type) & MTX_TOPHALF) == 0) {
414			/*
415			 * If an interrupt thread uses this we must block
416			 * interrupts here.
417			 */
418			if ((type) & MTX_FIRST) {
419				ASS_IEN;
420				disable_intr();
421				_getlock_norecurse(mpp, CURTHD,
422				    (type) & MTX_HARDOPTS);
423			} else {
424				_getlock_spin_block(mpp, CURTHD,
425				    (type) & MTX_HARDOPTS);
426			}
427		} else
428			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
429	} else {
430		/* Sleep locks */
431		if ((type) & MTX_RLIKELY)
432			_getlock_sleep(mpp, CURTHD, (type) & MTX_HARDOPTS);
433		else
434			_getlock_norecurse(mpp, CURTHD, (type) & MTX_HARDOPTS);
435	}
436done:
437	WITNESS_ENTER(mpp, type, file, line);
438	if (((type) & MTX_QUIET) == 0)
439		CTR5(KTR_LOCK, STR_mtx_enter_fmt,
440		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
441
442}
443
444/*
445 * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
446 *
447 * XXX DOES NOT HANDLE RECURSION
448 */
449int
450_mtx_try_enter(struct mtx *mtxp, int type, const char *file, int line)
451{
452	struct mtx	*const mpp = mtxp;
453	int	rval;
454
455	rval = _obtain_lock(mpp, CURTHD);
456#ifdef WITNESS
457	if (rval && mpp->mtx_witness != NULL) {
458		MPASS(mpp->mtx_recurse == 0);
459		witness_try_enter(mpp, type, file, line);
460	}
461#endif	/* WITNESS */
462	if (((type) & MTX_QUIET) == 0)
463		CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
464		    mpp->mtx_description, mpp, rval, file, line);
465
466	return rval;
467}
468
469/*
470 * Release lock m.
471 */
472void
473_mtx_exit(struct mtx *mtxp, int type, const char *file, int line)
474{
475	struct mtx	*const mpp = mtxp;
476
477	MPASS4(mtx_owned(mpp), STR_mtx_owned, file, line);
478	WITNESS_EXIT(mpp, type, file, line);
479	if (((type) & MTX_QUIET) == 0)
480		CTR5(KTR_LOCK, STR_mtx_exit_fmt,
481		    mpp->mtx_description, mpp, mpp->mtx_recurse, file, line);
482	if ((type) & MTX_SPIN) {
483		if ((type) & MTX_NORECURSE) {
484			int mtx_intr = mpp->mtx_saveintr;
485
486			MPASS4(mpp->mtx_recurse == 0, STR_mtx_recurse,
487			    file, line);
488			_release_lock_quick(mpp);
489			if (((type) & MTX_TOPHALF) == 0) {
490				if ((type) & MTX_FIRST) {
491					ASS_IDIS;
492					enable_intr();
493				} else
494					restore_intr(mtx_intr);
495			}
496		} else {
497			if (((type & MTX_TOPHALF) == 0) &&
498			    (type & MTX_FIRST)) {
499				ASS_IDIS;
500				ASS_SIEN(mpp);
501			}
502			_exitlock_spin(mpp);
503		}
504	} else {
505		/* Handle sleep locks */
506		if ((type) & MTX_RLIKELY)
507			_exitlock(mpp, CURTHD, (type) & MTX_HARDOPTS);
508		else {
509			_exitlock_norecurse(mpp, CURTHD,
510			    (type) & MTX_HARDOPTS);
511		}
512	}
513}
514
515void
516mtx_enter_hard(struct mtx *m, int type, int saveintr)
517{
518	struct proc *p = CURPROC;
519
520	KASSERT(p != NULL, ("curproc is NULL in mutex"));
521
522	switch (type) {
523	case MTX_DEF:
524		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
525			m->mtx_recurse++;
526			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
527			if ((type & MTX_QUIET) == 0)
528				CTR1(KTR_LOCK, "mtx_enter: %p recurse", m);
529			return;
530		}
531		if ((type & MTX_QUIET) == 0)
532			CTR3(KTR_LOCK,
533			    "mtx_enter: %p contested (lock=%p) [%p]",
534			    m, (void *)m->mtx_lock, (void *)RETIP(m));
535
536		/*
537		 * Save our priority.  Even though p_nativepri is protected
538		 * by sched_lock, we don't obtain it here as it can be
539		 * expensive.  Since this is the only place p_nativepri is
540		 * set, and since two CPUs will not be executing the same
541		 * process concurrently, we know that no other CPU is going
542		 * to be messing with this.  Also, p_nativepri is only read
543		 * when we are blocked on a mutex, so that can't be happening
544		 * right now either.
545		 */
546		p->p_nativepri = p->p_priority;
547		while (!_obtain_lock(m, p)) {
548			uintptr_t v;
549			struct proc *p1;
550
551			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
552			/*
553			 * check if the lock has been released while
554			 * waiting for the schedlock.
555			 */
556			if ((v = m->mtx_lock) == MTX_UNOWNED) {
557				mtx_exit(&sched_lock, MTX_SPIN);
558				continue;
559			}
560			/*
561			 * The mutex was marked contested on release. This
562			 * means that there are processes blocked on it.
563			 */
564			if (v == MTX_CONTESTED) {
565				p1 = TAILQ_FIRST(&m->mtx_blocked);
566				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
567				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
568				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
569				if (p1->p_priority < p->p_priority) {
570					SET_PRIO(p, p1->p_priority);
571				}
572				mtx_exit(&sched_lock, MTX_SPIN);
573				return;
574			}
575			/*
576			 * If the mutex isn't already contested and
577			 * a failure occurs setting the contested bit the
578			 * mutex was either release or the
579			 * state of the RECURSION bit changed.
580			 */
581			if ((v & MTX_CONTESTED) == 0 &&
582			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
583				               (void *)(v | MTX_CONTESTED))) {
584				mtx_exit(&sched_lock, MTX_SPIN);
585				continue;
586			}
587
588			/* We definitely have to sleep for this lock */
589			mtx_assert(m, MA_NOTOWNED);
590
591#ifdef notyet
592			/*
593			 * If we're borrowing an interrupted thread's VM
594			 * context must clean up before going to sleep.
595			 */
596			if (p->p_flag & (P_ITHD | P_SITHD)) {
597				ithd_t *it = (ithd_t *)p;
598
599				if (it->it_interrupted) {
600					if ((type & MTX_QUIET) == 0)
601						CTR2(KTR_LOCK,
602					    "mtx_enter: 0x%x interrupted 0x%x",
603						    it, it->it_interrupted);
604					intr_thd_fixup(it);
605				}
606			}
607#endif
608
609			/* Put us on the list of procs blocked on this mutex */
610			if (TAILQ_EMPTY(&m->mtx_blocked)) {
611				p1 = (struct proc *)(m->mtx_lock &
612						     MTX_FLAGMASK);
613				LIST_INSERT_HEAD(&p1->p_contested, m,
614						 mtx_contested);
615				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
616			} else {
617				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
618					if (p1->p_priority > p->p_priority)
619						break;
620				if (p1)
621					TAILQ_INSERT_BEFORE(p1, p, p_procq);
622				else
623					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
624							  p_procq);
625			}
626
627			p->p_blocked = m;	/* Who we're blocked on */
628			p->p_mtxname = m->mtx_description;
629			p->p_stat = SMTX;
630#if 0
631			propagate_priority(p);
632#endif
633			if ((type & MTX_QUIET) == 0)
634				CTR3(KTR_LOCK,
635				    "mtx_enter: p %p blocked on [%p] %s",
636				    p, m, m->mtx_description);
637			mi_switch();
638			if ((type & MTX_QUIET) == 0)
639				CTR3(KTR_LOCK,
640			    "mtx_enter: p %p free from blocked on [%p] %s",
641				    p, m, m->mtx_description);
642			mtx_exit(&sched_lock, MTX_SPIN);
643		}
644		return;
645	case MTX_SPIN:
646	case MTX_SPIN | MTX_FIRST:
647	case MTX_SPIN | MTX_TOPHALF:
648	    {
649		int i = 0;
650
651		if (m->mtx_lock == (uintptr_t)p) {
652			m->mtx_recurse++;
653			return;
654		}
655		if ((type & MTX_QUIET) == 0)
656			CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
657		for (;;) {
658			if (_obtain_lock(m, p))
659				break;
660			while (m->mtx_lock != MTX_UNOWNED) {
661				if (i++ < 1000000)
662					continue;
663				if (i++ < 6000000)
664					DELAY (1);
665#ifdef DDB
666				else if (!db_active)
667#else
668				else
669#endif
670					panic(
671				"spin lock %s held by %p for > 5 seconds",
672					    m->mtx_description,
673					    (void *)m->mtx_lock);
674			}
675		}
676
677#ifdef MUTEX_DEBUG
678		if (type != MTX_SPIN)
679			m->mtx_saveintr = 0xbeefface;
680		else
681#endif
682			m->mtx_saveintr = saveintr;
683		if ((type & MTX_QUIET) == 0)
684			CTR1(KTR_LOCK, "mtx_enter: %p spin done", m);
685		return;
686	    }
687	}
688}
689
690void
691mtx_exit_hard(struct mtx *m, int type)
692{
693	struct proc *p, *p1;
694	struct mtx *m1;
695	int pri;
696
697	p = CURPROC;
698	switch (type) {
699	case MTX_DEF:
700	case MTX_DEF | MTX_NOSWITCH:
701		if (mtx_recursed(m)) {
702			if (--(m->mtx_recurse) == 0)
703				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
704			if ((type & MTX_QUIET) == 0)
705				CTR1(KTR_LOCK, "mtx_exit: %p unrecurse", m);
706			return;
707		}
708		mtx_enter(&sched_lock, MTX_SPIN);
709		if ((type & MTX_QUIET) == 0)
710			CTR1(KTR_LOCK, "mtx_exit: %p contested", m);
711		p1 = TAILQ_FIRST(&m->mtx_blocked);
712		MPASS(p->p_magic == P_MAGIC);
713		MPASS(p1->p_magic == P_MAGIC);
714		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
715		if (TAILQ_EMPTY(&m->mtx_blocked)) {
716			LIST_REMOVE(m, mtx_contested);
717			_release_lock_quick(m);
718			if ((type & MTX_QUIET) == 0)
719				CTR1(KTR_LOCK, "mtx_exit: %p not held", m);
720		} else
721			atomic_store_rel_ptr(&m->mtx_lock,
722			    (void *)MTX_CONTESTED);
723		pri = MAXPRI;
724		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
725			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
726			if (cp < pri)
727				pri = cp;
728		}
729		if (pri > p->p_nativepri)
730			pri = p->p_nativepri;
731		SET_PRIO(p, pri);
732		if ((type & MTX_QUIET) == 0)
733			CTR2(KTR_LOCK,
734			    "mtx_exit: %p contested setrunqueue %p", m, p1);
735		p1->p_blocked = NULL;
736		p1->p_mtxname = NULL;
737		p1->p_stat = SRUN;
738		setrunqueue(p1);
739		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
740#ifdef notyet
741			if (p->p_flag & (P_ITHD | P_SITHD)) {
742				ithd_t *it = (ithd_t *)p;
743
744				if (it->it_interrupted) {
745					if ((type & MTX_QUIET) == 0)
746						CTR2(KTR_LOCK,
747					    "mtx_exit: 0x%x interruped 0x%x",
748						    it, it->it_interrupted);
749					intr_thd_fixup(it);
750				}
751			}
752#endif
753			setrunqueue(p);
754			if ((type & MTX_QUIET) == 0)
755				CTR2(KTR_LOCK,
756				    "mtx_exit: %p switching out lock=%p",
757				    m, (void *)m->mtx_lock);
758			mi_switch();
759			if ((type & MTX_QUIET) == 0)
760				CTR2(KTR_LOCK,
761				    "mtx_exit: %p resuming lock=%p",
762				    m, (void *)m->mtx_lock);
763		}
764		mtx_exit(&sched_lock, MTX_SPIN);
765		break;
766	case MTX_SPIN:
767	case MTX_SPIN | MTX_FIRST:
768		if (mtx_recursed(m)) {
769			m->mtx_recurse--;
770			return;
771		}
772		MPASS(mtx_owned(m));
773		_release_lock_quick(m);
774		if (type & MTX_FIRST)
775			enable_intr();	/* XXX is this kosher? */
776		else {
777			MPASS(m->mtx_saveintr != 0xbeefface);
778			restore_intr(m->mtx_saveintr);
779		}
780		break;
781	case MTX_SPIN | MTX_TOPHALF:
782		if (mtx_recursed(m)) {
783			m->mtx_recurse--;
784			return;
785		}
786		MPASS(mtx_owned(m));
787		_release_lock_quick(m);
788		break;
789	default:
790		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
791	}
792}
793
794#ifdef INVARIANTS
795void
796_mtx_assert(struct mtx *m, int what, const char *file, int line)
797{
798	switch ((what)) {
799	case MA_OWNED:
800	case MA_OWNED | MA_RECURSED:
801	case MA_OWNED | MA_NOTRECURSED:
802		if (!mtx_owned((m)))
803			panic("mutex %s not owned at %s:%d",
804			    (m)->mtx_description, file, line);
805		if (mtx_recursed((m))) {
806			if (((what) & MA_NOTRECURSED) != 0)
807				panic("mutex %s recursed at %s:%d",
808				    (m)->mtx_description, file, line);
809		} else if (((what) & MA_RECURSED) != 0) {
810			panic("mutex %s unrecursed at %s:%d",
811			    (m)->mtx_description, file, line);
812		}
813		break;
814	case MA_NOTOWNED:
815		if (mtx_owned((m)))
816			panic("mutex %s owned at %s:%d",
817			    (m)->mtx_description, file, line);
818		break;
819	default:
820		panic("unknown mtx_assert at %s:%d", file, line);
821	}
822}
823#endif
824
825#define MV_DESTROY	0	/* validate before destory */
826#define MV_INIT		1	/* validate before init */
827
828#ifdef MUTEX_DEBUG
829
830int mtx_validate __P((struct mtx *, int));
831
832int
833mtx_validate(struct mtx *m, int when)
834{
835	struct mtx *mp;
836	int i;
837	int retval = 0;
838
839#ifdef WITNESS
840	if (witness_cold)
841		return 0;
842#endif
843	if (m == &all_mtx || cold)
844		return 0;
845
846	mtx_enter(&all_mtx, MTX_DEF);
847/*
848 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
849 * we can re-enable the kernacc() checks.
850 */
851#ifndef __alpha__
852	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
853	    VM_PROT_READ) == 1);
854#endif
855	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
856	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
857#ifndef __alpha__
858		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
859		    VM_PROT_READ) != 1) {
860			panic("mtx_validate: mp=%p mp->mtx_next=%p",
861			    mp, mp->mtx_next);
862		}
863#endif
864		i++;
865		if (i > mtx_cur_cnt) {
866			panic("mtx_validate: too many in chain, known=%d\n",
867			    mtx_cur_cnt);
868		}
869	}
870	MPASS(i == mtx_cur_cnt);
871	switch (when) {
872	case MV_DESTROY:
873		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
874			if (mp == m)
875				break;
876		MPASS(mp == m);
877		break;
878	case MV_INIT:
879		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
880		if (mp == m) {
881			/*
882			 * Not good. This mutex already exists.
883			 */
884			printf("re-initing existing mutex %s\n",
885			    m->mtx_description);
886			MPASS(m->mtx_lock == MTX_UNOWNED);
887			retval = 1;
888		}
889	}
890	mtx_exit(&all_mtx, MTX_DEF);
891	return (retval);
892}
893#endif
894
895void
896mtx_init(struct mtx *m, const char *t, int flag)
897{
898	if ((flag & MTX_QUIET) == 0)
899		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, t);
900#ifdef MUTEX_DEBUG
901	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
902		return;
903#endif
904
905	bzero((void *)m, sizeof *m);
906	TAILQ_INIT(&m->mtx_blocked);
907#ifdef WITNESS
908	if (!witness_cold) {
909		/* XXX - should not use DEVBUF */
910		m->mtx_debug = malloc(sizeof(struct mtx_debug),
911		    M_DEVBUF, M_NOWAIT | M_ZERO);
912		MPASS(m->mtx_debug != NULL);
913	}
914#endif
915	m->mtx_description = t;
916
917	m->mtx_flags = flag;
918	m->mtx_lock = MTX_UNOWNED;
919	/* Put on all mutex queue */
920	mtx_enter(&all_mtx, MTX_DEF);
921	m->mtx_next = &all_mtx;
922	m->mtx_prev = all_mtx.mtx_prev;
923	m->mtx_prev->mtx_next = m;
924	all_mtx.mtx_prev = m;
925	if (++mtx_cur_cnt > mtx_max_cnt)
926		mtx_max_cnt = mtx_cur_cnt;
927	mtx_exit(&all_mtx, MTX_DEF);
928#ifdef WITNESS
929	if (!witness_cold)
930		witness_init(m, flag);
931#endif
932}
933
934void
935mtx_destroy(struct mtx *m)
936{
937
938#ifdef WITNESS
939	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
940	    __FUNCTION__));
941#endif
942	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
943#ifdef MUTEX_DEBUG
944	if (m->mtx_next == NULL)
945		panic("mtx_destroy: %p (%s) already destroyed",
946		    m, m->mtx_description);
947
948	if (!mtx_owned(m)) {
949		MPASS(m->mtx_lock == MTX_UNOWNED);
950	} else {
951		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
952	}
953	mtx_validate(m, MV_DESTROY);		/* diagnostic */
954#endif
955
956#ifdef WITNESS
957	if (m->mtx_witness)
958		witness_destroy(m);
959#endif /* WITNESS */
960
961	/* Remove from the all mutex queue */
962	mtx_enter(&all_mtx, MTX_DEF);
963	m->mtx_next->mtx_prev = m->mtx_prev;
964	m->mtx_prev->mtx_next = m->mtx_next;
965#ifdef MUTEX_DEBUG
966	m->mtx_next = m->mtx_prev = NULL;
967#endif
968#ifdef WITNESS
969	free(m->mtx_debug, M_DEVBUF);
970	m->mtx_debug = NULL;
971#endif
972	mtx_cur_cnt--;
973	mtx_exit(&all_mtx, MTX_DEF);
974}
975
976/*
977 * The non-inlined versions of the mtx_*() functions are always built (above),
978 * but the witness code depends on the WITNESS kernel option being specified.
979 */
980
981#ifdef WITNESS
982static void
983witness_fixup(void *dummy __unused)
984{
985	struct mtx *mp;
986
987	/*
988	 * We have to release Giant before initializing its witness
989	 * structure so that WITNESS doesn't get confused.
990	 */
991	mtx_exit(&Giant, MTX_DEF);
992	mtx_assert(&Giant, MA_NOTOWNED);
993	mtx_enter(&all_mtx, MTX_DEF);
994
995	/* Iterate through all mutexes and finish up mutex initialization. */
996	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
997
998		/* XXX - should not use DEVBUF */
999		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
1000		    M_DEVBUF, M_NOWAIT | M_ZERO);
1001		MPASS(mp->mtx_debug != NULL);
1002
1003		witness_init(mp, mp->mtx_flags);
1004	}
1005	mtx_exit(&all_mtx, MTX_DEF);
1006
1007	/* Mark the witness code as being ready for use. */
1008	atomic_store_rel_int(&witness_cold, 0);
1009
1010	mtx_enter(&Giant, MTX_DEF);
1011}
1012SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
1013
1014#define WITNESS_COUNT 200
1015#define	WITNESS_NCHILDREN 2
1016
1017int witness_watch = 1;
1018
1019struct witness {
1020	struct witness	*w_next;
1021	const char	*w_description;
1022	const char	*w_file;
1023	int		 w_line;
1024	struct witness	*w_morechildren;
1025	u_char		 w_childcnt;
1026	u_char		 w_Giant_squawked:1;
1027	u_char		 w_other_squawked:1;
1028	u_char		 w_same_squawked:1;
1029	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
1030	u_int		 w_level;
1031	struct witness	*w_children[WITNESS_NCHILDREN];
1032};
1033
1034struct witness_blessed {
1035	char 	*b_lock1;
1036	char	*b_lock2;
1037};
1038
1039#ifdef DDB
1040/*
1041 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
1042 * drop into kdebug() when:
1043 *	- a lock heirarchy violation occurs
1044 *	- locks are held when going to sleep.
1045 */
1046int	witness_ddb;
1047#ifdef WITNESS_DDB
1048TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
1049#else
1050TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
1051#endif
1052SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
1053#endif /* DDB */
1054
1055int	witness_skipspin;
1056#ifdef WITNESS_SKIPSPIN
1057TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
1058#else
1059TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
1060#endif
1061SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
1062    "");
1063
1064static struct mtx	w_mtx;
1065static struct witness	*w_free;
1066static struct witness	*w_all;
1067static int		 w_inited;
1068static int		 witness_dead;	/* fatal error, probably no memory */
1069
1070static struct witness	 w_data[WITNESS_COUNT];
1071
1072static struct witness	 *enroll __P((const char *description, int flag));
1073static int itismychild __P((struct witness *parent, struct witness *child));
1074static void removechild __P((struct witness *parent, struct witness *child));
1075static int isitmychild __P((struct witness *parent, struct witness *child));
1076static int isitmydescendant __P((struct witness *parent, struct witness *child));
1077static int dup_ok __P((struct witness *));
1078static int blessed __P((struct witness *, struct witness *));
1079static void witness_displaydescendants
1080    __P((void(*)(const char *fmt, ...), struct witness *));
1081static void witness_leveldescendents __P((struct witness *parent, int level));
1082static void witness_levelall __P((void));
1083static struct witness * witness_get __P((void));
1084static void witness_free __P((struct witness *m));
1085
1086
1087static char *ignore_list[] = {
1088	"witness lock",
1089	NULL
1090};
1091
1092static char *spin_order_list[] = {
1093	"sio",
1094	"sched lock",
1095#ifdef __i386__
1096	"clk",
1097#endif
1098	"callout",
1099	/*
1100	 * leaf locks
1101	 */
1102#ifdef __i386__
1103	"ap boot",
1104	"imen",
1105#endif
1106	"com",
1107	"smp rendezvous",
1108	NULL
1109};
1110
1111static char *order_list[] = {
1112	"Giant", "uidinfo hash", "uidinfo struct", NULL,
1113	"Giant", "proctree", "allproc", "process lock", NULL,
1114	NULL
1115};
1116
1117static char *dup_list[] = {
1118	NULL
1119};
1120
1121static char *sleep_list[] = {
1122	"Giant",
1123	NULL
1124};
1125
1126/*
1127 * Pairs of locks which have been blessed
1128 * Don't complain about order problems with blessed locks
1129 */
1130static struct witness_blessed blessed_list[] = {
1131};
1132static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
1133
1134static void
1135witness_init(struct mtx *m, int flag)
1136{
1137	m->mtx_witness = enroll(m->mtx_description, flag);
1138}
1139
1140static void
1141witness_destroy(struct mtx *m)
1142{
1143	struct mtx *m1;
1144	struct proc *p;
1145	p = CURPROC;
1146	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
1147		m1 = LIST_NEXT(m1, mtx_held)) {
1148		if (m1 == m) {
1149			LIST_REMOVE(m, mtx_held);
1150			break;
1151		}
1152	}
1153	return;
1154
1155}
1156
1157static void
1158witness_display(void(*prnt)(const char *fmt, ...))
1159{
1160	struct witness *w, *w1;
1161
1162	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1163	witness_levelall();
1164
1165	for (w = w_all; w; w = w->w_next) {
1166		if (w->w_file == NULL)
1167			continue;
1168		for (w1 = w_all; w1; w1 = w1->w_next) {
1169			if (isitmychild(w1, w))
1170				break;
1171		}
1172		if (w1 != NULL)
1173			continue;
1174		/*
1175		 * This lock has no anscestors, display its descendants.
1176		 */
1177		witness_displaydescendants(prnt, w);
1178	}
1179	prnt("\nMutex which were never acquired\n");
1180	for (w = w_all; w; w = w->w_next) {
1181		if (w->w_file != NULL)
1182			continue;
1183		prnt("%s\n", w->w_description);
1184	}
1185}
1186
1187void
1188witness_enter(struct mtx *m, int flags, const char *file, int line)
1189{
1190	struct witness *w, *w1;
1191	struct mtx *m1;
1192	struct proc *p;
1193	int i;
1194#ifdef DDB
1195	int go_into_ddb = 0;
1196#endif /* DDB */
1197
1198	if (witness_cold || m->mtx_witness == NULL || panicstr)
1199		return;
1200	w = m->mtx_witness;
1201	p = CURPROC;
1202
1203	if (flags & MTX_SPIN) {
1204		if ((m->mtx_flags & MTX_SPIN) == 0)
1205			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1206			    " %s:%d", m->mtx_description, file, line);
1207		if (mtx_recursed(m)) {
1208			if ((m->mtx_flags & MTX_RECURSE) == 0)
1209				panic("mutex_enter: recursion on non-recursive"
1210				    " mutex %s @ %s:%d", m->mtx_description,
1211				    file, line);
1212			return;
1213		}
1214		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1215		i = PCPU_GET(witness_spin_check);
1216		if (i != 0 && w->w_level < i) {
1217			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1218			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1219			    " %s:%d already holding %s:%x",
1220			    m->mtx_description, w->w_level, file, line,
1221			    spin_order_list[ffs(i)-1], i);
1222		}
1223		PCPU_SET(witness_spin_check, i | w->w_level);
1224		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1225		w->w_file = file;
1226		w->w_line = line;
1227		m->mtx_line = line;
1228		m->mtx_file = file;
1229		return;
1230	}
1231	if ((m->mtx_flags & MTX_SPIN) != 0)
1232		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1233		    m->mtx_description, file, line);
1234
1235	if (mtx_recursed(m)) {
1236		if ((m->mtx_flags & MTX_RECURSE) == 0)
1237			panic("mutex_enter: recursion on non-recursive"
1238			    " mutex %s @ %s:%d", m->mtx_description,
1239			    file, line);
1240		return;
1241	}
1242	if (witness_dead)
1243		goto out;
1244	if (cold)
1245		goto out;
1246
1247	if (!mtx_legal2block())
1248		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
1249			    m->mtx_description, file, line);
1250	/*
1251	 * Is this the first mutex acquired
1252	 */
1253	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1254		goto out;
1255
1256	if ((w1 = m1->mtx_witness) == w) {
1257		if (w->w_same_squawked || dup_ok(w))
1258			goto out;
1259		w->w_same_squawked = 1;
1260		printf("acquring duplicate lock of same type: \"%s\"\n",
1261			m->mtx_description);
1262		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1263		printf(" 2nd @ %s:%d\n", file, line);
1264#ifdef DDB
1265		go_into_ddb = 1;
1266#endif /* DDB */
1267		goto out;
1268	}
1269	MPASS(!mtx_owned(&w_mtx));
1270	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1271	/*
1272	 * If we have a known higher number just say ok
1273	 */
1274	if (witness_watch > 1 && w->w_level > w1->w_level) {
1275		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1276		goto out;
1277	}
1278	if (isitmydescendant(m1->mtx_witness, w)) {
1279		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1280		goto out;
1281	}
1282	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1283
1284		MPASS(i < 200);
1285		w1 = m1->mtx_witness;
1286		if (isitmydescendant(w, w1)) {
1287			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1288			if (blessed(w, w1))
1289				goto out;
1290			if (m1 == &Giant) {
1291				if (w1->w_Giant_squawked)
1292					goto out;
1293				else
1294					w1->w_Giant_squawked = 1;
1295			} else {
1296				if (w1->w_other_squawked)
1297					goto out;
1298				else
1299					w1->w_other_squawked = 1;
1300			}
1301			printf("lock order reversal\n");
1302			printf(" 1st %s last acquired @ %s:%d\n",
1303			    w->w_description, w->w_file, w->w_line);
1304			printf(" 2nd %p %s @ %s:%d\n",
1305			    m1, w1->w_description, w1->w_file, w1->w_line);
1306			printf(" 3rd %p %s @ %s:%d\n",
1307			    m, w->w_description, file, line);
1308#ifdef DDB
1309			go_into_ddb = 1;
1310#endif /* DDB */
1311			goto out;
1312		}
1313	}
1314	m1 = LIST_FIRST(&p->p_heldmtx);
1315	if (!itismychild(m1->mtx_witness, w))
1316		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1317
1318out:
1319#ifdef DDB
1320	if (witness_ddb && go_into_ddb)
1321		Debugger("witness_enter");
1322#endif /* DDB */
1323	w->w_file = file;
1324	w->w_line = line;
1325	m->mtx_line = line;
1326	m->mtx_file = file;
1327
1328	/*
1329	 * If this pays off it likely means that a mutex being witnessed
1330	 * is acquired in hardclock. Put it in the ignore list. It is
1331	 * likely not the mutex this assert fails on.
1332	 */
1333	MPASS(m->mtx_held.le_prev == NULL);
1334	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1335}
1336
1337void
1338witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1339{
1340	struct proc *p;
1341	struct witness *w = m->mtx_witness;
1342
1343	if (witness_cold)
1344		return;
1345	if (panicstr)
1346		return;
1347	if (flags & MTX_SPIN) {
1348		if ((m->mtx_flags & MTX_SPIN) == 0)
1349			panic("mutex_try_enter: "
1350			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1351			    m->mtx_description, file, line);
1352		if (mtx_recursed(m)) {
1353			if ((m->mtx_flags & MTX_RECURSE) == 0)
1354				panic("mutex_try_enter: recursion on"
1355				    " non-recursive mutex %s @ %s:%d",
1356				    m->mtx_description, file, line);
1357			return;
1358		}
1359		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1360		PCPU_SET(witness_spin_check,
1361		    PCPU_GET(witness_spin_check) | w->w_level);
1362		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1363		w->w_file = file;
1364		w->w_line = line;
1365		m->mtx_line = line;
1366		m->mtx_file = file;
1367		return;
1368	}
1369
1370	if ((m->mtx_flags & MTX_SPIN) != 0)
1371		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1372		    m->mtx_description, file, line);
1373
1374	if (mtx_recursed(m)) {
1375		if ((m->mtx_flags & MTX_RECURSE) == 0)
1376			panic("mutex_try_enter: recursion on non-recursive"
1377			    " mutex %s @ %s:%d", m->mtx_description, file,
1378			    line);
1379		return;
1380	}
1381	w->w_file = file;
1382	w->w_line = line;
1383	m->mtx_line = line;
1384	m->mtx_file = file;
1385	p = CURPROC;
1386	MPASS(m->mtx_held.le_prev == NULL);
1387	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1388}
1389
1390void
1391witness_exit(struct mtx *m, int flags, const char *file, int line)
1392{
1393	struct witness *w;
1394
1395	if (witness_cold || m->mtx_witness == NULL || panicstr)
1396		return;
1397	w = m->mtx_witness;
1398
1399	if (flags & MTX_SPIN) {
1400		if ((m->mtx_flags & MTX_SPIN) == 0)
1401			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1402			    " %s:%d", m->mtx_description, file, line);
1403		if (mtx_recursed(m)) {
1404			if ((m->mtx_flags & MTX_RECURSE) == 0)
1405				panic("mutex_exit: recursion on non-recursive"
1406				    " mutex %s @ %s:%d", m->mtx_description,
1407				    file, line);
1408			return;
1409		}
1410		mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1411		PCPU_SET(witness_spin_check,
1412		    PCPU_GET(witness_spin_check) & ~w->w_level);
1413		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1414		return;
1415	}
1416	if ((m->mtx_flags & MTX_SPIN) != 0)
1417		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1418		    m->mtx_description, file, line);
1419
1420	if (mtx_recursed(m)) {
1421		if ((m->mtx_flags & MTX_RECURSE) == 0)
1422			panic("mutex_exit: recursion on non-recursive"
1423			    " mutex %s @ %s:%d", m->mtx_description,
1424			    file, line);
1425		return;
1426	}
1427
1428	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1429		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
1430			    m->mtx_description, file, line);
1431	LIST_REMOVE(m, mtx_held);
1432	m->mtx_held.le_prev = NULL;
1433}
1434
1435int
1436witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1437{
1438	struct mtx *m;
1439	struct proc *p;
1440	char **sleep;
1441	int n = 0;
1442
1443	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1444	p = CURPROC;
1445	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1446	    m = LIST_NEXT(m, mtx_held)) {
1447		if (m == mtx)
1448			continue;
1449		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1450			if (strcmp(m->mtx_description, *sleep) == 0)
1451				goto next;
1452		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1453			file, line, check_only ? "could sleep" : "sleeping",
1454			m->mtx_description,
1455			m->mtx_witness->w_file, m->mtx_witness->w_line);
1456		n++;
1457	next:
1458	}
1459#ifdef DDB
1460	if (witness_ddb && n)
1461		Debugger("witness_sleep");
1462#endif /* DDB */
1463	return (n);
1464}
1465
1466static struct witness *
1467enroll(const char *description, int flag)
1468{
1469	int i;
1470	struct witness *w, *w1;
1471	char **ignore;
1472	char **order;
1473
1474	if (!witness_watch)
1475		return (NULL);
1476	for (ignore = ignore_list; *ignore != NULL; ignore++)
1477		if (strcmp(description, *ignore) == 0)
1478			return (NULL);
1479
1480	if (w_inited == 0) {
1481		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1482		for (i = 0; i < WITNESS_COUNT; i++) {
1483			w = &w_data[i];
1484			witness_free(w);
1485		}
1486		w_inited = 1;
1487		for (order = order_list; *order != NULL; order++) {
1488			w = enroll(*order, MTX_DEF);
1489			w->w_file = "order list";
1490			for (order++; *order != NULL; order++) {
1491				w1 = enroll(*order, MTX_DEF);
1492				w1->w_file = "order list";
1493				itismychild(w, w1);
1494				w = w1;
1495    	    	    	}
1496		}
1497	}
1498	if ((flag & MTX_SPIN) && witness_skipspin)
1499		return (NULL);
1500	mtx_enter(&w_mtx, MTX_SPIN | MTX_QUIET);
1501	for (w = w_all; w; w = w->w_next) {
1502		if (strcmp(description, w->w_description) == 0) {
1503			mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1504			return (w);
1505		}
1506	}
1507	if ((w = witness_get()) == NULL)
1508		return (NULL);
1509	w->w_next = w_all;
1510	w_all = w;
1511	w->w_description = description;
1512	mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1513	if (flag & MTX_SPIN) {
1514		w->w_spin = 1;
1515
1516		i = 1;
1517		for (order = spin_order_list; *order != NULL; order++) {
1518			if (strcmp(description, *order) == 0)
1519				break;
1520			i <<= 1;
1521		}
1522		if (*order == NULL)
1523			panic("spin lock %s not in order list", description);
1524		w->w_level = i;
1525	}
1526
1527	return (w);
1528}
1529
1530static int
1531itismychild(struct witness *parent, struct witness *child)
1532{
1533	static int recursed;
1534
1535	/*
1536	 * Insert "child" after "parent"
1537	 */
1538	while (parent->w_morechildren)
1539		parent = parent->w_morechildren;
1540
1541	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1542		if ((parent->w_morechildren = witness_get()) == NULL)
1543			return (1);
1544		parent = parent->w_morechildren;
1545	}
1546	MPASS(child != NULL);
1547	parent->w_children[parent->w_childcnt++] = child;
1548	/*
1549	 * now prune whole tree
1550	 */
1551	if (recursed)
1552		return (0);
1553	recursed = 1;
1554	for (child = w_all; child != NULL; child = child->w_next) {
1555		for (parent = w_all; parent != NULL;
1556		    parent = parent->w_next) {
1557			if (!isitmychild(parent, child))
1558				continue;
1559			removechild(parent, child);
1560			if (isitmydescendant(parent, child))
1561				continue;
1562			itismychild(parent, child);
1563		}
1564	}
1565	recursed = 0;
1566	witness_levelall();
1567	return (0);
1568}
1569
1570static void
1571removechild(struct witness *parent, struct witness *child)
1572{
1573	struct witness *w, *w1;
1574	int i;
1575
1576	for (w = parent; w != NULL; w = w->w_morechildren)
1577		for (i = 0; i < w->w_childcnt; i++)
1578			if (w->w_children[i] == child)
1579				goto found;
1580	return;
1581found:
1582	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1583		continue;
1584	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1585	MPASS(w->w_children[i] != NULL);
1586
1587	if (w1->w_childcnt != 0)
1588		return;
1589
1590	if (w1 == parent)
1591		return;
1592	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1593		continue;
1594	w->w_morechildren = 0;
1595	witness_free(w1);
1596}
1597
1598static int
1599isitmychild(struct witness *parent, struct witness *child)
1600{
1601	struct witness *w;
1602	int i;
1603
1604	for (w = parent; w != NULL; w = w->w_morechildren) {
1605		for (i = 0; i < w->w_childcnt; i++) {
1606			if (w->w_children[i] == child)
1607				return (1);
1608		}
1609	}
1610	return (0);
1611}
1612
1613static int
1614isitmydescendant(struct witness *parent, struct witness *child)
1615{
1616	struct witness *w;
1617	int i;
1618	int j;
1619
1620	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1621		MPASS(j < 1000);
1622		for (i = 0; i < w->w_childcnt; i++) {
1623			if (w->w_children[i] == child)
1624				return (1);
1625		}
1626		for (i = 0; i < w->w_childcnt; i++) {
1627			if (isitmydescendant(w->w_children[i], child))
1628				return (1);
1629		}
1630	}
1631	return (0);
1632}
1633
1634void
1635witness_levelall (void)
1636{
1637	struct witness *w, *w1;
1638
1639	for (w = w_all; w; w = w->w_next)
1640		if (!(w->w_spin))
1641			w->w_level = 0;
1642	for (w = w_all; w; w = w->w_next) {
1643		if (w->w_spin)
1644			continue;
1645		for (w1 = w_all; w1; w1 = w1->w_next) {
1646			if (isitmychild(w1, w))
1647				break;
1648		}
1649		if (w1 != NULL)
1650			continue;
1651		witness_leveldescendents(w, 0);
1652	}
1653}
1654
1655static void
1656witness_leveldescendents(struct witness *parent, int level)
1657{
1658	int i;
1659	struct witness *w;
1660
1661	if (parent->w_level < level)
1662		parent->w_level = level;
1663	level++;
1664	for (w = parent; w != NULL; w = w->w_morechildren)
1665		for (i = 0; i < w->w_childcnt; i++)
1666			witness_leveldescendents(w->w_children[i], level);
1667}
1668
1669static void
1670witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1671			   struct witness *parent)
1672{
1673	struct witness *w;
1674	int i;
1675	int level = parent->w_level;
1676
1677	prnt("%d", level);
1678	if (level < 10)
1679		prnt(" ");
1680	for (i = 0; i < level; i++)
1681		prnt(" ");
1682	prnt("%s", parent->w_description);
1683	if (parent->w_file != NULL) {
1684		prnt(" -- last acquired @ %s", parent->w_file);
1685#ifndef W_USE_WHERE
1686		prnt(":%d", parent->w_line);
1687#endif
1688		prnt("\n");
1689	}
1690
1691	for (w = parent; w != NULL; w = w->w_morechildren)
1692		for (i = 0; i < w->w_childcnt; i++)
1693			    witness_displaydescendants(prnt, w->w_children[i]);
1694    }
1695
1696static int
1697dup_ok(struct witness *w)
1698{
1699	char **dup;
1700
1701	for (dup = dup_list; *dup!= NULL; dup++)
1702		if (strcmp(w->w_description, *dup) == 0)
1703			return (1);
1704	return (0);
1705}
1706
1707static int
1708blessed(struct witness *w1, struct witness *w2)
1709{
1710	int i;
1711	struct witness_blessed *b;
1712
1713	for (i = 0; i < blessed_count; i++) {
1714		b = &blessed_list[i];
1715		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1716			if (strcmp(w2->w_description, b->b_lock2) == 0)
1717				return (1);
1718			continue;
1719		}
1720		if (strcmp(w1->w_description, b->b_lock2) == 0)
1721			if (strcmp(w2->w_description, b->b_lock1) == 0)
1722				return (1);
1723	}
1724	return (0);
1725}
1726
1727static struct witness *
1728witness_get()
1729{
1730	struct witness *w;
1731
1732	if ((w = w_free) == NULL) {
1733		witness_dead = 1;
1734		mtx_exit(&w_mtx, MTX_SPIN | MTX_QUIET);
1735		printf("witness exhausted\n");
1736		return (NULL);
1737	}
1738	w_free = w->w_next;
1739	bzero(w, sizeof(*w));
1740	return (w);
1741}
1742
1743static void
1744witness_free(struct witness *w)
1745{
1746	w->w_next = w_free;
1747	w_free = w;
1748}
1749
1750int
1751witness_list(struct proc *p)
1752{
1753	struct mtx *m;
1754	int nheld;
1755
1756	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1757	nheld = 0;
1758	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1759	    m = LIST_NEXT(m, mtx_held)) {
1760		printf("\t\"%s\" (%p) locked at %s:%d\n",
1761		    m->mtx_description, m,
1762		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1763		nheld++;
1764	}
1765
1766	return (nheld);
1767}
1768
1769void
1770witness_save(struct mtx *m, const char **filep, int *linep)
1771{
1772
1773	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1774	if (m->mtx_witness == NULL)
1775		return;
1776
1777	*filep = m->mtx_witness->w_file;
1778	*linep = m->mtx_witness->w_line;
1779}
1780
1781void
1782witness_restore(struct mtx *m, const char *file, int line)
1783{
1784
1785	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1786	if (m->mtx_witness == NULL)
1787		return;
1788
1789	m->mtx_witness->w_file = file;
1790	m->mtx_witness->w_line = line;
1791}
1792
1793#endif	/* WITNESS */
1794