subr_witness.c revision 73912
150476Speter/*-
282486Sbde * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3139761Skrion *
4139761Skrion * Redistribution and use in source and binary forms, with or without
521611Swosch * modification, are permitted provided that the following conditions
618052Sbde * are met:
794940Sru * 1. Redistributions of source code must retain the above copyright
894940Sru *    notice, this list of conditions and the following disclaimer.
994940Sru * 2. Redistributions in binary form must reproduce the above copyright
1094940Sru *    notice, this list of conditions and the following disclaimer in the
1136494Sbde *    documentation and/or other materials provided with the distribution.
1218052Sbde * 3. Berkeley Software Design Inc's name may not be used to endorse or
1336494Sbde *    promote products derived from this software without specific prior
14125762Skientzle *    written permission.
15125255Sbde *
16270905Sngie * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17270905Sngie * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1839412Sphk * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19186647Srwatson * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20168418Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21135771Strhodes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22125256Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23117183Sru * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24155211Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25122404Sharti * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2679495Sobrien * SUCH DAMAGE.
27243348Sdim *
28227983Stheraven *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29227983Stheraven *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30243348Sdim * $FreeBSD: head/sys/kern/subr_witness.c 73912 2001-03-07 02:45:15Z jhb $
3136494Sbde */
3276515Sbde
3336494Sbde/*
3439259Sgibbs * Machine independent bits of mutex implementation and implementation of
3576515Sbde * `witness' structure & related debugging routines.
3636494Sbde */
3736494Sbde
38125255Sbde/*
39179184Sjb *	Main Entry: witness
4036494Sbde *	Pronunciation: 'wit-n&s
4176515Sbde *	Function: noun
4239259Sgibbs *	Etymology: Middle English witnesse, from Old English witnes knowledge,
4336494Sbde *	    testimony, witness, from 2wit
44135549Sdes *	Date: before 12th century
45179184Sjb *	1 : attestation of a fact or event : TESTIMONY
46179184Sjb *	2 : one that gives evidence; specifically : one who testifies in
4736494Sbde *	    a cause or before a judicial tribunal
48176439Sru *	3 : one asked to be present at a transaction so as to be able to
49255180Semaste *	    testify to its having taken place
5074535Sdes *	4 : one who has personal knowledge of something
5118052Sbde *	5 a : something serving as evidence or proof : SIGN
5276515Sbde *	  b : public affirmation by word or example of usually
5376515Sbde *	      religious faith or conviction <the heroic witness to divine
5436494Sbde *	      life -- Pilot>
55116318Speter *	6 capitalized : a member of the Jehovah's Witnesses
56112461Sru */
5736494Sbde
58125255Sbde#include "opt_ddb.h"
59178828Sdfr#include "opt_witness.h"
60125255Sbde
6147570Sache#include <sys/param.h>
62233294Sstas#include <sys/bus.h>
63255455Sdes#include <sys/kernel.h>
64255455Sdes#include <sys/malloc.h>
65178828Sdfr#include <sys/proc.h>
66233294Sstas#include <sys/sysctl.h>
67178828Sdfr#include <sys/systm.h>
6857538Sshin#include <sys/vmmeter.h>
69156905Sru#include <sys/ktr.h>
7036494Sbde
71156905Sru#include <machine/atomic.h>
72194869Sjamie#include <machine/bus.h>
73125255Sbde#include <machine/clock.h>
74125255Sbde#include <machine/cpu.h>
75125255Sbde
76233294Sstas#include <ddb/ddb.h>
77104465Sru
78120492Sfjoe#include <vm/vm.h>
79125255Sbde#include <vm/vm_extern.h>
8036494Sbde
8136494Sbde#include <sys/mutex.h>
82246827Sdes
83255386Sdes/*
84246827Sdes * The WITNESS-enabled mutex debug structure.
8565916Sache */
86207842Smm#ifdef WITNESS
8736494Sbdestruct mtx_debug {
88133362Sobrien	struct witness	*mtxd_witness;
8936494Sbde	LIST_ENTRY(mtx)	mtxd_held;
90148100Srwatson	const char	*mtxd_file;
9176515Sbde	int		mtxd_line;
92156813Sru};
9390796Sgshapiro
9490796Sgshapiro#define mtx_held	mtx_debug->mtxd_held
9536494Sbde#define	mtx_file	mtx_debug->mtxd_file
9636494Sbde#define	mtx_line	mtx_debug->mtxd_line
97167359Srafan#define	mtx_witness	mtx_debug->mtxd_witness
9852419Sjulian#endif	/* WITNESS */
99121615Sharti
100168407Spjd/*
10136494Sbde * Internal utility macros.
10242916Sjdp */
10342916Sjdp#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104107256Sru
10559770Sbde#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106125381Sru	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107107256Sru
108156813Sru#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
109178828Sdfr
110125381Sru/*
111178828Sdfr * Early WITNESS-enabled declarations.
11276576Smarkm */
113125381Sru#ifdef WITNESS
114137675Sbz
115125381Sru/*
116137675Sbz * Internal WITNESS routines which must be prototyped early.
117156813Sru *
118125381Sru * XXX: When/if witness code is cleaned up, it would be wise to place all
119125381Sru *	witness prototyping early in this file.
12042916Sjdp */
121156813Srustatic void witness_init(struct mtx *, int flag);
122137675Sbzstatic void witness_destroy(struct mtx *);
123137675Sbzstatic void witness_display(void(*)(const char *fmt, ...));
12489705Sru
125137675SbzMALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
12642916Sjdp
12776515Sbde/* All mutexes in system (used for debug/panic) */
12836494Sbdestatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
129145256Sjkoshy
130179184Sjb/*
131235641Smarcel * This global is set to 0 once it becomes safe to use the witness code.
132172401Sru */
13341231Sjdpstatic int witness_cold = 1;
13436494Sbde
135125255Sbde#else	/* WITNESS */
13636494Sbde
137204311Sru/* XXX XXX XXX
138204311Sru * flag++ is sleazoid way of shuting up warning
139210680Srpaulo */
140125256Sbde#define witness_init(m, flag) flag++
141121054Semax#define witness_destroy(m)
142252356Sdavide#define witness_try_enter(m, t, f, l)
143255386Sdes#endif	/* WITNESS */
144125255Sbde
145270760Sngie/*
146125537Sru * All mutex locks in system are kept on the all_mtx list.
14736494Sbde */
14841231Sjdpstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
14936494Sbde	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
15018052Sbde	{ NULL, NULL }, &all_mtx, &all_mtx,
15176515Sbde#ifdef WITNESS
152263019Sbapt	&all_mtx_debug
153109725Sru#else
154101224Srwatson	NULL
155168407Spjd#endif
156255597Sdes	 };
157255627Sdes
158255597Sdes/*
159104465Sru * Global variables for book keeping.
160201920Santoine */
161200062Sedstatic int	mtx_cur_cnt;
16236494Sbdestatic int	mtx_max_cnt;
163168407Spjd
16476515Sbde/*
165245652Sneel * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
166233294Sstas */
16744757Smarkmchar	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
16836494Sbdechar	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
16936494Sbdechar	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
170256998Sbdrewerychar	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
17194578Sdes
17236494Sbde/*
173168407Spjd * Prototypes for non-exported routines.
174248571Smm *
175168407Spjd * NOTE: Prototypes for witness routines are placed at the bottom of the file.
176 */
177static void	propagate_priority(struct proc *);
178
179static void
180propagate_priority(struct proc *p)
181{
182	int pri = p->p_pri.pri_level;
183	struct mtx *m = p->p_blocked;
184
185	mtx_assert(&sched_lock, MA_OWNED);
186	for (;;) {
187		struct proc *p1;
188
189		p = mtx_owner(m);
190
191		if (p == NULL) {
192			/*
193			 * This really isn't quite right. Really
194			 * ought to bump priority of process that
195			 * next acquires the mutex.
196			 */
197			MPASS(m->mtx_lock == MTX_CONTESTED);
198			return;
199		}
200
201		MPASS(p->p_magic == P_MAGIC);
202		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
203		if (p->p_pri.pri_level <= pri)
204			return;
205
206		/*
207		 * Bump this process' priority.
208		 */
209		SET_PRIO(p, pri);
210
211		/*
212		 * If lock holder is actually running, just bump priority.
213		 */
214		if (p->p_oncpu != NOCPU) {
215			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
216			return;
217		}
218
219#ifndef SMP
220		/*
221		 * For UP, we check to see if p is curproc (this shouldn't
222		 * ever happen however as it would mean we are in a deadlock.)
223		 */
224		KASSERT(p != curproc, ("Deadlock detected"));
225#endif
226
227		/*
228		 * If on run queue move to new run queue, and
229		 * quit.
230		 */
231		if (p->p_stat == SRUN) {
232			MPASS(p->p_blocked == NULL);
233			remrunqueue(p);
234			setrunqueue(p);
235			return;
236		}
237
238		/*
239		 * If we aren't blocked on a mutex, we should be.
240		 */
241		KASSERT(p->p_stat == SMTX, (
242		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
243		    p->p_pid, p->p_comm, p->p_stat,
244		    m->mtx_description));
245
246		/*
247		 * Pick up the mutex that p is blocked on.
248		 */
249		m = p->p_blocked;
250		MPASS(m != NULL);
251
252		/*
253		 * Check if the proc needs to be moved up on
254		 * the blocked chain
255		 */
256		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
257			continue;
258		}
259
260		p1 = TAILQ_PREV(p, procqueue, p_procq);
261		if (p1->p_pri.pri_level <= pri) {
262			continue;
263		}
264
265		/*
266		 * Remove proc from blocked chain and determine where
267		 * it should be moved up to.  Since we know that p1 has
268		 * a lower priority than p, we know that at least one
269		 * process in the chain has a lower priority and that
270		 * p1 will thus not be NULL after the loop.
271		 */
272		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
273		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
274			MPASS(p1->p_magic == P_MAGIC);
275			if (p1->p_pri.pri_level > pri)
276				break;
277		}
278
279		MPASS(p1 != NULL);
280		TAILQ_INSERT_BEFORE(p1, p, p_procq);
281		CTR4(KTR_LOCK,
282		    "propagate_priority: p %p moved before %p on [%p] %s",
283		    p, p1, m, m->mtx_description);
284	}
285}
286
287/*
288 * The important part of mtx_trylock{,_flags}()
289 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
290 * if we're called, it's because we know we don't already own this lock.
291 */
292int
293_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
294{
295	int rval;
296
297	MPASS(curproc != NULL);
298
299	/*
300	 * _mtx_trylock does not accept MTX_NOSWITCH option.
301	 */
302	KASSERT((opts & MTX_NOSWITCH) == 0,
303	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
304
305	rval = _obtain_lock(m, curproc);
306
307#ifdef WITNESS
308	if (rval && m->mtx_witness != NULL) {
309		/*
310		 * We do not handle recursion in _mtx_trylock; see the
311		 * note at the top of the routine.
312		 */
313		KASSERT(!mtx_recursed(m),
314		    ("mtx_trylock() called on a recursed mutex"));
315		witness_try_enter(m, (opts | m->mtx_flags), file, line);
316	}
317#endif	/* WITNESS */
318
319	if ((opts & MTX_QUIET) == 0)
320		CTR5(KTR_LOCK, "TRY_LOCK %s [%p] result=%d at %s:%d",
321		    m->mtx_description, m, rval, file, line);
322
323	return rval;
324}
325
326/*
327 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
328 *
329 * We call this if the lock is either contested (i.e. we need to go to
330 * sleep waiting for it), or if we need to recurse on it.
331 */
332void
333_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
334{
335	struct proc *p = curproc;
336
337	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
338		m->mtx_recurse++;
339		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
340		if ((opts & MTX_QUIET) == 0)
341			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
342		return;
343	}
344
345	if ((opts & MTX_QUIET) == 0)
346		CTR4(KTR_LOCK,
347		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
348		    m->mtx_description, (void *)m->mtx_lock, file, line);
349
350	while (!_obtain_lock(m, p)) {
351		uintptr_t v;
352		struct proc *p1;
353
354		mtx_lock_spin(&sched_lock);
355		/*
356		 * Check if the lock has been released while spinning for
357		 * the sched_lock.
358		 */
359		if ((v = m->mtx_lock) == MTX_UNOWNED) {
360			mtx_unlock_spin(&sched_lock);
361			continue;
362		}
363
364		/*
365		 * The mutex was marked contested on release. This means that
366		 * there are processes blocked on it.
367		 */
368		if (v == MTX_CONTESTED) {
369			p1 = TAILQ_FIRST(&m->mtx_blocked);
370			MPASS(p1 != NULL);
371			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
372
373			if (p1->p_pri.pri_level < p->p_pri.pri_level)
374				SET_PRIO(p, p1->p_pri.pri_level);
375			mtx_unlock_spin(&sched_lock);
376			return;
377		}
378
379		/*
380		 * If the mutex isn't already contested and a failure occurs
381		 * setting the contested bit, the mutex was either released
382		 * or the state of the MTX_RECURSED bit changed.
383		 */
384		if ((v & MTX_CONTESTED) == 0 &&
385		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
386			(void *)(v | MTX_CONTESTED))) {
387			mtx_unlock_spin(&sched_lock);
388			continue;
389		}
390
391		/*
392		 * We deffinately must sleep for this lock.
393		 */
394		mtx_assert(m, MA_NOTOWNED);
395
396#ifdef notyet
397		/*
398		 * If we're borrowing an interrupted thread's VM context, we
399		 * must clean up before going to sleep.
400		 */
401		if (p->p_ithd != NULL) {
402			struct ithd *it = p->p_ithd;
403
404			if (it->it_interrupted) {
405				if ((opts & MTX_QUIET) == 0)
406					CTR2(KTR_LOCK,
407				    "_mtx_lock_sleep: %p interrupted %p",
408					    it, it->it_interrupted);
409				intr_thd_fixup(it);
410			}
411		}
412#endif
413
414		/*
415		 * Put us on the list of threads blocked on this mutex.
416		 */
417		if (TAILQ_EMPTY(&m->mtx_blocked)) {
418			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
419			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
420			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
421		} else {
422			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
423				if (p1->p_pri.pri_level > p->p_pri.pri_level)
424					break;
425			if (p1)
426				TAILQ_INSERT_BEFORE(p1, p, p_procq);
427			else
428				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
429		}
430
431		/*
432		 * Save who we're blocked on.
433		 */
434		p->p_blocked = m;
435		p->p_mtxname = m->mtx_description;
436		p->p_stat = SMTX;
437		propagate_priority(p);
438
439		if ((opts & MTX_QUIET) == 0)
440			CTR3(KTR_LOCK,
441			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
442			    m->mtx_description);
443
444		mi_switch();
445
446		if ((opts & MTX_QUIET) == 0)
447			CTR3(KTR_LOCK,
448			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
449			  p, m, m->mtx_description);
450
451		mtx_unlock_spin(&sched_lock);
452	}
453
454	return;
455}
456
457/*
458 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
459 *
460 * This is only called if we need to actually spin for the lock. Recursion
461 * is handled inline.
462 */
463void
464_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
465	       int line)
466{
467	int i = 0;
468
469	if ((opts & MTX_QUIET) == 0)
470		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
471
472	for (;;) {
473		if (_obtain_lock(m, curproc))
474			break;
475
476		while (m->mtx_lock != MTX_UNOWNED) {
477			if (i++ < 1000000)
478				continue;
479			if (i++ < 6000000)
480				DELAY(1);
481#ifdef DDB
482			else if (!db_active)
483#else
484			else
485#endif
486			panic("spin lock %s held by %p for > 5 seconds",
487			    m->mtx_description, (void *)m->mtx_lock);
488		}
489	}
490
491	m->mtx_saveintr = mtx_intr;
492	if ((opts & MTX_QUIET) == 0)
493		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
494
495	return;
496}
497
498/*
499 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
500 *
501 * We are only called here if the lock is recursed or contested (i.e. we
502 * need to wake up a blocked thread).
503 */
504void
505_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
506{
507	struct proc *p, *p1;
508	struct mtx *m1;
509	int pri;
510
511	p = curproc;
512
513	if (mtx_recursed(m)) {
514		if (--(m->mtx_recurse) == 0)
515			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
516		if ((opts & MTX_QUIET) == 0)
517			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
518		return;
519	}
520
521	mtx_lock_spin(&sched_lock);
522	if ((opts & MTX_QUIET) == 0)
523		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
524
525	p1 = TAILQ_FIRST(&m->mtx_blocked);
526	MPASS(p->p_magic == P_MAGIC);
527	MPASS(p1->p_magic == P_MAGIC);
528
529	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
530
531	if (TAILQ_EMPTY(&m->mtx_blocked)) {
532		LIST_REMOVE(m, mtx_contested);
533		_release_lock_quick(m);
534		if ((opts & MTX_QUIET) == 0)
535			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
536	} else
537		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
538
539	pri = PRI_MAX;
540	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
541		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
542		if (cp < pri)
543			pri = cp;
544	}
545
546	if (pri > p->p_pri.pri_native)
547		pri = p->p_pri.pri_native;
548	SET_PRIO(p, pri);
549
550	if ((opts & MTX_QUIET) == 0)
551		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
552		    m, p1);
553
554	p1->p_blocked = NULL;
555	p1->p_stat = SRUN;
556	setrunqueue(p1);
557
558	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
559#ifdef notyet
560		if (p->p_ithd != NULL) {
561			struct ithd *it = p->p_ithd;
562
563			if (it->it_interrupted) {
564				if ((opts & MTX_QUIET) == 0)
565					CTR2(KTR_LOCK,
566				    "_mtx_unlock_sleep: %p interrupted %p",
567					    it, it->it_interrupted);
568				intr_thd_fixup(it);
569			}
570		}
571#endif
572		setrunqueue(p);
573		if ((opts & MTX_QUIET) == 0)
574			CTR2(KTR_LOCK,
575			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
576			    (void *)m->mtx_lock);
577
578		mi_switch();
579		if ((opts & MTX_QUIET) == 0)
580			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
581			    m, (void *)m->mtx_lock);
582	}
583
584	mtx_unlock_spin(&sched_lock);
585
586	return;
587}
588
589/*
590 * All the unlocking of MTX_SPIN locks is done inline.
591 * See the _rel_spin_lock() macro for the details.
592 */
593
594/*
595 * The backing function for the INVARIANTS-enabled mtx_assert()
596 */
597#ifdef INVARIANT_SUPPORT
598void
599_mtx_assert(struct mtx *m, int what, const char *file, int line)
600{
601	switch (what) {
602	case MA_OWNED:
603	case MA_OWNED | MA_RECURSED:
604	case MA_OWNED | MA_NOTRECURSED:
605		if (!mtx_owned(m))
606			panic("mutex %s not owned at %s:%d",
607			    m->mtx_description, file, line);
608		if (mtx_recursed(m)) {
609			if ((what & MA_NOTRECURSED) != 0)
610				panic("mutex %s recursed at %s:%d",
611				    m->mtx_description, file, line);
612		} else if ((what & MA_RECURSED) != 0) {
613			panic("mutex %s unrecursed at %s:%d",
614			    m->mtx_description, file, line);
615		}
616		break;
617	case MA_NOTOWNED:
618		if (mtx_owned(m))
619			panic("mutex %s owned at %s:%d",
620			    m->mtx_description, file, line);
621		break;
622	default:
623		panic("unknown mtx_assert at %s:%d", file, line);
624	}
625}
626#endif
627
628/*
629 * The MUTEX_DEBUG-enabled mtx_validate()
630 */
631#define MV_DESTROY	0	/* validate before destory */
632#define MV_INIT		1	/* validate before init */
633
634#ifdef MUTEX_DEBUG
635
636int mtx_validate __P((struct mtx *, int));
637
638int
639mtx_validate(struct mtx *m, int when)
640{
641	struct mtx *mp;
642	int i;
643	int retval = 0;
644
645#ifdef WITNESS
646	if (witness_cold)
647		return 0;
648#endif
649	if (m == &all_mtx || cold)
650		return 0;
651
652	mtx_lock(&all_mtx);
653/*
654 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
655 * we can re-enable the kernacc() checks.
656 */
657#ifndef __alpha__
658	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
659	    VM_PROT_READ) == 1);
660#endif
661	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
662	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
663#ifndef __alpha__
664		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
665		    VM_PROT_READ) != 1) {
666			panic("mtx_validate: mp=%p mp->mtx_next=%p",
667			    mp, mp->mtx_next);
668		}
669#endif
670		i++;
671		if (i > mtx_cur_cnt) {
672			panic("mtx_validate: too many in chain, known=%d\n",
673			    mtx_cur_cnt);
674		}
675	}
676	MPASS(i == mtx_cur_cnt);
677	switch (when) {
678	case MV_DESTROY:
679		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
680			if (mp == m)
681				break;
682		MPASS(mp == m);
683		break;
684	case MV_INIT:
685		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
686		if (mp == m) {
687			/*
688			 * Not good. This mutex already exists.
689			 */
690			printf("re-initing existing mutex %s\n",
691			    m->mtx_description);
692			MPASS(m->mtx_lock == MTX_UNOWNED);
693			retval = 1;
694		}
695	}
696	mtx_unlock(&all_mtx);
697	return (retval);
698}
699#endif
700
701/*
702 * Mutex initialization routine; initialize lock `m' of type contained in
703 * `opts' with options contained in `opts' and description `description.'
704 * Place on "all_mtx" queue.
705 */
706void
707mtx_init(struct mtx *m, const char *description, int opts)
708{
709
710	if ((opts & MTX_QUIET) == 0)
711		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
712
713#ifdef MUTEX_DEBUG
714	/* Diagnostic and error correction */
715	if (mtx_validate(m, MV_INIT))
716		return;
717#endif
718
719	bzero((void *)m, sizeof *m);
720	TAILQ_INIT(&m->mtx_blocked);
721
722#ifdef WITNESS
723	if (!witness_cold) {
724		m->mtx_debug = malloc(sizeof(struct mtx_debug),
725		    M_WITNESS, M_NOWAIT | M_ZERO);
726		MPASS(m->mtx_debug != NULL);
727	}
728#endif
729
730	m->mtx_description = description;
731	m->mtx_flags = opts;
732	m->mtx_lock = MTX_UNOWNED;
733
734	/* Put on all mutex queue */
735	mtx_lock(&all_mtx);
736	m->mtx_next = &all_mtx;
737	m->mtx_prev = all_mtx.mtx_prev;
738	m->mtx_prev->mtx_next = m;
739	all_mtx.mtx_prev = m;
740	if (++mtx_cur_cnt > mtx_max_cnt)
741		mtx_max_cnt = mtx_cur_cnt;
742	mtx_unlock(&all_mtx);
743
744#ifdef WITNESS
745	if (!witness_cold)
746		witness_init(m, opts);
747#endif
748}
749
750/*
751 * Remove lock `m' from all_mtx queue.
752 */
753void
754mtx_destroy(struct mtx *m)
755{
756
757#ifdef WITNESS
758	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
759	    __FUNCTION__));
760#endif
761
762	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
763
764#ifdef MUTEX_DEBUG
765	if (m->mtx_next == NULL)
766		panic("mtx_destroy: %p (%s) already destroyed",
767		    m, m->mtx_description);
768
769	if (!mtx_owned(m)) {
770		MPASS(m->mtx_lock == MTX_UNOWNED);
771	} else {
772		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
773	}
774
775	/* diagnostic */
776	mtx_validate(m, MV_DESTROY);
777#endif
778
779#ifdef WITNESS
780	if (m->mtx_witness)
781		witness_destroy(m);
782#endif /* WITNESS */
783
784	/* Remove from the all mutex queue */
785	mtx_lock(&all_mtx);
786	m->mtx_next->mtx_prev = m->mtx_prev;
787	m->mtx_prev->mtx_next = m->mtx_next;
788
789#ifdef MUTEX_DEBUG
790	m->mtx_next = m->mtx_prev = NULL;
791#endif
792
793#ifdef WITNESS
794	free(m->mtx_debug, M_WITNESS);
795	m->mtx_debug = NULL;
796#endif
797
798	mtx_cur_cnt--;
799	mtx_unlock(&all_mtx);
800}
801
802
803/*
804 * The WITNESS-enabled diagnostic code.
805 */
806#ifdef WITNESS
807static void
808witness_fixup(void *dummy __unused)
809{
810	struct mtx *mp;
811
812	/*
813	 * We have to release Giant before initializing its witness
814	 * structure so that WITNESS doesn't get confused.
815	 */
816	mtx_unlock(&Giant);
817	mtx_assert(&Giant, MA_NOTOWNED);
818
819	mtx_lock(&all_mtx);
820
821	/* Iterate through all mutexes and finish up mutex initialization. */
822	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
823
824		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
825		    M_WITNESS, M_NOWAIT | M_ZERO);
826		MPASS(mp->mtx_debug != NULL);
827
828		witness_init(mp, mp->mtx_flags);
829	}
830	mtx_unlock(&all_mtx);
831
832	/* Mark the witness code as being ready for use. */
833	atomic_store_rel_int(&witness_cold, 0);
834
835	mtx_lock(&Giant);
836}
837SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
838
839#define WITNESS_COUNT 200
840#define	WITNESS_NCHILDREN 2
841
842int witness_watch = 1;
843
844struct witness {
845	struct witness	*w_next;
846	const char	*w_description;
847	const char	*w_file;
848	int		 w_line;
849	struct witness	*w_morechildren;
850	u_char		 w_childcnt;
851	u_char		 w_Giant_squawked:1;
852	u_char		 w_other_squawked:1;
853	u_char		 w_same_squawked:1;
854	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
855	u_int		 w_level;
856	struct witness	*w_children[WITNESS_NCHILDREN];
857};
858
859struct witness_blessed {
860	char 	*b_lock1;
861	char	*b_lock2;
862};
863
864#ifdef DDB
865/*
866 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
867 * drop into kdebug() when:
868 *	- a lock heirarchy violation occurs
869 *	- locks are held when going to sleep.
870 */
871int	witness_ddb;
872#ifdef WITNESS_DDB
873TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
874#else
875TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
876#endif
877SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
878#endif /* DDB */
879
880int	witness_skipspin;
881#ifdef WITNESS_SKIPSPIN
882TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
883#else
884TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
885#endif
886SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
887    "");
888
889/*
890 * Witness-enabled globals
891 */
892static struct mtx	w_mtx;
893static struct witness	*w_free;
894static struct witness	*w_all;
895static int		 w_inited;
896static int		 witness_dead;	/* fatal error, probably no memory */
897
898static struct witness	 w_data[WITNESS_COUNT];
899
900/*
901 * Internal witness routine prototypes
902 */
903static struct witness *enroll(const char *description, int flag);
904static int itismychild(struct witness *parent, struct witness *child);
905static void removechild(struct witness *parent, struct witness *child);
906static int isitmychild(struct witness *parent, struct witness *child);
907static int isitmydescendant(struct witness *parent, struct witness *child);
908static int dup_ok(struct witness *);
909static int blessed(struct witness *, struct witness *);
910static void
911    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
912static void witness_leveldescendents(struct witness *parent, int level);
913static void witness_levelall(void);
914static struct witness * witness_get(void);
915static void witness_free(struct witness *m);
916
917static char *ignore_list[] = {
918	"witness lock",
919	NULL
920};
921
922static char *spin_order_list[] = {
923#if defined(__i386__) && defined (SMP)
924	"com",
925#endif
926	"sio",
927#ifdef __i386__
928	"cy",
929#endif
930	"ng_node",
931	"ng_worklist",
932	"ithread table lock",
933	"ithread list lock",
934	"sched lock",
935#ifdef __i386__
936	"clk",
937#endif
938	"callout",
939	/*
940	 * leaf locks
941	 */
942#ifdef SMP
943#ifdef __i386__
944	"ap boot",
945	"imen",
946#endif
947	"smp rendezvous",
948#endif
949	NULL
950};
951
952static char *order_list[] = {
953	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
954	    "uidinfo struct", NULL,
955	NULL
956};
957
958static char *dup_list[] = {
959	"process lock",
960	NULL
961};
962
963static char *sleep_list[] = {
964	"Giant",
965	NULL
966};
967
968/*
969 * Pairs of locks which have been blessed
970 * Don't complain about order problems with blessed locks
971 */
972static struct witness_blessed blessed_list[] = {
973};
974static int blessed_count =
975	sizeof(blessed_list) / sizeof(struct witness_blessed);
976
977static void
978witness_init(struct mtx *m, int flag)
979{
980	m->mtx_witness = enroll(m->mtx_description, flag);
981}
982
983static void
984witness_destroy(struct mtx *m)
985{
986	struct mtx *m1;
987	struct proc *p;
988	p = curproc;
989	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
990		if (m1 == m) {
991			LIST_REMOVE(m, mtx_held);
992			break;
993		}
994	}
995	return;
996
997}
998
999static void
1000witness_display(void(*prnt)(const char *fmt, ...))
1001{
1002	struct witness *w, *w1;
1003	int level, found;
1004
1005	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1006	witness_levelall();
1007
1008	/*
1009	 * First, handle sleep mutexes which have been acquired at least
1010	 * once.
1011	 */
1012	prnt("Sleep mutexes:\n");
1013	for (w = w_all; w; w = w->w_next) {
1014		if (w->w_file == NULL || w->w_spin)
1015			continue;
1016		for (w1 = w_all; w1; w1 = w1->w_next) {
1017			if (isitmychild(w1, w))
1018				break;
1019		}
1020		if (w1 != NULL)
1021			continue;
1022		/*
1023		 * This lock has no anscestors, display its descendants.
1024		 */
1025		witness_displaydescendants(prnt, w);
1026	}
1027
1028	/*
1029	 * Now do spin mutexes which have been acquired at least once.
1030	 */
1031	prnt("\nSpin mutexes:\n");
1032	level = 0;
1033	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1034		found = 0;
1035		for (w = w_all; w; w = w->w_next) {
1036			if (w->w_file == NULL || !w->w_spin)
1037				continue;
1038			if (w->w_level == 1 << level) {
1039				witness_displaydescendants(prnt, w);
1040				level++;
1041				found = 1;
1042			}
1043		}
1044		if (found == 0)
1045			level++;
1046	}
1047
1048	/*
1049	 * Finally, any mutexes which have not been acquired yet.
1050	 */
1051	prnt("\nMutexes which were never acquired:\n");
1052	for (w = w_all; w; w = w->w_next) {
1053		if (w->w_file != NULL)
1054			continue;
1055		prnt("%s\n", w->w_description);
1056	}
1057}
1058
1059void
1060witness_enter(struct mtx *m, int flags, const char *file, int line)
1061{
1062	struct witness *w, *w1;
1063	struct mtx *m1;
1064	struct proc *p;
1065	int i;
1066#ifdef DDB
1067	int go_into_ddb = 0;
1068#endif /* DDB */
1069
1070	if (witness_cold || m->mtx_witness == NULL || panicstr)
1071		return;
1072	w = m->mtx_witness;
1073	p = curproc;
1074
1075	if (flags & MTX_SPIN) {
1076		if ((m->mtx_flags & MTX_SPIN) == 0)
1077			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1078			    " %s:%d", m->mtx_description, file, line);
1079		if (mtx_recursed(m)) {
1080			if ((m->mtx_flags & MTX_RECURSE) == 0)
1081				panic("mutex_enter: recursion on non-recursive"
1082				    " mutex %s @ %s:%d", m->mtx_description,
1083				    file, line);
1084			return;
1085		}
1086		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1087		i = PCPU_GET(witness_spin_check);
1088		if (i != 0 && w->w_level < i) {
1089			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1090			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1091			    " %s:%d already holding %s:%x",
1092			    m->mtx_description, w->w_level, file, line,
1093			    spin_order_list[ffs(i)-1], i);
1094		}
1095		PCPU_SET(witness_spin_check, i | w->w_level);
1096		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1097		w->w_file = file;
1098		w->w_line = line;
1099		m->mtx_line = line;
1100		m->mtx_file = file;
1101		return;
1102	}
1103	if ((m->mtx_flags & MTX_SPIN) != 0)
1104		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1105		    m->mtx_description, file, line);
1106
1107	if (mtx_recursed(m)) {
1108		if ((m->mtx_flags & MTX_RECURSE) == 0)
1109			panic("mutex_enter: recursion on non-recursive"
1110			    " mutex %s @ %s:%d", m->mtx_description,
1111			    file, line);
1112		return;
1113	}
1114	if (witness_dead)
1115		goto out;
1116	if (cold)
1117		goto out;
1118
1119	if (!mtx_legal2block())
1120		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1121			    m->mtx_description, file, line);
1122	/*
1123	 * Is this the first mutex acquired
1124	 */
1125	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1126		goto out;
1127
1128	if ((w1 = m1->mtx_witness) == w) {
1129		if (w->w_same_squawked || dup_ok(w))
1130			goto out;
1131		w->w_same_squawked = 1;
1132		printf("acquring duplicate lock of same type: \"%s\"\n",
1133			m->mtx_description);
1134		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1135		printf(" 2nd @ %s:%d\n", file, line);
1136#ifdef DDB
1137		go_into_ddb = 1;
1138#endif /* DDB */
1139		goto out;
1140	}
1141	MPASS(!mtx_owned(&w_mtx));
1142	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1143	/*
1144	 * If we have a known higher number just say ok
1145	 */
1146	if (witness_watch > 1 && w->w_level > w1->w_level) {
1147		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1148		goto out;
1149	}
1150	if (isitmydescendant(m1->mtx_witness, w)) {
1151		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1152		goto out;
1153	}
1154	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1155
1156		MPASS(i < 200);
1157		w1 = m1->mtx_witness;
1158		if (isitmydescendant(w, w1)) {
1159			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1160			if (blessed(w, w1))
1161				goto out;
1162			if (m1 == &Giant) {
1163				if (w1->w_Giant_squawked)
1164					goto out;
1165				else
1166					w1->w_Giant_squawked = 1;
1167			} else {
1168				if (w1->w_other_squawked)
1169					goto out;
1170				else
1171					w1->w_other_squawked = 1;
1172			}
1173			printf("lock order reversal\n");
1174			printf(" 1st %s last acquired @ %s:%d\n",
1175			    w->w_description, w->w_file, w->w_line);
1176			printf(" 2nd %p %s @ %s:%d\n",
1177			    m1, w1->w_description, w1->w_file, w1->w_line);
1178			printf(" 3rd %p %s @ %s:%d\n",
1179			    m, w->w_description, file, line);
1180#ifdef DDB
1181			go_into_ddb = 1;
1182#endif /* DDB */
1183			goto out;
1184		}
1185	}
1186	m1 = LIST_FIRST(&p->p_heldmtx);
1187	if (!itismychild(m1->mtx_witness, w))
1188		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1189
1190out:
1191#ifdef DDB
1192	if (witness_ddb && go_into_ddb)
1193		Debugger("witness_enter");
1194#endif /* DDB */
1195	w->w_file = file;
1196	w->w_line = line;
1197	m->mtx_line = line;
1198	m->mtx_file = file;
1199
1200	/*
1201	 * If this pays off it likely means that a mutex being witnessed
1202	 * is acquired in hardclock. Put it in the ignore list. It is
1203	 * likely not the mutex this assert fails on.
1204	 */
1205	MPASS(m->mtx_held.le_prev == NULL);
1206	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1207}
1208
1209void
1210witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1211{
1212	struct proc *p;
1213	struct witness *w = m->mtx_witness;
1214
1215	if (witness_cold)
1216		return;
1217	if (panicstr)
1218		return;
1219	if (flags & MTX_SPIN) {
1220		if ((m->mtx_flags & MTX_SPIN) == 0)
1221			panic("mutex_try_enter: "
1222			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1223			    m->mtx_description, file, line);
1224		if (mtx_recursed(m)) {
1225			if ((m->mtx_flags & MTX_RECURSE) == 0)
1226				panic("mutex_try_enter: recursion on"
1227				    " non-recursive mutex %s @ %s:%d",
1228				    m->mtx_description, file, line);
1229			return;
1230		}
1231		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1232		PCPU_SET(witness_spin_check,
1233		    PCPU_GET(witness_spin_check) | w->w_level);
1234		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1235		w->w_file = file;
1236		w->w_line = line;
1237		m->mtx_line = line;
1238		m->mtx_file = file;
1239		return;
1240	}
1241
1242	if ((m->mtx_flags & MTX_SPIN) != 0)
1243		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1244		    m->mtx_description, file, line);
1245
1246	if (mtx_recursed(m)) {
1247		if ((m->mtx_flags & MTX_RECURSE) == 0)
1248			panic("mutex_try_enter: recursion on non-recursive"
1249			    " mutex %s @ %s:%d", m->mtx_description, file,
1250			    line);
1251		return;
1252	}
1253	w->w_file = file;
1254	w->w_line = line;
1255	m->mtx_line = line;
1256	m->mtx_file = file;
1257	p = curproc;
1258	MPASS(m->mtx_held.le_prev == NULL);
1259	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1260}
1261
1262void
1263witness_exit(struct mtx *m, int flags, const char *file, int line)
1264{
1265	struct witness *w;
1266
1267	if (witness_cold || m->mtx_witness == NULL || panicstr)
1268		return;
1269	w = m->mtx_witness;
1270
1271	if (flags & MTX_SPIN) {
1272		if ((m->mtx_flags & MTX_SPIN) == 0)
1273			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1274			    " %s:%d", m->mtx_description, file, line);
1275		if (mtx_recursed(m)) {
1276			if ((m->mtx_flags & MTX_RECURSE) == 0)
1277				panic("mutex_exit: recursion on non-recursive"
1278				    " mutex %s @ %s:%d", m->mtx_description,
1279				    file, line);
1280			return;
1281		}
1282		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1283		PCPU_SET(witness_spin_check,
1284		    PCPU_GET(witness_spin_check) & ~w->w_level);
1285		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1286		return;
1287	}
1288	if ((m->mtx_flags & MTX_SPIN) != 0)
1289		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1290		    m->mtx_description, file, line);
1291
1292	if (mtx_recursed(m)) {
1293		if ((m->mtx_flags & MTX_RECURSE) == 0)
1294			panic("mutex_exit: recursion on non-recursive"
1295			    " mutex %s @ %s:%d", m->mtx_description,
1296			    file, line);
1297		return;
1298	}
1299
1300	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1301		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1302			    m->mtx_description, file, line);
1303	LIST_REMOVE(m, mtx_held);
1304	m->mtx_held.le_prev = NULL;
1305}
1306
1307int
1308witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1309{
1310	struct mtx *m;
1311	struct proc *p;
1312	char **sleep;
1313	int n = 0;
1314
1315	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1316	p = curproc;
1317	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1318		if (m == mtx)
1319			continue;
1320		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1321			if (strcmp(m->mtx_description, *sleep) == 0)
1322				goto next;
1323		if (n == 0)
1324			printf("Whee!\n");
1325		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1326			file, line, check_only ? "could sleep" : "sleeping",
1327			m->mtx_description,
1328			m->mtx_witness->w_file, m->mtx_witness->w_line);
1329		n++;
1330	next:
1331	}
1332#ifdef DDB
1333	if (witness_ddb && n)
1334		Debugger("witness_sleep");
1335#endif /* DDB */
1336	return (n);
1337}
1338
1339static struct witness *
1340enroll(const char *description, int flag)
1341{
1342	int i;
1343	struct witness *w, *w1;
1344	char **ignore;
1345	char **order;
1346
1347	if (!witness_watch)
1348		return (NULL);
1349	for (ignore = ignore_list; *ignore != NULL; ignore++)
1350		if (strcmp(description, *ignore) == 0)
1351			return (NULL);
1352
1353	if (w_inited == 0) {
1354		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1355		for (i = 0; i < WITNESS_COUNT; i++) {
1356			w = &w_data[i];
1357			witness_free(w);
1358		}
1359		w_inited = 1;
1360		for (order = order_list; *order != NULL; order++) {
1361			w = enroll(*order, MTX_DEF);
1362			w->w_file = "order list";
1363			for (order++; *order != NULL; order++) {
1364				w1 = enroll(*order, MTX_DEF);
1365				w1->w_file = "order list";
1366				itismychild(w, w1);
1367				w = w1;
1368    	    	    	}
1369		}
1370	}
1371	if ((flag & MTX_SPIN) && witness_skipspin)
1372		return (NULL);
1373	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1374	for (w = w_all; w; w = w->w_next) {
1375		if (strcmp(description, w->w_description) == 0) {
1376			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1377			return (w);
1378		}
1379	}
1380	if ((w = witness_get()) == NULL)
1381		return (NULL);
1382	w->w_next = w_all;
1383	w_all = w;
1384	w->w_description = description;
1385	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1386	if (flag & MTX_SPIN) {
1387		w->w_spin = 1;
1388
1389		i = 1;
1390		for (order = spin_order_list; *order != NULL; order++) {
1391			if (strcmp(description, *order) == 0)
1392				break;
1393			i <<= 1;
1394		}
1395		if (*order == NULL)
1396			panic("spin lock %s not in order list", description);
1397		w->w_level = i;
1398	}
1399
1400	return (w);
1401}
1402
1403static int
1404itismychild(struct witness *parent, struct witness *child)
1405{
1406	static int recursed;
1407
1408	/*
1409	 * Insert "child" after "parent"
1410	 */
1411	while (parent->w_morechildren)
1412		parent = parent->w_morechildren;
1413
1414	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1415		if ((parent->w_morechildren = witness_get()) == NULL)
1416			return (1);
1417		parent = parent->w_morechildren;
1418	}
1419	MPASS(child != NULL);
1420	parent->w_children[parent->w_childcnt++] = child;
1421	/*
1422	 * now prune whole tree
1423	 */
1424	if (recursed)
1425		return (0);
1426	recursed = 1;
1427	for (child = w_all; child != NULL; child = child->w_next) {
1428		for (parent = w_all; parent != NULL;
1429		    parent = parent->w_next) {
1430			if (!isitmychild(parent, child))
1431				continue;
1432			removechild(parent, child);
1433			if (isitmydescendant(parent, child))
1434				continue;
1435			itismychild(parent, child);
1436		}
1437	}
1438	recursed = 0;
1439	witness_levelall();
1440	return (0);
1441}
1442
1443static void
1444removechild(struct witness *parent, struct witness *child)
1445{
1446	struct witness *w, *w1;
1447	int i;
1448
1449	for (w = parent; w != NULL; w = w->w_morechildren)
1450		for (i = 0; i < w->w_childcnt; i++)
1451			if (w->w_children[i] == child)
1452				goto found;
1453	return;
1454found:
1455	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1456		continue;
1457	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1458	MPASS(w->w_children[i] != NULL);
1459
1460	if (w1->w_childcnt != 0)
1461		return;
1462
1463	if (w1 == parent)
1464		return;
1465	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1466		continue;
1467	w->w_morechildren = 0;
1468	witness_free(w1);
1469}
1470
1471static int
1472isitmychild(struct witness *parent, struct witness *child)
1473{
1474	struct witness *w;
1475	int i;
1476
1477	for (w = parent; w != NULL; w = w->w_morechildren) {
1478		for (i = 0; i < w->w_childcnt; i++) {
1479			if (w->w_children[i] == child)
1480				return (1);
1481		}
1482	}
1483	return (0);
1484}
1485
1486static int
1487isitmydescendant(struct witness *parent, struct witness *child)
1488{
1489	struct witness *w;
1490	int i;
1491	int j;
1492
1493	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1494		MPASS(j < 1000);
1495		for (i = 0; i < w->w_childcnt; i++) {
1496			if (w->w_children[i] == child)
1497				return (1);
1498		}
1499		for (i = 0; i < w->w_childcnt; i++) {
1500			if (isitmydescendant(w->w_children[i], child))
1501				return (1);
1502		}
1503	}
1504	return (0);
1505}
1506
1507void
1508witness_levelall (void)
1509{
1510	struct witness *w, *w1;
1511
1512	for (w = w_all; w; w = w->w_next)
1513		if (!(w->w_spin))
1514			w->w_level = 0;
1515	for (w = w_all; w; w = w->w_next) {
1516		if (w->w_spin)
1517			continue;
1518		for (w1 = w_all; w1; w1 = w1->w_next) {
1519			if (isitmychild(w1, w))
1520				break;
1521		}
1522		if (w1 != NULL)
1523			continue;
1524		witness_leveldescendents(w, 0);
1525	}
1526}
1527
1528static void
1529witness_leveldescendents(struct witness *parent, int level)
1530{
1531	int i;
1532	struct witness *w;
1533
1534	if (parent->w_level < level)
1535		parent->w_level = level;
1536	level++;
1537	for (w = parent; w != NULL; w = w->w_morechildren)
1538		for (i = 0; i < w->w_childcnt; i++)
1539			witness_leveldescendents(w->w_children[i], level);
1540}
1541
1542static void
1543witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1544			   struct witness *parent)
1545{
1546	struct witness *w;
1547	int i;
1548	int level;
1549
1550	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1551
1552	prnt("%d", level);
1553	if (level < 10)
1554		prnt(" ");
1555	for (i = 0; i < level; i++)
1556		prnt(" ");
1557	prnt("%s", parent->w_description);
1558	if (parent->w_file != NULL)
1559		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1560		    parent->w_line);
1561
1562	for (w = parent; w != NULL; w = w->w_morechildren)
1563		for (i = 0; i < w->w_childcnt; i++)
1564			    witness_displaydescendants(prnt, w->w_children[i]);
1565    }
1566
1567static int
1568dup_ok(struct witness *w)
1569{
1570	char **dup;
1571
1572	for (dup = dup_list; *dup!= NULL; dup++)
1573		if (strcmp(w->w_description, *dup) == 0)
1574			return (1);
1575	return (0);
1576}
1577
1578static int
1579blessed(struct witness *w1, struct witness *w2)
1580{
1581	int i;
1582	struct witness_blessed *b;
1583
1584	for (i = 0; i < blessed_count; i++) {
1585		b = &blessed_list[i];
1586		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1587			if (strcmp(w2->w_description, b->b_lock2) == 0)
1588				return (1);
1589			continue;
1590		}
1591		if (strcmp(w1->w_description, b->b_lock2) == 0)
1592			if (strcmp(w2->w_description, b->b_lock1) == 0)
1593				return (1);
1594	}
1595	return (0);
1596}
1597
1598static struct witness *
1599witness_get()
1600{
1601	struct witness *w;
1602
1603	if ((w = w_free) == NULL) {
1604		witness_dead = 1;
1605		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1606		printf("witness exhausted\n");
1607		return (NULL);
1608	}
1609	w_free = w->w_next;
1610	bzero(w, sizeof(*w));
1611	return (w);
1612}
1613
1614static void
1615witness_free(struct witness *w)
1616{
1617	w->w_next = w_free;
1618	w_free = w;
1619}
1620
1621int
1622witness_list(struct proc *p)
1623{
1624	struct mtx *m;
1625	int nheld;
1626
1627	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1628	nheld = 0;
1629	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1630		printf("\t\"%s\" (%p) locked at %s:%d\n",
1631		    m->mtx_description, m,
1632		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1633		nheld++;
1634	}
1635
1636	return (nheld);
1637}
1638
1639#ifdef DDB
1640
1641DB_SHOW_COMMAND(mutexes, db_witness_list)
1642{
1643
1644	witness_list(curproc);
1645}
1646
1647DB_SHOW_COMMAND(witness, db_witness_display)
1648{
1649
1650	witness_display(db_printf);
1651}
1652#endif
1653
1654void
1655witness_save(struct mtx *m, const char **filep, int *linep)
1656{
1657
1658	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1659	if (m->mtx_witness == NULL)
1660		return;
1661
1662	*filep = m->mtx_witness->w_file;
1663	*linep = m->mtx_witness->w_line;
1664}
1665
1666void
1667witness_restore(struct mtx *m, const char *file, int line)
1668{
1669
1670	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1671	if (m->mtx_witness == NULL)
1672		return;
1673
1674	m->mtx_witness->w_file = file;
1675	m->mtx_witness->w_line = line;
1676}
1677
1678#endif	/* WITNESS */
1679