subr_witness.c revision 72344
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_witness.c 72344 2001-02-11 02:54:16Z bmilekic $
31 */
32
33/*
34 * Machine independent bits of mutex implementation and implementation of
35 * `witness' structure & related debugging routines.
36 */
37
38/*
39 *	Main Entry: witness
40 *	Pronunciation: 'wit-n&s
41 *	Function: noun
42 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
43 *	    testimony, witness, from 2wit
44 *	Date: before 12th century
45 *	1 : attestation of a fact or event : TESTIMONY
46 *	2 : one that gives evidence; specifically : one who testifies in
47 *	    a cause or before a judicial tribunal
48 *	3 : one asked to be present at a transaction so as to be able to
49 *	    testify to its having taken place
50 *	4 : one who has personal knowledge of something
51 *	5 a : something serving as evidence or proof : SIGN
52 *	  b : public affirmation by word or example of usually
53 *	      religious faith or conviction <the heroic witness to divine
54 *	      life -- Pilot>
55 *	6 capitalized : a member of the Jehovah's Witnesses
56 */
57
58#include "opt_ddb.h"
59#include "opt_witness.h"
60
61#include <sys/param.h>
62#include <sys/bus.h>
63#include <sys/kernel.h>
64#include <sys/malloc.h>
65#include <sys/proc.h>
66#include <sys/sysctl.h>
67#include <sys/systm.h>
68#include <sys/vmmeter.h>
69#include <sys/ktr.h>
70
71#include <machine/atomic.h>
72#include <machine/bus.h>
73#include <machine/clock.h>
74#include <machine/cpu.h>
75
76#include <ddb/ddb.h>
77
78#include <vm/vm.h>
79#include <vm/vm_extern.h>
80
81#include <sys/mutex.h>
82
83/*
84 * The WITNESS-enabled mutex debug structure.
85 */
86#ifdef WITNESS
87struct mtx_debug {
88	struct witness	*mtxd_witness;
89	LIST_ENTRY(mtx)	mtxd_held;
90	const char	*mtxd_file;
91	int		mtxd_line;
92};
93
94#define mtx_held	mtx_debug->mtxd_held
95#define	mtx_file	mtx_debug->mtxd_file
96#define	mtx_line	mtx_debug->mtxd_line
97#define	mtx_witness	mtx_debug->mtxd_witness
98#endif	/* WITNESS */
99
100/*
101 * Internal utility macros.
102 */
103#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104
105#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107
108#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
109#define SET_PRIO(p, pri)	(p)->p_priority = (pri)
110
111/*
112 * Early WITNESS-enabled declarations.
113 */
114#ifdef WITNESS
115
116/*
117 * Internal WITNESS routines which must be prototyped early.
118 *
119 * XXX: When/if witness code is cleaned up, it would be wise to place all
120 *	witness prototyping early in this file.
121 */
122static void witness_init(struct mtx *, int flag);
123static void witness_destroy(struct mtx *);
124static void witness_display(void(*)(const char *fmt, ...));
125
126MALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
127
128/* All mutexes in system (used for debug/panic) */
129static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
130
131/*
132 * This global is set to 0 once it becomes safe to use the witness code.
133 */
134static int witness_cold = 1;
135
136#else	/* WITNESS */
137
138/* XXX XXX XXX
139 * flag++ is sleazoid way of shuting up warning
140 */
141#define witness_init(m, flag) flag++
142#define witness_destroy(m)
143#define witness_try_enter(m, t, f, l)
144#endif	/* WITNESS */
145
146/*
147 * All mutex locks in system are kept on the all_mtx list.
148 */
149static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
150	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
151	{ NULL, NULL }, &all_mtx, &all_mtx,
152#ifdef WITNESS
153	&all_mtx_debug
154#else
155	NULL
156#endif
157	 };
158
159/*
160 * Global variables for book keeping.
161 */
162static int	mtx_cur_cnt;
163static int	mtx_max_cnt;
164
165/*
166 * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
167 */
168char	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
169char	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
170char	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
171char	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
172
173/*
174 * Prototypes for non-exported routines.
175 *
176 * NOTE: Prototypes for witness routines are placed at the bottom of the file.
177 */
178static void	propagate_priority(struct proc *);
179
180static void
181propagate_priority(struct proc *p)
182{
183	int pri = p->p_priority;
184	struct mtx *m = p->p_blocked;
185
186	mtx_assert(&sched_lock, MA_OWNED);
187	for (;;) {
188		struct proc *p1;
189
190		p = mtx_owner(m);
191
192		if (p == NULL) {
193			/*
194			 * This really isn't quite right. Really
195			 * ought to bump priority of process that
196			 * next acquires the mutex.
197			 */
198			MPASS(m->mtx_lock == MTX_CONTESTED);
199			return;
200		}
201
202		MPASS(p->p_magic == P_MAGIC);
203		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
204		if (p->p_priority <= pri)
205			return;
206
207		/*
208		 * Bump this process' priority.
209		 */
210		SET_PRIO(p, pri);
211
212		/*
213		 * If lock holder is actually running, just bump priority.
214		 */
215#ifdef SMP
216		/*
217		 * For SMP, we can check the p_oncpu field to see if we are
218		 * running.
219		 */
220		if (p->p_oncpu != 0xff) {
221			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
222			return;
223		}
224#else
225		/*
226		 * For UP, we check to see if p is curproc (this shouldn't
227		 * ever happen however as it would mean we are in a deadlock.)
228		 */
229		if (p == curproc) {
230			panic("Deadlock detected");
231			return;
232		}
233#endif
234		/*
235		 * If on run queue move to new run queue, and
236		 * quit.
237		 */
238		if (p->p_stat == SRUN) {
239			printf("XXX: moving proc %d(%s) to a new run queue\n",
240			       p->p_pid, p->p_comm);
241			MPASS(p->p_blocked == NULL);
242			remrunqueue(p);
243			setrunqueue(p);
244			return;
245		}
246
247		/*
248		 * If we aren't blocked on a mutex, we should be.
249		 */
250		KASSERT(p->p_stat == SMTX, (
251		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
252		    p->p_pid, p->p_comm, p->p_stat,
253		    m->mtx_description));
254
255		/*
256		 * Pick up the mutex that p is blocked on.
257		 */
258		m = p->p_blocked;
259		MPASS(m != NULL);
260
261		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
262		    p->p_comm, m->mtx_description);
263
264		/*
265		 * Check if the proc needs to be moved up on
266		 * the blocked chain
267		 */
268		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
269			printf("XXX: process at head of run queue\n");
270			continue;
271		}
272
273		p1 = TAILQ_PREV(p, rq, p_procq);
274		if (p1->p_priority <= pri) {
275			printf(
276			   "XXX: previous process %d(%s) has higher priority\n",
277	                    p->p_pid, p->p_comm);
278			continue;
279		}
280
281		/*
282		 * Remove proc from blocked chain and determine where
283		 * it should be moved up to.  Since we know that p1 has
284		 * a lower priority than p, we know that at least one
285		 * process in the chain has a lower priority and that
286		 * p1 will thus not be NULL after the loop.
287		 */
288		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
289		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
290			MPASS(p1->p_magic == P_MAGIC);
291			if (p1->p_priority > pri)
292				break;
293		}
294
295		MPASS(p1 != NULL);
296		TAILQ_INSERT_BEFORE(p1, p, p_procq);
297		CTR4(KTR_LOCK,
298		    "propagate_priority: p %p moved before %p on [%p] %s",
299		    p, p1, m, m->mtx_description);
300	}
301}
302
303/*
304 * The important part of mtx_trylock{,_flags}()
305 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
306 * if we're called, it's because we know we don't already own this lock.
307 */
308int
309_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
310{
311	int rval;
312
313	MPASS(CURPROC != NULL);
314
315	/*
316	 * _mtx_trylock does not accept MTX_NOSWITCH option.
317	 */
318	KASSERT((opts & MTX_NOSWITCH) == 0,
319	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
320
321	rval = _obtain_lock(m, CURTHD);
322
323#ifdef WITNESS
324	if (rval && m->mtx_witness != NULL) {
325		/*
326		 * We do not handle recursion in _mtx_trylock; see the
327		 * note at the top of the routine.
328		 */
329		KASSERT(!mtx_recursed(m),
330		    ("mtx_trylock() called on a recursed mutex"));
331		witness_try_enter(m, (opts | m->mtx_flags), file, line);
332	}
333#endif	/* WITNESS */
334
335	if ((opts & MTX_QUIET) == 0)
336		CTR5(KTR_LOCK, "TRY_ENTER %s [%p] result=%d at %s:%d",
337		    m->mtx_description, m, rval, file, line);
338
339	return rval;
340}
341
342/*
343 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
344 *
345 * We call this if the lock is either contested (i.e. we need to go to
346 * sleep waiting for it), or if we need to recurse on it.
347 */
348void
349_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
350{
351	struct proc *p = CURPROC;
352
353	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
354		m->mtx_recurse++;
355		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
356		if ((opts & MTX_QUIET) == 0)
357			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
358		return;
359	}
360
361	if ((opts & MTX_QUIET) == 0)
362		CTR3(KTR_LOCK, "_mtx_lock_sleep: %p contested (lock=%p) [%p]",
363		    m, (void *)m->mtx_lock, (void *)RETIP(m));
364
365	/*
366	 * Save our priority. Even though p_nativepri is protected by
367	 * sched_lock, we don't obtain it here as it can be expensive.
368	 * Since this is the only place p_nativepri is set, and since two
369	 * CPUs will not be executing the same process concurrently, we know
370	 * that no other CPU is going to be messing with this. Also,
371	 * p_nativepri is only read when we are blocked on a mutex, so that
372	 * can't be happening right now either.
373	 */
374	p->p_nativepri = p->p_priority;
375
376	while (!_obtain_lock(m, p)) {
377		uintptr_t v;
378		struct proc *p1;
379
380		mtx_lock_spin(&sched_lock);
381		/*
382		 * Check if the lock has been released while spinning for
383		 * the sched_lock.
384		 */
385		if ((v = m->mtx_lock) == MTX_UNOWNED) {
386			mtx_unlock_spin(&sched_lock);
387			continue;
388		}
389
390		/*
391		 * The mutex was marked contested on release. This means that
392		 * there are processes blocked on it.
393		 */
394		if (v == MTX_CONTESTED) {
395			p1 = TAILQ_FIRST(&m->mtx_blocked);
396			MPASS(p1 != NULL);
397			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
398
399			if (p1->p_priority < p->p_priority)
400				SET_PRIO(p, p1->p_priority);
401			mtx_unlock_spin(&sched_lock);
402			return;
403		}
404
405		/*
406		 * If the mutex isn't already contested and a failure occurs
407		 * setting the contested bit, the mutex was either released
408		 * or the state of the MTX_RECURSED bit changed.
409		 */
410		if ((v & MTX_CONTESTED) == 0 &&
411		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
412			(void *)(v | MTX_CONTESTED))) {
413			mtx_unlock_spin(&sched_lock);
414			continue;
415		}
416
417		/*
418		 * We deffinately must sleep for this lock.
419		 */
420		mtx_assert(m, MA_NOTOWNED);
421
422#ifdef notyet
423		/*
424		 * If we're borrowing an interrupted thread's VM context, we
425		 * must clean up before going to sleep.
426		 */
427		if (p->p_flag & (P_ITHD | P_SITHD)) {
428			ithd_t *it = (ithd_t *)p;
429
430			if (it->it_interrupted) {
431				if ((opts & MTX_QUIET) == 0)
432					CTR2(KTR_LOCK,
433				    "_mtx_lock_sleep: 0x%x interrupted 0x%x",
434					    it, it->it_interrupted);
435				intr_thd_fixup(it);
436			}
437		}
438#endif
439
440		/*
441		 * Put us on the list of threads blocked on this mutex.
442		 */
443		if (TAILQ_EMPTY(&m->mtx_blocked)) {
444			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
445			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
446			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
447		} else {
448			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
449				if (p1->p_priority > p->p_priority)
450					break;
451			if (p1)
452				TAILQ_INSERT_BEFORE(p1, p, p_procq);
453			else
454				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
455		}
456
457		/*
458		 * Save who we're blocked on.
459		 */
460		p->p_blocked = m;
461		p->p_mtxname = m->mtx_description;
462		p->p_stat = SMTX;
463#if 0
464		propagate_priority(p);
465#endif
466
467		if ((opts & MTX_QUIET) == 0)
468			CTR3(KTR_LOCK,
469			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
470			    m->mtx_description);
471
472		mi_switch();
473
474		if ((opts & MTX_QUIET) == 0)
475			CTR3(KTR_LOCK,
476			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
477			  p, m, m->mtx_description);
478
479		mtx_unlock_spin(&sched_lock);
480	}
481
482	return;
483}
484
485/*
486 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
487 *
488 * This is only called if we need to actually spin for the lock. Recursion
489 * is handled inline.
490 */
491void
492_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
493	       int line)
494{
495	int i = 0;
496
497	if ((opts & MTX_QUIET) == 0)
498		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
499
500	for (;;) {
501		if (_obtain_lock(m, CURPROC))
502			break;
503
504		while (m->mtx_lock != MTX_UNOWNED) {
505			if (i++ < 1000000)
506				continue;
507			if (i++ < 6000000)
508				DELAY(1);
509#ifdef DDB
510			else if (!db_active)
511#else
512			else
513#endif
514			panic("spin lock %s held by %p for > 5 seconds",
515			    m->mtx_description, (void *)m->mtx_lock);
516		}
517	}
518
519	m->mtx_saveintr = mtx_intr;
520	if ((opts & MTX_QUIET) == 0)
521		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
522
523	return;
524}
525
526/*
527 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
528 *
529 * We are only called here if the lock is recursed or contested (i.e. we
530 * need to wake up a blocked thread).
531 */
532void
533_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
534{
535	struct proc *p, *p1;
536	struct mtx *m1;
537	int pri;
538
539	p = CURPROC;
540	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
541
542	if (mtx_recursed(m)) {
543		if (--(m->mtx_recurse) == 0)
544			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
545		if ((opts & MTX_QUIET) == 0)
546			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
547		return;
548	}
549
550	mtx_lock_spin(&sched_lock);
551	if ((opts & MTX_QUIET) == 0)
552		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
553
554	p1 = TAILQ_FIRST(&m->mtx_blocked);
555	MPASS(p->p_magic == P_MAGIC);
556	MPASS(p1->p_magic == P_MAGIC);
557
558	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
559
560	if (TAILQ_EMPTY(&m->mtx_blocked)) {
561		LIST_REMOVE(m, mtx_contested);
562		_release_lock_quick(m);
563		if ((opts & MTX_QUIET) == 0)
564			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
565	} else
566		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
567
568	pri = MAXPRI;
569	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
570		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
571		if (cp < pri)
572			pri = cp;
573	}
574
575	if (pri > p->p_nativepri)
576		pri = p->p_nativepri;
577	SET_PRIO(p, pri);
578
579	if ((opts & MTX_QUIET) == 0)
580		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
581		    m, p1);
582
583	p1->p_blocked = NULL;
584	p1->p_mtxname = NULL;
585	p1->p_stat = SRUN;
586	setrunqueue(p1);
587
588	if ((opts & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
589#ifdef notyet
590		if (p->p_flag & (P_ITHD | P_SITHD)) {
591			ithd_t *it = (ithd_t *)p;
592
593			if (it->it_interrupted) {
594				if ((opts & MTX_QUIET) == 0)
595					CTR2(KTR_LOCK,
596				    "_mtx_unlock_sleep: 0x%x interrupted 0x%x",
597					    it, it->it_interrupted);
598				intr_thd_fixup(it);
599			}
600		}
601#endif
602		setrunqueue(p);
603		if ((opts & MTX_QUIET) == 0)
604			CTR2(KTR_LOCK,
605			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
606			    (void *)m->mtx_lock);
607
608		mi_switch();
609		if ((opts & MTX_QUIET) == 0)
610			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
611			    m, (void *)m->mtx_lock);
612	}
613
614	mtx_unlock_spin(&sched_lock);
615
616	return;
617}
618
619/*
620 * All the unlocking of MTX_SPIN locks is done inline.
621 * See the _rel_spin_lock() macro for the details.
622 */
623
624/*
625 * The INVARIANTS-enabled mtx_assert()
626 */
627#ifdef INVARIANTS
628void
629_mtx_assert(struct mtx *m, int what, const char *file, int line)
630{
631	switch ((what)) {
632	case MA_OWNED:
633	case MA_OWNED | MA_RECURSED:
634	case MA_OWNED | MA_NOTRECURSED:
635		if (!mtx_owned((m)))
636			panic("mutex %s not owned at %s:%d",
637			    (m)->mtx_description, file, line);
638		if (mtx_recursed((m))) {
639			if (((what) & MA_NOTRECURSED) != 0)
640				panic("mutex %s recursed at %s:%d",
641				    (m)->mtx_description, file, line);
642		} else if (((what) & MA_RECURSED) != 0) {
643			panic("mutex %s unrecursed at %s:%d",
644			    (m)->mtx_description, file, line);
645		}
646		break;
647	case MA_NOTOWNED:
648		if (mtx_owned((m)))
649			panic("mutex %s owned at %s:%d",
650			    (m)->mtx_description, file, line);
651		break;
652	default:
653		panic("unknown mtx_assert at %s:%d", file, line);
654	}
655}
656#endif
657
658/*
659 * The MUTEX_DEBUG-enabled mtx_validate()
660 */
661#define MV_DESTROY	0	/* validate before destory */
662#define MV_INIT		1	/* validate before init */
663
664#ifdef MUTEX_DEBUG
665
666int mtx_validate __P((struct mtx *, int));
667
668int
669mtx_validate(struct mtx *m, int when)
670{
671	struct mtx *mp;
672	int i;
673	int retval = 0;
674
675#ifdef WITNESS
676	if (witness_cold)
677		return 0;
678#endif
679	if (m == &all_mtx || cold)
680		return 0;
681
682	mtx_lock(&all_mtx);
683/*
684 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
685 * we can re-enable the kernacc() checks.
686 */
687#ifndef __alpha__
688	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
689	    VM_PROT_READ) == 1);
690#endif
691	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
692	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
693#ifndef __alpha__
694		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
695		    VM_PROT_READ) != 1) {
696			panic("mtx_validate: mp=%p mp->mtx_next=%p",
697			    mp, mp->mtx_next);
698		}
699#endif
700		i++;
701		if (i > mtx_cur_cnt) {
702			panic("mtx_validate: too many in chain, known=%d\n",
703			    mtx_cur_cnt);
704		}
705	}
706	MPASS(i == mtx_cur_cnt);
707	switch (when) {
708	case MV_DESTROY:
709		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
710			if (mp == m)
711				break;
712		MPASS(mp == m);
713		break;
714	case MV_INIT:
715		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
716		if (mp == m) {
717			/*
718			 * Not good. This mutex already exists.
719			 */
720			printf("re-initing existing mutex %s\n",
721			    m->mtx_description);
722			MPASS(m->mtx_lock == MTX_UNOWNED);
723			retval = 1;
724		}
725	}
726	mtx_unlock(&all_mtx);
727	return (retval);
728}
729#endif
730
731/*
732 * Mutex initialization routine; initialize lock `m' of type contained in
733 * `opts' with options contained in `opts' and description `description.'
734 * Place on "all_mtx" queue.
735 */
736void
737mtx_init(struct mtx *m, const char *description, int opts)
738{
739
740	if ((opts & MTX_QUIET) == 0)
741		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
742
743#ifdef MUTEX_DEBUG
744	/* Diagnostic and error correction */
745	if (mtx_validate(m, MV_INIT))
746		return;
747#endif
748
749	bzero((void *)m, sizeof *m);
750	TAILQ_INIT(&m->mtx_blocked);
751
752#ifdef WITNESS
753	if (!witness_cold) {
754		m->mtx_debug = malloc(sizeof(struct mtx_debug),
755		    M_WITNESS, M_NOWAIT | M_ZERO);
756		MPASS(m->mtx_debug != NULL);
757	}
758#endif
759
760	m->mtx_description = description;
761	m->mtx_flags = opts;
762	m->mtx_lock = MTX_UNOWNED;
763
764	/* Put on all mutex queue */
765	mtx_lock(&all_mtx);
766	m->mtx_next = &all_mtx;
767	m->mtx_prev = all_mtx.mtx_prev;
768	m->mtx_prev->mtx_next = m;
769	all_mtx.mtx_prev = m;
770	if (++mtx_cur_cnt > mtx_max_cnt)
771		mtx_max_cnt = mtx_cur_cnt;
772	mtx_unlock(&all_mtx);
773
774#ifdef WITNESS
775	if (!witness_cold)
776		witness_init(m, opts);
777#endif
778}
779
780/*
781 * Remove lock `m' from all_mtx queue.
782 */
783void
784mtx_destroy(struct mtx *m)
785{
786
787#ifdef WITNESS
788	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
789	    __FUNCTION__));
790#endif
791
792	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
793
794#ifdef MUTEX_DEBUG
795	if (m->mtx_next == NULL)
796		panic("mtx_destroy: %p (%s) already destroyed",
797		    m, m->mtx_description);
798
799	if (!mtx_owned(m)) {
800		MPASS(m->mtx_lock == MTX_UNOWNED);
801	} else {
802		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
803	}
804
805	/* diagnostic */
806	mtx_validate(m, MV_DESTROY);
807#endif
808
809#ifdef WITNESS
810	if (m->mtx_witness)
811		witness_destroy(m);
812#endif /* WITNESS */
813
814	/* Remove from the all mutex queue */
815	mtx_lock(&all_mtx);
816	m->mtx_next->mtx_prev = m->mtx_prev;
817	m->mtx_prev->mtx_next = m->mtx_next;
818
819#ifdef MUTEX_DEBUG
820	m->mtx_next = m->mtx_prev = NULL;
821#endif
822
823#ifdef WITNESS
824	free(m->mtx_debug, M_WITNESS);
825	m->mtx_debug = NULL;
826#endif
827
828	mtx_cur_cnt--;
829	mtx_unlock(&all_mtx);
830}
831
832
833/*
834 * The WITNESS-enabled diagnostic code.
835 */
836#ifdef WITNESS
837static void
838witness_fixup(void *dummy __unused)
839{
840	struct mtx *mp;
841
842	/*
843	 * We have to release Giant before initializing its witness
844	 * structure so that WITNESS doesn't get confused.
845	 */
846	mtx_unlock(&Giant);
847	mtx_assert(&Giant, MA_NOTOWNED);
848
849	mtx_lock(&all_mtx);
850
851	/* Iterate through all mutexes and finish up mutex initialization. */
852	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
853
854		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
855		    M_WITNESS, M_NOWAIT | M_ZERO);
856		MPASS(mp->mtx_debug != NULL);
857
858		witness_init(mp, mp->mtx_flags);
859	}
860	mtx_unlock(&all_mtx);
861
862	/* Mark the witness code as being ready for use. */
863	atomic_store_rel_int(&witness_cold, 0);
864
865	mtx_lock(&Giant);
866}
867SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
868
869#define WITNESS_COUNT 200
870#define	WITNESS_NCHILDREN 2
871
872int witness_watch = 1;
873
874struct witness {
875	struct witness	*w_next;
876	const char	*w_description;
877	const char	*w_file;
878	int		 w_line;
879	struct witness	*w_morechildren;
880	u_char		 w_childcnt;
881	u_char		 w_Giant_squawked:1;
882	u_char		 w_other_squawked:1;
883	u_char		 w_same_squawked:1;
884	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
885	u_int		 w_level;
886	struct witness	*w_children[WITNESS_NCHILDREN];
887};
888
889struct witness_blessed {
890	char 	*b_lock1;
891	char	*b_lock2;
892};
893
894#ifdef DDB
895/*
896 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
897 * drop into kdebug() when:
898 *	- a lock heirarchy violation occurs
899 *	- locks are held when going to sleep.
900 */
901int	witness_ddb;
902#ifdef WITNESS_DDB
903TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
904#else
905TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
906#endif
907SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
908#endif /* DDB */
909
910int	witness_skipspin;
911#ifdef WITNESS_SKIPSPIN
912TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
913#else
914TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
915#endif
916SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
917    "");
918
919/*
920 * Witness-enabled globals
921 */
922static struct mtx	w_mtx;
923static struct witness	*w_free;
924static struct witness	*w_all;
925static int		 w_inited;
926static int		 witness_dead;	/* fatal error, probably no memory */
927
928static struct witness	 w_data[WITNESS_COUNT];
929
930/*
931 * Internal witness routine prototypes
932 */
933static struct witness *enroll(const char *description, int flag);
934static int itismychild(struct witness *parent, struct witness *child);
935static void removechild(struct witness *parent, struct witness *child);
936static int isitmychild(struct witness *parent, struct witness *child);
937static int isitmydescendant(struct witness *parent, struct witness *child);
938static int dup_ok(struct witness *);
939static int blessed(struct witness *, struct witness *);
940static void
941    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
942static void witness_leveldescendents(struct witness *parent, int level);
943static void witness_levelall(void);
944static struct witness * witness_get(void);
945static void witness_free(struct witness *m);
946
947static char *ignore_list[] = {
948	"witness lock",
949	NULL
950};
951
952static char *spin_order_list[] = {
953#if defined(__i386__) && defined (SMP)
954	"com",
955#endif
956	"sio",
957#ifdef __i386__
958	"cy",
959#endif
960	"sched lock",
961#ifdef __i386__
962	"clk",
963#endif
964	"callout",
965	/*
966	 * leaf locks
967	 */
968	"ithread table lock",
969	"ithread list lock",
970#ifdef SMP
971#ifdef __i386__
972	"ap boot",
973	"imen",
974#endif
975	"smp rendezvous",
976#endif
977	NULL
978};
979
980static char *order_list[] = {
981	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
982	    "uidinfo struct", NULL,
983	NULL
984};
985
986static char *dup_list[] = {
987	NULL
988};
989
990static char *sleep_list[] = {
991	"Giant",
992	NULL
993};
994
995/*
996 * Pairs of locks which have been blessed
997 * Don't complain about order problems with blessed locks
998 */
999static struct witness_blessed blessed_list[] = {
1000};
1001static int blessed_count =
1002	sizeof(blessed_list) / sizeof(struct witness_blessed);
1003
1004static void
1005witness_init(struct mtx *m, int flag)
1006{
1007	m->mtx_witness = enroll(m->mtx_description, flag);
1008}
1009
1010static void
1011witness_destroy(struct mtx *m)
1012{
1013	struct mtx *m1;
1014	struct proc *p;
1015	p = CURPROC;
1016	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
1017		if (m1 == m) {
1018			LIST_REMOVE(m, mtx_held);
1019			break;
1020		}
1021	}
1022	return;
1023
1024}
1025
1026static void
1027witness_display(void(*prnt)(const char *fmt, ...))
1028{
1029	struct witness *w, *w1;
1030	int level, found;
1031
1032	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1033	witness_levelall();
1034
1035	/*
1036	 * First, handle sleep mutexes which have been acquired at least
1037	 * once.
1038	 */
1039	prnt("Sleep mutexes:\n");
1040	for (w = w_all; w; w = w->w_next) {
1041		if (w->w_file == NULL || w->w_spin)
1042			continue;
1043		for (w1 = w_all; w1; w1 = w1->w_next) {
1044			if (isitmychild(w1, w))
1045				break;
1046		}
1047		if (w1 != NULL)
1048			continue;
1049		/*
1050		 * This lock has no anscestors, display its descendants.
1051		 */
1052		witness_displaydescendants(prnt, w);
1053	}
1054
1055	/*
1056	 * Now do spin mutexes which have been acquired at least once.
1057	 */
1058	prnt("\nSpin mutexes:\n");
1059	level = 0;
1060	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1061		found = 0;
1062		for (w = w_all; w; w = w->w_next) {
1063			if (w->w_file == NULL || !w->w_spin)
1064				continue;
1065			if (w->w_level == 1 << level) {
1066				witness_displaydescendants(prnt, w);
1067				level++;
1068				found = 1;
1069			}
1070		}
1071		if (found == 0)
1072			level++;
1073	}
1074
1075	/*
1076	 * Finally, any mutexes which have not been acquired yet.
1077	 */
1078	prnt("\nMutexes which were never acquired:\n");
1079	for (w = w_all; w; w = w->w_next) {
1080		if (w->w_file != NULL)
1081			continue;
1082		prnt("%s\n", w->w_description);
1083	}
1084}
1085
1086void
1087witness_enter(struct mtx *m, int flags, const char *file, int line)
1088{
1089	struct witness *w, *w1;
1090	struct mtx *m1;
1091	struct proc *p;
1092	int i;
1093#ifdef DDB
1094	int go_into_ddb = 0;
1095#endif /* DDB */
1096
1097	if (witness_cold || m->mtx_witness == NULL || panicstr)
1098		return;
1099	w = m->mtx_witness;
1100	p = CURPROC;
1101
1102	if (flags & MTX_SPIN) {
1103		if ((m->mtx_flags & MTX_SPIN) == 0)
1104			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1105			    " %s:%d", m->mtx_description, file, line);
1106		if (mtx_recursed(m)) {
1107			if ((m->mtx_flags & MTX_RECURSE) == 0)
1108				panic("mutex_enter: recursion on non-recursive"
1109				    " mutex %s @ %s:%d", m->mtx_description,
1110				    file, line);
1111			return;
1112		}
1113		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1114		i = PCPU_GET(witness_spin_check);
1115		if (i != 0 && w->w_level < i) {
1116			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1117			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1118			    " %s:%d already holding %s:%x",
1119			    m->mtx_description, w->w_level, file, line,
1120			    spin_order_list[ffs(i)-1], i);
1121		}
1122		PCPU_SET(witness_spin_check, i | w->w_level);
1123		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1124		w->w_file = file;
1125		w->w_line = line;
1126		m->mtx_line = line;
1127		m->mtx_file = file;
1128		return;
1129	}
1130	if ((m->mtx_flags & MTX_SPIN) != 0)
1131		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1132		    m->mtx_description, file, line);
1133
1134	if (mtx_recursed(m)) {
1135		if ((m->mtx_flags & MTX_RECURSE) == 0)
1136			panic("mutex_enter: recursion on non-recursive"
1137			    " mutex %s @ %s:%d", m->mtx_description,
1138			    file, line);
1139		return;
1140	}
1141	if (witness_dead)
1142		goto out;
1143	if (cold)
1144		goto out;
1145
1146	if (!mtx_legal2block())
1147		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1148			    m->mtx_description, file, line);
1149	/*
1150	 * Is this the first mutex acquired
1151	 */
1152	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1153		goto out;
1154
1155	if ((w1 = m1->mtx_witness) == w) {
1156		if (w->w_same_squawked || dup_ok(w))
1157			goto out;
1158		w->w_same_squawked = 1;
1159		printf("acquring duplicate lock of same type: \"%s\"\n",
1160			m->mtx_description);
1161		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1162		printf(" 2nd @ %s:%d\n", file, line);
1163#ifdef DDB
1164		go_into_ddb = 1;
1165#endif /* DDB */
1166		goto out;
1167	}
1168	MPASS(!mtx_owned(&w_mtx));
1169	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1170	/*
1171	 * If we have a known higher number just say ok
1172	 */
1173	if (witness_watch > 1 && w->w_level > w1->w_level) {
1174		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1175		goto out;
1176	}
1177	if (isitmydescendant(m1->mtx_witness, w)) {
1178		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1179		goto out;
1180	}
1181	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1182
1183		MPASS(i < 200);
1184		w1 = m1->mtx_witness;
1185		if (isitmydescendant(w, w1)) {
1186			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1187			if (blessed(w, w1))
1188				goto out;
1189			if (m1 == &Giant) {
1190				if (w1->w_Giant_squawked)
1191					goto out;
1192				else
1193					w1->w_Giant_squawked = 1;
1194			} else {
1195				if (w1->w_other_squawked)
1196					goto out;
1197				else
1198					w1->w_other_squawked = 1;
1199			}
1200			printf("lock order reversal\n");
1201			printf(" 1st %s last acquired @ %s:%d\n",
1202			    w->w_description, w->w_file, w->w_line);
1203			printf(" 2nd %p %s @ %s:%d\n",
1204			    m1, w1->w_description, w1->w_file, w1->w_line);
1205			printf(" 3rd %p %s @ %s:%d\n",
1206			    m, w->w_description, file, line);
1207#ifdef DDB
1208			go_into_ddb = 1;
1209#endif /* DDB */
1210			goto out;
1211		}
1212	}
1213	m1 = LIST_FIRST(&p->p_heldmtx);
1214	if (!itismychild(m1->mtx_witness, w))
1215		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1216
1217out:
1218#ifdef DDB
1219	if (witness_ddb && go_into_ddb)
1220		Debugger("witness_enter");
1221#endif /* DDB */
1222	w->w_file = file;
1223	w->w_line = line;
1224	m->mtx_line = line;
1225	m->mtx_file = file;
1226
1227	/*
1228	 * If this pays off it likely means that a mutex being witnessed
1229	 * is acquired in hardclock. Put it in the ignore list. It is
1230	 * likely not the mutex this assert fails on.
1231	 */
1232	MPASS(m->mtx_held.le_prev == NULL);
1233	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1234}
1235
1236void
1237witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1238{
1239	struct proc *p;
1240	struct witness *w = m->mtx_witness;
1241
1242	if (witness_cold)
1243		return;
1244	if (panicstr)
1245		return;
1246	if (flags & MTX_SPIN) {
1247		if ((m->mtx_flags & MTX_SPIN) == 0)
1248			panic("mutex_try_enter: "
1249			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1250			    m->mtx_description, file, line);
1251		if (mtx_recursed(m)) {
1252			if ((m->mtx_flags & MTX_RECURSE) == 0)
1253				panic("mutex_try_enter: recursion on"
1254				    " non-recursive mutex %s @ %s:%d",
1255				    m->mtx_description, file, line);
1256			return;
1257		}
1258		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1259		PCPU_SET(witness_spin_check,
1260		    PCPU_GET(witness_spin_check) | w->w_level);
1261		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1262		w->w_file = file;
1263		w->w_line = line;
1264		m->mtx_line = line;
1265		m->mtx_file = file;
1266		return;
1267	}
1268
1269	if ((m->mtx_flags & MTX_SPIN) != 0)
1270		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1271		    m->mtx_description, file, line);
1272
1273	if (mtx_recursed(m)) {
1274		if ((m->mtx_flags & MTX_RECURSE) == 0)
1275			panic("mutex_try_enter: recursion on non-recursive"
1276			    " mutex %s @ %s:%d", m->mtx_description, file,
1277			    line);
1278		return;
1279	}
1280	w->w_file = file;
1281	w->w_line = line;
1282	m->mtx_line = line;
1283	m->mtx_file = file;
1284	p = CURPROC;
1285	MPASS(m->mtx_held.le_prev == NULL);
1286	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1287}
1288
1289void
1290witness_exit(struct mtx *m, int flags, const char *file, int line)
1291{
1292	struct witness *w;
1293
1294	if (witness_cold || m->mtx_witness == NULL || panicstr)
1295		return;
1296	w = m->mtx_witness;
1297
1298	if (flags & MTX_SPIN) {
1299		if ((m->mtx_flags & MTX_SPIN) == 0)
1300			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1301			    " %s:%d", m->mtx_description, file, line);
1302		if (mtx_recursed(m)) {
1303			if ((m->mtx_flags & MTX_RECURSE) == 0)
1304				panic("mutex_exit: recursion on non-recursive"
1305				    " mutex %s @ %s:%d", m->mtx_description,
1306				    file, line);
1307			return;
1308		}
1309		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1310		PCPU_SET(witness_spin_check,
1311		    PCPU_GET(witness_spin_check) & ~w->w_level);
1312		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1313		return;
1314	}
1315	if ((m->mtx_flags & MTX_SPIN) != 0)
1316		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1317		    m->mtx_description, file, line);
1318
1319	if (mtx_recursed(m)) {
1320		if ((m->mtx_flags & MTX_RECURSE) == 0)
1321			panic("mutex_exit: recursion on non-recursive"
1322			    " mutex %s @ %s:%d", m->mtx_description,
1323			    file, line);
1324		return;
1325	}
1326
1327	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1328		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1329			    m->mtx_description, file, line);
1330	LIST_REMOVE(m, mtx_held);
1331	m->mtx_held.le_prev = NULL;
1332}
1333
1334int
1335witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1336{
1337	struct mtx *m;
1338	struct proc *p;
1339	char **sleep;
1340	int n = 0;
1341
1342	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1343	p = CURPROC;
1344	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1345		if (m == mtx)
1346			continue;
1347		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1348			if (strcmp(m->mtx_description, *sleep) == 0)
1349				goto next;
1350		if (n == 0)
1351			printf("Whee!\n");
1352		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1353			file, line, check_only ? "could sleep" : "sleeping",
1354			m->mtx_description,
1355			m->mtx_witness->w_file, m->mtx_witness->w_line);
1356		n++;
1357	next:
1358	}
1359#ifdef DDB
1360	if (witness_ddb && n)
1361		Debugger("witness_sleep");
1362#endif /* DDB */
1363	return (n);
1364}
1365
1366static struct witness *
1367enroll(const char *description, int flag)
1368{
1369	int i;
1370	struct witness *w, *w1;
1371	char **ignore;
1372	char **order;
1373
1374	if (!witness_watch)
1375		return (NULL);
1376	for (ignore = ignore_list; *ignore != NULL; ignore++)
1377		if (strcmp(description, *ignore) == 0)
1378			return (NULL);
1379
1380	if (w_inited == 0) {
1381		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1382		for (i = 0; i < WITNESS_COUNT; i++) {
1383			w = &w_data[i];
1384			witness_free(w);
1385		}
1386		w_inited = 1;
1387		for (order = order_list; *order != NULL; order++) {
1388			w = enroll(*order, MTX_DEF);
1389			w->w_file = "order list";
1390			for (order++; *order != NULL; order++) {
1391				w1 = enroll(*order, MTX_DEF);
1392				w1->w_file = "order list";
1393				itismychild(w, w1);
1394				w = w1;
1395    	    	    	}
1396		}
1397	}
1398	if ((flag & MTX_SPIN) && witness_skipspin)
1399		return (NULL);
1400	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1401	for (w = w_all; w; w = w->w_next) {
1402		if (strcmp(description, w->w_description) == 0) {
1403			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1404			return (w);
1405		}
1406	}
1407	if ((w = witness_get()) == NULL)
1408		return (NULL);
1409	w->w_next = w_all;
1410	w_all = w;
1411	w->w_description = description;
1412	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1413	if (flag & MTX_SPIN) {
1414		w->w_spin = 1;
1415
1416		i = 1;
1417		for (order = spin_order_list; *order != NULL; order++) {
1418			if (strcmp(description, *order) == 0)
1419				break;
1420			i <<= 1;
1421		}
1422		if (*order == NULL)
1423			panic("spin lock %s not in order list", description);
1424		w->w_level = i;
1425	}
1426
1427	return (w);
1428}
1429
1430static int
1431itismychild(struct witness *parent, struct witness *child)
1432{
1433	static int recursed;
1434
1435	/*
1436	 * Insert "child" after "parent"
1437	 */
1438	while (parent->w_morechildren)
1439		parent = parent->w_morechildren;
1440
1441	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1442		if ((parent->w_morechildren = witness_get()) == NULL)
1443			return (1);
1444		parent = parent->w_morechildren;
1445	}
1446	MPASS(child != NULL);
1447	parent->w_children[parent->w_childcnt++] = child;
1448	/*
1449	 * now prune whole tree
1450	 */
1451	if (recursed)
1452		return (0);
1453	recursed = 1;
1454	for (child = w_all; child != NULL; child = child->w_next) {
1455		for (parent = w_all; parent != NULL;
1456		    parent = parent->w_next) {
1457			if (!isitmychild(parent, child))
1458				continue;
1459			removechild(parent, child);
1460			if (isitmydescendant(parent, child))
1461				continue;
1462			itismychild(parent, child);
1463		}
1464	}
1465	recursed = 0;
1466	witness_levelall();
1467	return (0);
1468}
1469
1470static void
1471removechild(struct witness *parent, struct witness *child)
1472{
1473	struct witness *w, *w1;
1474	int i;
1475
1476	for (w = parent; w != NULL; w = w->w_morechildren)
1477		for (i = 0; i < w->w_childcnt; i++)
1478			if (w->w_children[i] == child)
1479				goto found;
1480	return;
1481found:
1482	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1483		continue;
1484	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1485	MPASS(w->w_children[i] != NULL);
1486
1487	if (w1->w_childcnt != 0)
1488		return;
1489
1490	if (w1 == parent)
1491		return;
1492	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1493		continue;
1494	w->w_morechildren = 0;
1495	witness_free(w1);
1496}
1497
1498static int
1499isitmychild(struct witness *parent, struct witness *child)
1500{
1501	struct witness *w;
1502	int i;
1503
1504	for (w = parent; w != NULL; w = w->w_morechildren) {
1505		for (i = 0; i < w->w_childcnt; i++) {
1506			if (w->w_children[i] == child)
1507				return (1);
1508		}
1509	}
1510	return (0);
1511}
1512
1513static int
1514isitmydescendant(struct witness *parent, struct witness *child)
1515{
1516	struct witness *w;
1517	int i;
1518	int j;
1519
1520	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1521		MPASS(j < 1000);
1522		for (i = 0; i < w->w_childcnt; i++) {
1523			if (w->w_children[i] == child)
1524				return (1);
1525		}
1526		for (i = 0; i < w->w_childcnt; i++) {
1527			if (isitmydescendant(w->w_children[i], child))
1528				return (1);
1529		}
1530	}
1531	return (0);
1532}
1533
1534void
1535witness_levelall (void)
1536{
1537	struct witness *w, *w1;
1538
1539	for (w = w_all; w; w = w->w_next)
1540		if (!(w->w_spin))
1541			w->w_level = 0;
1542	for (w = w_all; w; w = w->w_next) {
1543		if (w->w_spin)
1544			continue;
1545		for (w1 = w_all; w1; w1 = w1->w_next) {
1546			if (isitmychild(w1, w))
1547				break;
1548		}
1549		if (w1 != NULL)
1550			continue;
1551		witness_leveldescendents(w, 0);
1552	}
1553}
1554
1555static void
1556witness_leveldescendents(struct witness *parent, int level)
1557{
1558	int i;
1559	struct witness *w;
1560
1561	if (parent->w_level < level)
1562		parent->w_level = level;
1563	level++;
1564	for (w = parent; w != NULL; w = w->w_morechildren)
1565		for (i = 0; i < w->w_childcnt; i++)
1566			witness_leveldescendents(w->w_children[i], level);
1567}
1568
1569static void
1570witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1571			   struct witness *parent)
1572{
1573	struct witness *w;
1574	int i;
1575	int level;
1576
1577	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1578
1579	prnt("%d", level);
1580	if (level < 10)
1581		prnt(" ");
1582	for (i = 0; i < level; i++)
1583		prnt(" ");
1584	prnt("%s", parent->w_description);
1585	if (parent->w_file != NULL)
1586		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1587		    parent->w_line);
1588
1589	for (w = parent; w != NULL; w = w->w_morechildren)
1590		for (i = 0; i < w->w_childcnt; i++)
1591			    witness_displaydescendants(prnt, w->w_children[i]);
1592    }
1593
1594static int
1595dup_ok(struct witness *w)
1596{
1597	char **dup;
1598
1599	for (dup = dup_list; *dup!= NULL; dup++)
1600		if (strcmp(w->w_description, *dup) == 0)
1601			return (1);
1602	return (0);
1603}
1604
1605static int
1606blessed(struct witness *w1, struct witness *w2)
1607{
1608	int i;
1609	struct witness_blessed *b;
1610
1611	for (i = 0; i < blessed_count; i++) {
1612		b = &blessed_list[i];
1613		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1614			if (strcmp(w2->w_description, b->b_lock2) == 0)
1615				return (1);
1616			continue;
1617		}
1618		if (strcmp(w1->w_description, b->b_lock2) == 0)
1619			if (strcmp(w2->w_description, b->b_lock1) == 0)
1620				return (1);
1621	}
1622	return (0);
1623}
1624
1625static struct witness *
1626witness_get()
1627{
1628	struct witness *w;
1629
1630	if ((w = w_free) == NULL) {
1631		witness_dead = 1;
1632		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1633		printf("witness exhausted\n");
1634		return (NULL);
1635	}
1636	w_free = w->w_next;
1637	bzero(w, sizeof(*w));
1638	return (w);
1639}
1640
1641static void
1642witness_free(struct witness *w)
1643{
1644	w->w_next = w_free;
1645	w_free = w;
1646}
1647
1648int
1649witness_list(struct proc *p)
1650{
1651	struct mtx *m;
1652	int nheld;
1653
1654	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1655	nheld = 0;
1656	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1657		printf("\t\"%s\" (%p) locked at %s:%d\n",
1658		    m->mtx_description, m,
1659		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1660		nheld++;
1661	}
1662
1663	return (nheld);
1664}
1665
1666#ifdef DDB
1667
1668DB_SHOW_COMMAND(mutexes, db_witness_list)
1669{
1670
1671	witness_list(CURPROC);
1672}
1673
1674DB_SHOW_COMMAND(witness, db_witness_display)
1675{
1676
1677	witness_display(db_printf);
1678}
1679#endif
1680
1681void
1682witness_save(struct mtx *m, const char **filep, int *linep)
1683{
1684
1685	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1686	if (m->mtx_witness == NULL)
1687		return;
1688
1689	*filep = m->mtx_witness->w_file;
1690	*linep = m->mtx_witness->w_line;
1691}
1692
1693void
1694witness_restore(struct mtx *m, const char *file, int line)
1695{
1696
1697	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1698	if (m->mtx_witness == NULL)
1699		return;
1700
1701	m->mtx_witness->w_file = file;
1702	m->mtx_witness->w_line = line;
1703}
1704
1705#endif	/* WITNESS */
1706