subr_witness.c revision 69363
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_witness.c 69363 2000-11-29 18:41:19Z jhb $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56/*
57 * Cause non-inlined mtx_*() to be compiled.
58 * Must be defined early because other system headers may include mutex.h.
59 */
60#define _KERN_MUTEX_C_
61
62#include <sys/param.h>
63#include <sys/bus.h>
64#include <sys/kernel.h>
65#include <sys/malloc.h>
66#include <sys/proc.h>
67#include <sys/sysctl.h>
68#include <sys/systm.h>
69#include <sys/vmmeter.h>
70#include <sys/ktr.h>
71
72#include <machine/atomic.h>
73#include <machine/bus.h>
74#include <machine/clock.h>
75#include <machine/cpu.h>
76
77#include <ddb/ddb.h>
78
79#include <vm/vm.h>
80#include <vm/vm_extern.h>
81
82#include <sys/mutex.h>
83
84/*
85 * Machine independent bits of the mutex implementation
86 */
87/* All mutexes in system (used for debug/panic) */
88#ifdef MUTEX_DEBUG
89static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
90	"All mutexes queue head" };
91static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
92	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
93	{ NULL, NULL }, &all_mtx, &all_mtx };
94#else	/* MUTEX_DEBUG */
95static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
96	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
97	{ NULL, NULL }, &all_mtx, &all_mtx };
98#endif	/* MUTEX_DEBUG */
99
100static int	mtx_cur_cnt;
101static int	mtx_max_cnt;
102
103void	_mtx_enter_giant_def(void);
104void	_mtx_exit_giant_def(void);
105static void propagate_priority(struct proc *) __unused;
106
107#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
108#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
109			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
110
111#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
112#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
113
114/*
115 * XXX Temporary, for use from assembly language
116 */
117
118void
119_mtx_enter_giant_def(void)
120{
121
122	mtx_enter(&Giant, MTX_DEF);
123}
124
125void
126_mtx_exit_giant_def(void)
127{
128
129	mtx_exit(&Giant, MTX_DEF);
130}
131
132static void
133propagate_priority(struct proc *p)
134{
135	int pri = p->p_priority;
136	struct mtx *m = p->p_blocked;
137
138	for (;;) {
139		struct proc *p1;
140
141		p = mtx_owner(m);
142
143		if (p == NULL) {
144			/*
145			 * This really isn't quite right. Really
146			 * ought to bump priority of process that
147			 * next acquires the mutex.
148			 */
149			MPASS(m->mtx_lock == MTX_CONTESTED);
150			return;
151		}
152		MPASS(p->p_magic == P_MAGIC);
153		if (p->p_priority <= pri)
154			return;
155		/*
156		 * If lock holder is actually running, just bump priority.
157		 */
158		if (TAILQ_NEXT(p, p_procq) == NULL) {
159			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
160			SET_PRIO(p, pri);
161			return;
162		}
163		/*
164		 * If on run queue move to new run queue, and
165		 * quit.
166		 */
167		if (p->p_stat == SRUN) {
168			MPASS(p->p_blocked == NULL);
169			remrunqueue(p);
170			SET_PRIO(p, pri);
171			setrunqueue(p);
172			return;
173		}
174
175		/*
176		 * If we aren't blocked on a mutex, give up and quit.
177		 */
178		if (p->p_stat != SMTX) {
179			printf(
180	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
181			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
182			return;
183		}
184
185		/*
186		 * Pick up the mutex that p is blocked on.
187		 */
188		m = p->p_blocked;
189		MPASS(m != NULL);
190
191		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
192		    p->p_comm, m->mtx_description);
193		/*
194		 * Check if the proc needs to be moved up on
195		 * the blocked chain
196		 */
197		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
198		    p1->p_priority <= pri) {
199			if (p1)
200				printf(
201	"XXX: previous process %d(%s) has higher priority\n",
202				    p->p_pid, p->p_comm);
203			else
204				printf("XXX: process at head of run queue\n");
205			continue;
206		}
207
208		/*
209		 * Remove proc from blocked chain
210		 */
211		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
212		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
213			MPASS(p1->p_magic == P_MAGIC);
214			if (p1->p_priority > pri)
215				break;
216		}
217		if (p1)
218			TAILQ_INSERT_BEFORE(p1, p, p_procq);
219		else
220			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
221		CTR4(KTR_LOCK,
222		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
223		    p, p1, m, m->mtx_description);
224	}
225}
226
/*
 * Slow path of mutex acquisition for mutex m on behalf of CURPROC.
 *
 * type selects the behaviour:
 *   MTX_DEF	If the current process already owns m, bump mtx_recurse and
 *		set MTX_RECURSE in the lock word.  Otherwise loop until
 *		_obtain_lock() succeeds; on each failure the lock word is
 *		re-examined under sched_lock and, if the mutex is still
 *		held, the process queues itself on m->mtx_blocked (ordered
 *		by p_priority), marks itself SMTX and calls mi_switch().
 *   MTX_SPIN, MTX_SPIN | MTX_FIRST, MTX_SPIN | MTX_TOPHALF
 *		Bump mtx_recurse if already owned, otherwise spin until
 *		_obtain_lock() succeeds, panicking if the spin lasts too
 *		long.  saveintr is recorded in m->mtx_saveintr.
 * Any other value of type matches no case and the function returns without
 * doing anything.
 */
227void
228mtx_enter_hard(struct mtx *m, int type, int saveintr)
229{
230	struct proc *p = CURPROC;
231
232	KASSERT(p != NULL, ("curproc is NULL in mutex"));
233
234	switch (type) {
235	case MTX_DEF:
		/* Recursive acquisition by the current owner. */
236		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
237			m->mtx_recurse++;
238			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
239			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
240			return;
241		}
242		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
243		    m, (void *)m->mtx_lock, (void *)RETIP(m));
244		while (!_obtain_lock(m, p)) {
245			uintptr_t v;
246			struct proc *p1;
247
			/* Everything below manipulates queues under sched_lock. */
248			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
249			/*
250			 * check if the lock has been released while
251			 * waiting for the schedlock.
252			 */
253			if ((v = m->mtx_lock) == MTX_UNOWNED) {
254				mtx_exit(&sched_lock, MTX_SPIN);
255				continue;
256			}
257			/*
258			 * The mutex was marked contested on release. This
259			 * means that there are processes blocked on it.
260			 */
261			if (v == MTX_CONTESTED) {
262				p1 = TAILQ_FIRST(&m->mtx_blocked);
263				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
264				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
				/*
				 * Take ownership directly, keeping the
				 * contested bit, and inherit the first
				 * waiter's priority if its value is lower.
				 */
265				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
266				if (p1->p_priority < p->p_priority) {
267					SET_PRIO(p, p1->p_priority);
268				}
269				mtx_exit(&sched_lock, MTX_SPIN);
270				return;
271			}
272			/*
273			 * If the mutex isn't already contested and
274			 * a failure occurs setting the contested bit the
275			 * mutex was either released or the
276			 * state of the RECURSION bit changed.
277			 */
278			if ((v & MTX_CONTESTED) == 0 &&
279			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
280				               (void *)(v | MTX_CONTESTED))) {
281				mtx_exit(&sched_lock, MTX_SPIN);
282				continue;
283			}
284
285			/* We definitely have to sleep for this lock */
286			mtx_assert(m, MA_NOTOWNED);
287
288#ifdef notyet
289			/*
290			 * If we're borrowing an interrupted thread's VM
291			 * context must clean up before going to sleep.
292			 */
293			if (p->p_flag & (P_ITHD | P_SITHD)) {
294				ithd_t *it = (ithd_t *)p;
295
296				if (it->it_interrupted) {
297					CTR2(KTR_LOCK,
298					    "mtx_enter: 0x%x interrupted 0x%x",
299					    it, it->it_interrupted);
300					intr_thd_fixup(it);
301				}
302			}
303#endif
304
305			/* Put us on the list of procs blocked on this mutex */
306			if (TAILQ_EMPTY(&m->mtx_blocked)) {
				/*
				 * First waiter: also record m on the owner's
				 * list of contested mutexes.
				 */
307				p1 = (struct proc *)(m->mtx_lock &
308						     MTX_FLAGMASK);
309				LIST_INSERT_HEAD(&p1->p_contested, m,
310						 mtx_contested);
311				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
312			} else {
				/*
				 * Insert before the first waiter whose
				 * p_priority value is greater than ours.
				 */
313				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
314					if (p1->p_priority > p->p_priority)
315						break;
316				if (p1)
317					TAILQ_INSERT_BEFORE(p1, p, p_procq);
318				else
319					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
320							  p_procq);
321			}
322
323			p->p_blocked = m;	/* Who we're blocked on */
324			p->p_stat = SMTX;
			/* Priority propagation is compiled out for now. */
325#if 0
326			propagate_priority(p);
327#endif
328			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
329			    p, m, m->mtx_description);
330			mi_switch();
331			CTR3(KTR_LOCK,
332			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
333			    p, m, m->mtx_description);
334			mtx_exit(&sched_lock, MTX_SPIN);
335		}
336		return;
337	case MTX_SPIN:
338	case MTX_SPIN | MTX_FIRST:
339	case MTX_SPIN | MTX_TOPHALF:
340	    {
341		int i = 0;
342
		/* Recursive acquisition of a spin mutex we already hold. */
343		if (m->mtx_lock == (uintptr_t)p) {
344			m->mtx_recurse++;
345			return;
346		}
347		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
348		for (;;) {
349			if (_obtain_lock(m, p))
350				break;
			/*
			 * Wait for the lock word to read unowned before
			 * retrying: busy-loop for the first 1000000
			 * iterations, then call DELAY(1) per iteration, and
			 * finally panic (unless DDB is active).  Note that i
			 * is incremented by both comparisons once the first
			 * threshold has been passed.
			 */
351			while (m->mtx_lock != MTX_UNOWNED) {
352				if (i++ < 1000000)
353					continue;
354				if (i++ < 6000000)
355					DELAY (1);
356#ifdef DDB
357				else if (!db_active)
358#else
359				else
360#endif
361					panic(
362				"spin lock %s held by 0x%p for > 5 seconds",
363					    m->mtx_description,
364					    (void *)m->mtx_lock);
365			}
366		}
367
		/*
		 * Under MUTEX_DEBUG a poison value is stored for the
		 * MTX_FIRST / MTX_TOPHALF variants; mtx_exit_hard() asserts
		 * that it never restores from that value.
		 */
368#ifdef MUTEX_DEBUG
369		if (type != MTX_SPIN)
370			m->mtx_saveintr = 0xbeefface;
371		else
372#endif
373			m->mtx_saveintr = saveintr;
374		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
375		return;
376	    }
377	}
378}
379
/*
 * Slow path of mutex release for mutex m, called by the owner (CURPROC).
 *
 * type selects the behaviour:
 *   MTX_DEF, MTX_DEF | MTX_NOSWITCH
 *		If m is held recursively, drop one level (clearing
 *		MTX_RECURSE when the count reaches zero) and return.
 *		Otherwise, under sched_lock, dequeue the first blocked
 *		process, release the lock word (or leave it MTX_CONTESTED if
 *		more waiters remain), recompute the caller's priority and
 *		make the dequeued process runnable.  Unless MTX_NOSWITCH is
 *		given, the caller switches out when the woken process has a
 *		lower p_priority value than the caller's new priority.
 *   MTX_SPIN, MTX_SPIN | MTX_FIRST
 *		Drop a recursion level or release the lock, then enable
 *		interrupts (MTX_FIRST) or restore the saved interrupt state.
 *   MTX_SPIN | MTX_TOPHALF
 *		Drop a recursion level or release the lock; the interrupt
 *		state is left alone.
 * Any other type panics.
 */
380void
381mtx_exit_hard(struct mtx *m, int type)
382{
383	struct proc *p, *p1;
384	struct mtx *m1;
385	int pri;
386
387	p = CURPROC;
388	switch (type) {
389	case MTX_DEF:
390	case MTX_DEF | MTX_NOSWITCH:
391		if (m->mtx_recurse != 0) {
392			if (--(m->mtx_recurse) == 0)
393				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
394			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
395			return;
396		}
397		mtx_enter(&sched_lock, MTX_SPIN);
398		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
		/*
		 * p1 is the waiter that will be woken.  It is dereferenced
		 * without a NULL check, so this path relies on at least one
		 * process being queued on m.
		 */
399		p1 = TAILQ_FIRST(&m->mtx_blocked);
400		MPASS(p->p_magic == P_MAGIC);
401		MPASS(p1->p_magic == P_MAGIC);
402		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
403		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			/* No waiters left: m is no longer contested. */
404			LIST_REMOVE(m, mtx_contested);
405			_release_lock_quick(m);
406			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
407		} else
408			atomic_store_rel_ptr(&m->mtx_lock,
409			    (void *)MTX_CONTESTED);
		/*
		 * New priority for the caller: the smallest p_priority value
		 * among the first waiters of the mutexes still on its
		 * contested list, but never greater than p_nativepri.
		 */
410		pri = MAXPRI;
411		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
412			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
413			if (cp < pri)
414				pri = cp;
415		}
416		if (pri > p->p_nativepri)
417			pri = p->p_nativepri;
418		SET_PRIO(p, pri);
419		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
420		    m, p1);
		/* Wake the dequeued waiter. */
421		p1->p_blocked = NULL;
422		p1->p_stat = SRUN;
423		setrunqueue(p1);
		/* Yield to the woken process if allowed and it outranks us. */
424		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
425#ifdef notyet
426			if (p->p_flag & (P_ITHD | P_SITHD)) {
427				ithd_t *it = (ithd_t *)p;
428
429				if (it->it_interrupted) {
430					CTR2(KTR_LOCK,
431					    "mtx_exit: 0x%x interruped 0x%x",
432					    it, it->it_interrupted);
433					intr_thd_fixup(it);
434				}
435			}
436#endif
437			setrunqueue(p);
438			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
439			    m, (void *)m->mtx_lock);
440			mi_switch();
441			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
442			    m, (void *)m->mtx_lock);
443		}
444		mtx_exit(&sched_lock, MTX_SPIN);
445		break;
446	case MTX_SPIN:
447	case MTX_SPIN | MTX_FIRST:
448		if (m->mtx_recurse != 0) {
449			m->mtx_recurse--;
450			return;
451		}
452		MPASS(mtx_owned(m));
453		_release_lock_quick(m);
454		if (type & MTX_FIRST)
455			enable_intr();	/* XXX is this kosher? */
456		else {
			/* 0xbeefface is the poison stored by mtx_enter_hard(). */
457			MPASS(m->mtx_saveintr != 0xbeefface);
458			restore_intr(m->mtx_saveintr);
459		}
460		break;
461	case MTX_SPIN | MTX_TOPHALF:
462		if (m->mtx_recurse != 0) {
463			m->mtx_recurse--;
464			return;
465		}
466		MPASS(mtx_owned(m));
467		_release_lock_quick(m);
468		break;
469	default:
470		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
471	}
472}
473
/* Values for the "when" argument of mtx_validate(). */
#define MV_DESTROY	0	/* validate before destroy */
#define MV_INIT		1	/* validate before init */

#ifdef MUTEX_DEBUG

int mtx_validate __P((struct mtx *, int));

/*
 * Sanity-check the list of all mutexes and the membership of m in it.
 *
 * The whole all_mtx ring is walked first: every link must be readable
 * (except on alpha, where the kernacc() checks are disabled) and the ring
 * must contain exactly mtx_cur_cnt entries.  Then, depending on "when":
 *   MV_DESTROY	assert that m is on the list;
 *   MV_INIT	if m is already on the list, complain, assert that it is
 *		unowned, and return 1.
 * Returns 0 in every other case, including when the check is skipped
 * because m is the list head itself or the system is still cold.
 */
int
mtx_validate(struct mtx *m, int when)
{
	struct mtx *cur;
	int seen;
	int already_known;

	if (m == &all_mtx || cold)
		return 0;

	already_known = 0;
	mtx_enter(&all_mtx, MTX_DEF);
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
	    VM_PROT_READ) == 1);
#endif
	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);

	/* Count the ring, bailing out hard if it is longer than expected. */
	seen = 0;
	cur = all_mtx.mtx_next;
	while (cur != &all_mtx) {
#ifndef __alpha__
		if (kernacc((caddr_t)cur->mtx_next, sizeof(uintptr_t),
		    VM_PROT_READ) != 1) {
			panic("mtx_validate: mp=%p mp->mtx_next=%p",
			    cur, cur->mtx_next);
		}
#endif
		seen++;
		if (seen > mtx_cur_cnt) {
			panic("mtx_validate: too many in chain, known=%d\n",
			    mtx_cur_cnt);
		}
		cur = cur->mtx_next;
	}
	MPASS(seen == mtx_cur_cnt);

	if (when == MV_DESTROY) {
		/* The mutex being destroyed must be a known one. */
		cur = all_mtx.mtx_next;
		while (cur != &all_mtx && cur != m)
			cur = cur->mtx_next;
		MPASS(cur == m);
	} else if (when == MV_INIT) {
		/* The mutex being initialized should not be known yet. */
		cur = all_mtx.mtx_next;
		while (cur != &all_mtx) {
			if (cur == m) {
				/*
				 * Not good. This mutex already exists.
				 */
				printf("re-initing existing mutex %s\n",
				    m->mtx_description);
				MPASS(m->mtx_lock == MTX_UNOWNED);
				already_known = 1;
			}
			cur = cur->mtx_next;
		}
	}
	mtx_exit(&all_mtx, MTX_DEF);
	return (already_known);
}
#endif
539
540void
541mtx_init(struct mtx *m, const char *t, int flag)
542{
543#ifdef MUTEX_DEBUG
544	struct mtx_debug *debug;
545#endif
546
547	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
548#ifdef MUTEX_DEBUG
549	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
550		return;
551	if (flag & MTX_COLD)
552		debug = m->mtx_debug;
553	else
554		debug = NULL;
555	if (debug == NULL) {
556#ifdef DIAGNOSTIC
557		if(cold && bootverbose)
558			printf("malloc'ing mtx_debug while cold for %s\n", t);
559#endif
560
561		/* XXX - should not use DEVBUF */
562		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
563		MPASS(debug != NULL);
564		bzero(debug, sizeof(struct mtx_debug));
565	}
566#endif
567	bzero((void *)m, sizeof *m);
568	TAILQ_INIT(&m->mtx_blocked);
569#ifdef MUTEX_DEBUG
570	m->mtx_debug = debug;
571#endif
572	m->mtx_description = t;
573	m->mtx_lock = MTX_UNOWNED;
574	/* Put on all mutex queue */
575	mtx_enter(&all_mtx, MTX_DEF);
576	m->mtx_next = &all_mtx;
577	m->mtx_prev = all_mtx.mtx_prev;
578	m->mtx_prev->mtx_next = m;
579	all_mtx.mtx_prev = m;
580	if (++mtx_cur_cnt > mtx_max_cnt)
581		mtx_max_cnt = mtx_cur_cnt;
582	mtx_exit(&all_mtx, MTX_DEF);
583	witness_init(m, flag);
584}
585
586void
587mtx_destroy(struct mtx *m)
588{
589
590	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
591#ifdef MUTEX_DEBUG
592	if (m->mtx_next == NULL)
593		panic("mtx_destroy: %p (%s) already destroyed",
594		    m, m->mtx_description);
595
596	if (!mtx_owned(m)) {
597		MPASS(m->mtx_lock == MTX_UNOWNED);
598	} else {
599		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
600	}
601	mtx_validate(m, MV_DESTROY);		/* diagnostic */
602#endif
603
604#ifdef WITNESS
605	if (m->mtx_witness)
606		witness_destroy(m);
607#endif /* WITNESS */
608
609	/* Remove from the all mutex queue */
610	mtx_enter(&all_mtx, MTX_DEF);
611	m->mtx_next->mtx_prev = m->mtx_prev;
612	m->mtx_prev->mtx_next = m->mtx_next;
613#ifdef MUTEX_DEBUG
614	m->mtx_next = m->mtx_prev = NULL;
615	free(m->mtx_debug, M_DEVBUF);
616	m->mtx_debug = NULL;
617#endif
618	mtx_cur_cnt--;
619	mtx_exit(&all_mtx, MTX_DEF);
620}
621
622/*
623 * The non-inlined versions of the mtx_*() functions are always built (above),
624 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
625 * being specified.
626 */
627#if (defined(MUTEX_DEBUG) && defined(WITNESS))
628
629#define WITNESS_COUNT 200
630#define	WITNESS_NCHILDREN 2
631
632int witness_watch = 1;
633
634struct witness {
635	struct witness	*w_next;
636	const char	*w_description;
637	const char	*w_file;
638	int		 w_line;
639	struct witness	*w_morechildren;
640	u_char		 w_childcnt;
641	u_char		 w_Giant_squawked:1;
642	u_char		 w_other_squawked:1;
643	u_char		 w_same_squawked:1;
644	u_char		 w_sleep:1;
645	u_char		 w_spin:1;	/* this is a spin mutex */
646	u_int		 w_level;
647	struct witness	*w_children[WITNESS_NCHILDREN];
648};
649
650struct witness_blessed {
651	char 	*b_lock1;
652	char	*b_lock2;
653};
654
655#ifdef DDB
656/*
657 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
658 * drop into kdebug() when:
659 *	- a lock heirarchy violation occurs
660 *	- locks are held when going to sleep.
661 */
662#ifdef WITNESS_DDB
663int	witness_ddb = 1;
664#else
665int	witness_ddb = 0;
666#endif
667SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
668#endif /* DDB */
669
670#ifdef WITNESS_SKIPSPIN
671int	witness_skipspin = 1;
672#else
673int	witness_skipspin = 0;
674#endif
675SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
676    "");
677
678MUTEX_DECLARE(static,w_mtx);
679static struct witness	*w_free;
680static struct witness	*w_all;
681static int		 w_inited;
682static int		 witness_dead;	/* fatal error, probably no memory */
683
684static struct witness	 w_data[WITNESS_COUNT];
685
686static struct witness	 *enroll __P((const char *description, int flag));
687static int itismychild __P((struct witness *parent, struct witness *child));
688static void removechild __P((struct witness *parent, struct witness *child));
689static int isitmychild __P((struct witness *parent, struct witness *child));
690static int isitmydescendant __P((struct witness *parent, struct witness *child));
691static int dup_ok __P((struct witness *));
692static int blessed __P((struct witness *, struct witness *));
693static void witness_displaydescendants
694    __P((void(*)(const char *fmt, ...), struct witness *));
695static void witness_leveldescendents __P((struct witness *parent, int level));
696static void witness_levelall __P((void));
697static struct witness * witness_get __P((void));
698static void witness_free __P((struct witness *m));
699
700
701static char *ignore_list[] = {
702	"witness lock",
703	NULL
704};
705
706static char *spin_order_list[] = {
707	"sio",
708	"sched lock",
709#ifdef __i386__
710	"clk",
711#endif
712	"callout",
713	/*
714	 * leaf locks
715	 */
716	NULL
717};
718
719static char *order_list[] = {
720	"uidinfo hash", "uidinfo struct", NULL,
721	NULL
722};
723
724static char *dup_list[] = {
725	NULL
726};
727
728static char *sleep_list[] = {
729	"Giant",
730	NULL
731};
732
733/*
734 * Pairs of locks which have been blessed
735 * Don't complain about order problems with blessed locks
736 */
737static struct witness_blessed blessed_list[] = {
738};
739static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
740
741void
742witness_init(struct mtx *m, int flag)
743{
744	m->mtx_witness = enroll(m->mtx_description, flag);
745}
746
747void
748witness_destroy(struct mtx *m)
749{
750	struct mtx *m1;
751	struct proc *p;
752	p = CURPROC;
753	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
754		m1 = LIST_NEXT(m1, mtx_held)) {
755		if (m1 == m) {
756			LIST_REMOVE(m, mtx_held);
757			break;
758		}
759	}
760	return;
761
762}
763
/*
 * Lock-order bookkeeping for the acquisition of mutex m at file:line.
 *
 * Spin mutexes (MTX_SPIN in flags): panic if m's witness is not a spin
 * witness; return at once for a recursive acquisition; panic when the
 * per-CPU mask of held spin levels (witness_spin_check) compares greater
 * than this lock's level bit; otherwise add the bit to the mask and record
 * the acquisition site.
 *
 * Sleep mutexes: panic if m's witness is a spin witness or blocking is not
 * legal here; return at once for a recursive acquisition.  The order checks
 * are skipped when witness is dead, the system is cold or panicking, or no
 * other mutex is held.  Otherwise the most recently acquired held mutex is
 * compared with m: a duplicate of the same witness is reported once unless
 * dup_ok(); an already-known order is accepted; a held mutex whose witness
 * is a descendant of m's witness is reported as a lock order reversal
 * (once per witness, unless blessed()); a new order is recorded with
 * itismychild().  In every non-recursive sleep-mutex case the acquisition
 * site is recorded and m is pushed on the process's held list.
 *
 * m->mtx_witness is dereferenced unconditionally, so callers must not pass
 * a mutex without a witness.
 */
764void
765witness_enter(struct mtx *m, int flags, const char *file, int line)
766{
767	struct witness *w, *w1;
768	struct mtx *m1;
769	struct proc *p;
770	int i;
771#ifdef DDB
772	int go_into_ddb = 0;
773#endif /* DDB */
774
775	w = m->mtx_witness;
776	p = CURPROC;
777
778	if (flags & MTX_SPIN) {
779		if (!w->w_spin)
780			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
781			    " %s:%d", m->mtx_description, file, line);
782		if (m->mtx_recurse != 0)
783			return;
784		mtx_enter(&w_mtx, MTX_SPIN);
		/*
		 * i is the OR of the level bits of the spin locks already
		 * held; a value above our own bit means a lock listed later
		 * in spin_order_list is held.
		 */
785		i = witness_spin_check;
786		if (i != 0 && w->w_level < i) {
787			mtx_exit(&w_mtx, MTX_SPIN);
788			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
789			    " %s:%d already holding %s:%x",
790			    m->mtx_description, w->w_level, file, line,
791			    spin_order_list[ffs(i)-1], i);
792		}
793		PCPU_SET(witness_spin_check, i | w->w_level);
794		mtx_exit(&w_mtx, MTX_SPIN);
795		w->w_file = file;
796		w->w_line = line;
797		m->mtx_line = line;
798		m->mtx_file = file;
799		return;
800	}
801	if (w->w_spin)
802		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
803		    m->mtx_description, file, line);
804
805	if (m->mtx_recurse != 0)
806		return;
807	if (witness_dead)
808		goto out;
809	if (cold || panicstr)
810		goto out;
811
812	if (!mtx_legal2block())
813		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
814			    m->mtx_description, file, line);
815	/*
816	 * Is this the first mutex acquired
817	 */
818	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
819		goto out;
820
	/* Same witness as the most recently acquired mutex: a duplicate. */
821	if ((w1 = m1->mtx_witness) == w) {
822		if (w->w_same_squawked || dup_ok(w))
823			goto out;
824		w->w_same_squawked = 1;
825		printf("acquring duplicate lock of same type: \"%s\"\n",
826			m->mtx_description);
827		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
828		printf(" 2nd @ %s:%d\n", file, line);
829#ifdef DDB
830		go_into_ddb = 1;
831#endif /* DDB */
832		goto out;
833	}
834	MPASS(!mtx_owned(&w_mtx));
835	mtx_enter(&w_mtx, MTX_SPIN);
836	/*
837	 * If we have a known higher number just say ok
838	 */
839	if (witness_watch > 1 && w->w_level > w1->w_level) {
840		mtx_exit(&w_mtx, MTX_SPIN);
841		goto out;
842	}
	/* The order "m1 before m" is already recorded in the graph. */
843	if (isitmydescendant(m1->mtx_witness, w)) {
844		mtx_exit(&w_mtx, MTX_SPIN);
845		goto out;
846	}
	/*
	 * Look for a reversal: any held mutex whose witness is already
	 * recorded as being acquired after m's witness.
	 */
847	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
848
849		MPASS(i < 200);
850		w1 = m1->mtx_witness;
851		if (isitmydescendant(w, w1)) {
852			mtx_exit(&w_mtx, MTX_SPIN);
853			if (blessed(w, w1))
854				goto out;
			/* Report each offending witness only once. */
855			if (m1 == &Giant) {
856				if (w1->w_Giant_squawked)
857					goto out;
858				else
859					w1->w_Giant_squawked = 1;
860			} else {
861				if (w1->w_other_squawked)
862					goto out;
863				else
864					w1->w_other_squawked = 1;
865			}
866			printf("lock order reversal\n");
867			printf(" 1st %s last acquired @ %s:%d\n",
868			    w->w_description, w->w_file, w->w_line);
869			printf(" 2nd %p %s @ %s:%d\n",
870			    m1, w1->w_description, w1->w_file, w1->w_line);
871			printf(" 3rd %p %s @ %s:%d\n",
872			    m, w->w_description, file, line);
873#ifdef DDB
874			go_into_ddb = 1;
875#endif /* DDB */
876			goto out;
877		}
878	}
	/*
	 * New, consistent order: record m's witness as a child of the most
	 * recently acquired mutex's witness.  w_mtx is dropped here only
	 * when itismychild() returns 0.
	 * NOTE(review): presumably the failure path (witness_get() returning
	 * NULL) has already released w_mtx -- witness_get() is not visible
	 * here, confirm.
	 */
879	m1 = LIST_FIRST(&p->p_heldmtx);
880	if (!itismychild(m1->mtx_witness, w))
881		mtx_exit(&w_mtx, MTX_SPIN);
882
883out:
884#ifdef DDB
885	if (witness_ddb && go_into_ddb)
886		Debugger("witness_enter");
887#endif /* DDB */
888	w->w_file = file;
889	w->w_line = line;
890	m->mtx_line = line;
891	m->mtx_file = file;
892
893	/*
894	 * If this pays off it likely means that a mutex being witnessed
895	 * is acquired in hardclock. Put it in the ignore list. It is
896	 * likely not the mutex this assert fails on.
897	 */
898	MPASS(m->mtx_held.le_prev == NULL);
899	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
900}
901
902void
903witness_exit(struct mtx *m, int flags, const char *file, int line)
904{
905	struct witness *w;
906
907	w = m->mtx_witness;
908
909	if (flags & MTX_SPIN) {
910		if (!w->w_spin)
911			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
912			    " %s:%d", m->mtx_description, file, line);
913		if (m->mtx_recurse != 0)
914			return;
915		mtx_enter(&w_mtx, MTX_SPIN);
916		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
917		mtx_exit(&w_mtx, MTX_SPIN);
918		return;
919	}
920	if (w->w_spin)
921		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
922		    m->mtx_description, file, line);
923
924	if (m->mtx_recurse != 0)
925		return;
926
927	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
928		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
929			    m->mtx_description, file, line);
930	LIST_REMOVE(m, mtx_held);
931	m->mtx_held.le_prev = NULL;
932}
933
934void
935witness_try_enter(struct mtx *m, int flags, const char *file, int line)
936{
937	struct proc *p;
938	struct witness *w = m->mtx_witness;
939
940	if (flags & MTX_SPIN) {
941		if (!w->w_spin)
942			panic("mutex_try_enter: "
943			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
944			    m->mtx_description, file, line);
945		if (m->mtx_recurse != 0)
946			return;
947		mtx_enter(&w_mtx, MTX_SPIN);
948		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
949		mtx_exit(&w_mtx, MTX_SPIN);
950		w->w_file = file;
951		w->w_line = line;
952		m->mtx_line = line;
953		m->mtx_file = file;
954		return;
955	}
956
957	if (w->w_spin)
958		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
959		    m->mtx_description, file, line);
960
961	if (m->mtx_recurse != 0)
962		return;
963
964	w->w_file = file;
965	w->w_line = line;
966	m->mtx_line = line;
967	m->mtx_file = file;
968	p = CURPROC;
969	MPASS(m->mtx_held.le_prev == NULL);
970	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
971}
972
973void
974witness_display(void(*prnt)(const char *fmt, ...))
975{
976	struct witness *w, *w1;
977
978	witness_levelall();
979
980	for (w = w_all; w; w = w->w_next) {
981		if (w->w_file == NULL)
982			continue;
983		for (w1 = w_all; w1; w1 = w1->w_next) {
984			if (isitmychild(w1, w))
985				break;
986		}
987		if (w1 != NULL)
988			continue;
989		/*
990		 * This lock has no anscestors, display its descendants.
991		 */
992		witness_displaydescendants(prnt, w);
993	}
994	prnt("\nMutex which were never acquired\n");
995	for (w = w_all; w; w = w->w_next) {
996		if (w->w_file != NULL)
997			continue;
998		prnt("%s\n", w->w_description);
999	}
1000}
1001
1002int
1003witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1004{
1005	struct mtx *m;
1006	struct proc *p;
1007	char **sleep;
1008	int n = 0;
1009
1010	p = CURPROC;
1011	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1012	    m = LIST_NEXT(m, mtx_held)) {
1013		if (m == mtx)
1014			continue;
1015		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1016			if (strcmp(m->mtx_description, *sleep) == 0)
1017				goto next;
1018		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1019			file, line, check_only ? "could sleep" : "sleeping",
1020			m->mtx_description,
1021			m->mtx_witness->w_file, m->mtx_witness->w_line);
1022		n++;
1023	next:
1024	}
1025#ifdef DDB
1026	if (witness_ddb && n)
1027		Debugger("witness_sleep");
1028#endif /* DDB */
1029	return (n);
1030}
1031
1032static struct witness *
1033enroll(const char *description, int flag)
1034{
1035	int i;
1036	struct witness *w, *w1;
1037	char **ignore;
1038	char **order;
1039
1040	if (!witness_watch)
1041		return (NULL);
1042	for (ignore = ignore_list; *ignore != NULL; ignore++)
1043		if (strcmp(description, *ignore) == 0)
1044			return (NULL);
1045
1046	if (w_inited == 0) {
1047		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
1048		for (i = 0; i < WITNESS_COUNT; i++) {
1049			w = &w_data[i];
1050			witness_free(w);
1051		}
1052		w_inited = 1;
1053		for (order = order_list; *order != NULL; order++) {
1054			w = enroll(*order, MTX_DEF);
1055			w->w_file = "order list";
1056			for (order++; *order != NULL; order++) {
1057				w1 = enroll(*order, MTX_DEF);
1058				w1->w_file = "order list";
1059				itismychild(w, w1);
1060				w = w1;
1061    	    	    	}
1062		}
1063	}
1064	if ((flag & MTX_SPIN) && witness_skipspin)
1065		return (NULL);
1066	mtx_enter(&w_mtx, MTX_SPIN);
1067	for (w = w_all; w; w = w->w_next) {
1068		if (strcmp(description, w->w_description) == 0) {
1069			mtx_exit(&w_mtx, MTX_SPIN);
1070			return (w);
1071		}
1072	}
1073	if ((w = witness_get()) == NULL)
1074		return (NULL);
1075	w->w_next = w_all;
1076	w_all = w;
1077	w->w_description = description;
1078	mtx_exit(&w_mtx, MTX_SPIN);
1079	if (flag & MTX_SPIN) {
1080		w->w_spin = 1;
1081
1082		i = 1;
1083		for (order = spin_order_list; *order != NULL; order++) {
1084			if (strcmp(description, *order) == 0)
1085				break;
1086			i <<= 1;
1087		}
1088		if (*order == NULL)
1089			panic("spin lock %s not in order list", description);
1090		w->w_level = i;
1091	}
1092	return (w);
1093}
1094
1095static int
1096itismychild(struct witness *parent, struct witness *child)
1097{
1098	static int recursed;
1099
1100	/*
1101	 * Insert "child" after "parent"
1102	 */
1103	while (parent->w_morechildren)
1104		parent = parent->w_morechildren;
1105
1106	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1107		if ((parent->w_morechildren = witness_get()) == NULL)
1108			return (1);
1109		parent = parent->w_morechildren;
1110	}
1111	MPASS(child != NULL);
1112	parent->w_children[parent->w_childcnt++] = child;
1113	/*
1114	 * now prune whole tree
1115	 */
1116	if (recursed)
1117		return (0);
1118	recursed = 1;
1119	for (child = w_all; child != NULL; child = child->w_next) {
1120		for (parent = w_all; parent != NULL;
1121		    parent = parent->w_next) {
1122			if (!isitmychild(parent, child))
1123				continue;
1124			removechild(parent, child);
1125			if (isitmydescendant(parent, child))
1126				continue;
1127			itismychild(parent, child);
1128		}
1129	}
1130	recursed = 0;
1131	witness_levelall();
1132	return (0);
1133}
1134
1135static void
1136removechild(struct witness *parent, struct witness *child)
1137{
1138	struct witness *w, *w1;
1139	int i;
1140
1141	for (w = parent; w != NULL; w = w->w_morechildren)
1142		for (i = 0; i < w->w_childcnt; i++)
1143			if (w->w_children[i] == child)
1144				goto found;
1145	return;
1146found:
1147	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1148		continue;
1149	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1150	MPASS(w->w_children[i] != NULL);
1151
1152	if (w1->w_childcnt != 0)
1153		return;
1154
1155	if (w1 == parent)
1156		return;
1157	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1158		continue;
1159	w->w_morechildren = 0;
1160	witness_free(w1);
1161}
1162
1163static int
1164isitmychild(struct witness *parent, struct witness *child)
1165{
1166	struct witness *w;
1167	int i;
1168
1169	for (w = parent; w != NULL; w = w->w_morechildren) {
1170		for (i = 0; i < w->w_childcnt; i++) {
1171			if (w->w_children[i] == child)
1172				return (1);
1173		}
1174	}
1175	return (0);
1176}
1177
1178static int
1179isitmydescendant(struct witness *parent, struct witness *child)
1180{
1181	struct witness *w;
1182	int i;
1183	int j;
1184
1185	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1186		MPASS(j < 1000);
1187		for (i = 0; i < w->w_childcnt; i++) {
1188			if (w->w_children[i] == child)
1189				return (1);
1190		}
1191		for (i = 0; i < w->w_childcnt; i++) {
1192			if (isitmydescendant(w->w_children[i], child))
1193				return (1);
1194		}
1195	}
1196	return (0);
1197}
1198
1199void
1200witness_levelall (void)
1201{
1202	struct witness *w, *w1;
1203
1204	for (w = w_all; w; w = w->w_next)
1205		if (!w->w_spin)
1206			w->w_level = 0;
1207	for (w = w_all; w; w = w->w_next) {
1208		if (w->w_spin)
1209			continue;
1210		for (w1 = w_all; w1; w1 = w1->w_next) {
1211			if (isitmychild(w1, w))
1212				break;
1213		}
1214		if (w1 != NULL)
1215			continue;
1216		witness_leveldescendents(w, 0);
1217	}
1218}
1219
1220static void
1221witness_leveldescendents(struct witness *parent, int level)
1222{
1223	int i;
1224	struct witness *w;
1225
1226	if (parent->w_level < level)
1227		parent->w_level = level;
1228	level++;
1229	for (w = parent; w != NULL; w = w->w_morechildren)
1230		for (i = 0; i < w->w_childcnt; i++)
1231			witness_leveldescendents(w->w_children[i], level);
1232}
1233
1234static void
1235witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1236			   struct witness *parent)
1237{
1238	struct witness *w;
1239	int i;
1240	int level = parent->w_level;
1241
1242	prnt("%d", level);
1243	if (level < 10)
1244		prnt(" ");
1245	for (i = 0; i < level; i++)
1246		prnt(" ");
1247	prnt("%s", parent->w_description);
1248	if (parent->w_file != NULL) {
1249		prnt(" -- last acquired @ %s", parent->w_file);
1250#ifndef W_USE_WHERE
1251		prnt(":%d", parent->w_line);
1252#endif
1253		prnt("\n");
1254	}
1255
1256	for (w = parent; w != NULL; w = w->w_morechildren)
1257		for (i = 0; i < w->w_childcnt; i++)
1258			    witness_displaydescendants(prnt, w->w_children[i]);
1259    }
1260
1261static int
1262dup_ok(struct witness *w)
1263{
1264	char **dup;
1265
1266	for (dup = dup_list; *dup!= NULL; dup++)
1267		if (strcmp(w->w_description, *dup) == 0)
1268			return (1);
1269	return (0);
1270}
1271
1272static int
1273blessed(struct witness *w1, struct witness *w2)
1274{
1275	int i;
1276	struct witness_blessed *b;
1277
1278	for (i = 0; i < blessed_count; i++) {
1279		b = &blessed_list[i];
1280		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1281			if (strcmp(w2->w_description, b->b_lock2) == 0)
1282				return (1);
1283			continue;
1284		}
1285		if (strcmp(w1->w_description, b->b_lock2) == 0)
1286			if (strcmp(w2->w_description, b->b_lock1) == 0)
1287				return (1);
1288	}
1289	return (0);
1290}
1291
1292static struct witness *
1293witness_get()
1294{
1295	struct witness *w;
1296
1297	if ((w = w_free) == NULL) {
1298		witness_dead = 1;
1299		mtx_exit(&w_mtx, MTX_SPIN);
1300		printf("witness exhausted\n");
1301		return (NULL);
1302	}
1303	w_free = w->w_next;
1304	bzero(w, sizeof(*w));
1305	return (w);
1306}
1307
1308static void
1309witness_free(struct witness *w)
1310{
1311	w->w_next = w_free;
1312	w_free = w;
1313}
1314
1315void
1316witness_list(struct proc *p)
1317{
1318	struct mtx *m;
1319
1320	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1321	    m = LIST_NEXT(m, mtx_held)) {
1322		printf("\t\"%s\" (%p) locked at %s:%d\n",
1323		    m->mtx_description, m,
1324		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1325	}
1326}
1327
1328void
1329witness_save(struct mtx *m, const char **filep, int *linep)
1330{
1331	*filep = m->mtx_witness->w_file;
1332	*linep = m->mtx_witness->w_line;
1333}
1334
1335void
1336witness_restore(struct mtx *m, const char *file, int line)
1337{
1338	m->mtx_witness->w_file = file;
1339	m->mtx_witness->w_line = line;
1340}
1341
1342#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1343