subr_witness.c revision 69369
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/subr_witness.c 69369 2000-11-29 20:17:15Z jhb $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56/*
57 * Cause non-inlined mtx_*() to be compiled.
58 * Must be defined early because other system headers may include mutex.h.
59 */
60#define _KERN_MUTEX_C_
61
62#include <sys/param.h>
63#include <sys/bus.h>
64#include <sys/kernel.h>
65#include <sys/malloc.h>
66#include <sys/proc.h>
67#include <sys/sysctl.h>
68#include <sys/systm.h>
69#include <sys/vmmeter.h>
70#include <sys/ktr.h>
71
72#include <machine/atomic.h>
73#include <machine/bus.h>
74#include <machine/clock.h>
75#include <machine/cpu.h>
76
77#include <ddb/ddb.h>
78
79#include <vm/vm.h>
80#include <vm/vm_extern.h>
81
82#include <sys/mutex.h>
83
84/*
85 * Machine independent bits of the mutex implementation
86 */
87/* All mutexes in system (used for debug/panic) */
88#ifdef MUTEX_DEBUG
89static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
90	"All mutexes queue head" };
91static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
92	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
93	{ NULL, NULL }, &all_mtx, &all_mtx };
94#else	/* MUTEX_DEBUG */
95static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
96	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
97	{ NULL, NULL }, &all_mtx, &all_mtx };
98#endif	/* MUTEX_DEBUG */
99
100static int	mtx_cur_cnt;
101static int	mtx_max_cnt;
102
103void	_mtx_enter_giant_def(void);
104void	_mtx_exit_giant_def(void);
105static void propagate_priority(struct proc *) __unused;
106
107#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
108#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
109			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
110
111#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
112#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
113
114/*
115 * XXX Temporary, for use from assembly language
116 */
117
118void
119_mtx_enter_giant_def(void)
120{
121
122	mtx_enter(&Giant, MTX_DEF);
123}
124
125void
126_mtx_exit_giant_def(void)
127{
128
129	mtx_exit(&Giant, MTX_DEF);
130}
131
132static void
133propagate_priority(struct proc *p)
134{
135	int pri = p->p_priority;
136	struct mtx *m = p->p_blocked;
137
138	for (;;) {
139		struct proc *p1;
140
141		p = mtx_owner(m);
142
143		if (p == NULL) {
144			/*
145			 * This really isn't quite right. Really
146			 * ought to bump priority of process that
147			 * next acquires the mutex.
148			 */
149			MPASS(m->mtx_lock == MTX_CONTESTED);
150			return;
151		}
152		MPASS(p->p_magic == P_MAGIC);
153		if (p->p_priority <= pri)
154			return;
155		/*
156		 * If lock holder is actually running, just bump priority.
157		 */
158		if (TAILQ_NEXT(p, p_procq) == NULL) {
159			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
160			SET_PRIO(p, pri);
161			return;
162		}
163		/*
164		 * If on run queue move to new run queue, and
165		 * quit.
166		 */
167		if (p->p_stat == SRUN) {
168			MPASS(p->p_blocked == NULL);
169			remrunqueue(p);
170			SET_PRIO(p, pri);
171			setrunqueue(p);
172			return;
173		}
174
175		/*
176		 * If we aren't blocked on a mutex, give up and quit.
177		 */
178		if (p->p_stat != SMTX) {
179			printf(
180	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
181			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
182			return;
183		}
184
185		/*
186		 * Pick up the mutex that p is blocked on.
187		 */
188		m = p->p_blocked;
189		MPASS(m != NULL);
190
191		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
192		    p->p_comm, m->mtx_description);
193		/*
194		 * Check if the proc needs to be moved up on
195		 * the blocked chain
196		 */
197		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
198		    p1->p_priority <= pri) {
199			if (p1)
200				printf(
201	"XXX: previous process %d(%s) has higher priority\n",
202				    p->p_pid, p->p_comm);
203			else
204				printf("XXX: process at head of run queue\n");
205			continue;
206		}
207
208		/*
209		 * Remove proc from blocked chain
210		 */
211		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
212		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
213			MPASS(p1->p_magic == P_MAGIC);
214			if (p1->p_priority > pri)
215				break;
216		}
217		if (p1)
218			TAILQ_INSERT_BEFORE(p1, p, p_procq);
219		else
220			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
221		CTR4(KTR_LOCK,
222		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
223		    p, p1, m, m->mtx_description);
224	}
225}
226
227void
228mtx_enter_hard(struct mtx *m, int type, int saveintr)
229{
230	struct proc *p = CURPROC;
231
232	KASSERT(p != NULL, ("curproc is NULL in mutex"));
233
234	switch (type) {
235	case MTX_DEF:
236		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
237			m->mtx_recurse++;
238			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
239			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
240			return;
241		}
242		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
243		    m, (void *)m->mtx_lock, (void *)RETIP(m));
244		while (!_obtain_lock(m, p)) {
245			uintptr_t v;
246			struct proc *p1;
247
248			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
249			/*
250			 * check if the lock has been released while
251			 * waiting for the schedlock.
252			 */
253			if ((v = m->mtx_lock) == MTX_UNOWNED) {
254				mtx_exit(&sched_lock, MTX_SPIN);
255				continue;
256			}
257			/*
258			 * The mutex was marked contested on release. This
259			 * means that there are processes blocked on it.
260			 */
261			if (v == MTX_CONTESTED) {
262				p1 = TAILQ_FIRST(&m->mtx_blocked);
263				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
264				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
265				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
266				if (p1->p_priority < p->p_priority) {
267					SET_PRIO(p, p1->p_priority);
268				}
269				mtx_exit(&sched_lock, MTX_SPIN);
270				return;
271			}
272			/*
273			 * If the mutex isn't already contested and
274			 * a failure occurs setting the contested bit the
275			 * mutex was either release or the
276			 * state of the RECURSION bit changed.
277			 */
278			if ((v & MTX_CONTESTED) == 0 &&
279			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
280				               (void *)(v | MTX_CONTESTED))) {
281				mtx_exit(&sched_lock, MTX_SPIN);
282				continue;
283			}
284
285			/* We definitely have to sleep for this lock */
286			mtx_assert(m, MA_NOTOWNED);
287
288#ifdef notyet
289			/*
290			 * If we're borrowing an interrupted thread's VM
291			 * context must clean up before going to sleep.
292			 */
293			if (p->p_flag & (P_ITHD | P_SITHD)) {
294				ithd_t *it = (ithd_t *)p;
295
296				if (it->it_interrupted) {
297					CTR2(KTR_LOCK,
298					    "mtx_enter: 0x%x interrupted 0x%x",
299					    it, it->it_interrupted);
300					intr_thd_fixup(it);
301				}
302			}
303#endif
304
305			/* Put us on the list of procs blocked on this mutex */
306			if (TAILQ_EMPTY(&m->mtx_blocked)) {
307				p1 = (struct proc *)(m->mtx_lock &
308						     MTX_FLAGMASK);
309				LIST_INSERT_HEAD(&p1->p_contested, m,
310						 mtx_contested);
311				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
312			} else {
313				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
314					if (p1->p_priority > p->p_priority)
315						break;
316				if (p1)
317					TAILQ_INSERT_BEFORE(p1, p, p_procq);
318				else
319					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
320							  p_procq);
321			}
322
323			p->p_blocked = m;	/* Who we're blocked on */
324			p->p_mtxname = m->mtx_description;
325			p->p_stat = SMTX;
326#if 0
327			propagate_priority(p);
328#endif
329			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
330			    p, m, m->mtx_description);
331			mi_switch();
332			CTR3(KTR_LOCK,
333			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
334			    p, m, m->mtx_description);
335			mtx_exit(&sched_lock, MTX_SPIN);
336		}
337		return;
338	case MTX_SPIN:
339	case MTX_SPIN | MTX_FIRST:
340	case MTX_SPIN | MTX_TOPHALF:
341	    {
342		int i = 0;
343
344		if (m->mtx_lock == (uintptr_t)p) {
345			m->mtx_recurse++;
346			return;
347		}
348		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
349		for (;;) {
350			if (_obtain_lock(m, p))
351				break;
352			while (m->mtx_lock != MTX_UNOWNED) {
353				if (i++ < 1000000)
354					continue;
355				if (i++ < 6000000)
356					DELAY (1);
357#ifdef DDB
358				else if (!db_active)
359#else
360				else
361#endif
362					panic(
363				"spin lock %s held by 0x%p for > 5 seconds",
364					    m->mtx_description,
365					    (void *)m->mtx_lock);
366			}
367		}
368
369#ifdef MUTEX_DEBUG
370		if (type != MTX_SPIN)
371			m->mtx_saveintr = 0xbeefface;
372		else
373#endif
374			m->mtx_saveintr = saveintr;
375		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
376		return;
377	    }
378	}
379}
380
381void
382mtx_exit_hard(struct mtx *m, int type)
383{
384	struct proc *p, *p1;
385	struct mtx *m1;
386	int pri;
387
388	p = CURPROC;
389	switch (type) {
390	case MTX_DEF:
391	case MTX_DEF | MTX_NOSWITCH:
392		if (m->mtx_recurse != 0) {
393			if (--(m->mtx_recurse) == 0)
394				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
395			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
396			return;
397		}
398		mtx_enter(&sched_lock, MTX_SPIN);
399		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
400		p1 = TAILQ_FIRST(&m->mtx_blocked);
401		MPASS(p->p_magic == P_MAGIC);
402		MPASS(p1->p_magic == P_MAGIC);
403		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
404		if (TAILQ_EMPTY(&m->mtx_blocked)) {
405			LIST_REMOVE(m, mtx_contested);
406			_release_lock_quick(m);
407			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
408		} else
409			atomic_store_rel_ptr(&m->mtx_lock,
410			    (void *)MTX_CONTESTED);
411		pri = MAXPRI;
412		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
413			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
414			if (cp < pri)
415				pri = cp;
416		}
417		if (pri > p->p_nativepri)
418			pri = p->p_nativepri;
419		SET_PRIO(p, pri);
420		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
421		    m, p1);
422		p1->p_blocked = NULL;
423		p1->p_mtxname = NULL;
424		p1->p_stat = SRUN;
425		setrunqueue(p1);
426		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
427#ifdef notyet
428			if (p->p_flag & (P_ITHD | P_SITHD)) {
429				ithd_t *it = (ithd_t *)p;
430
431				if (it->it_interrupted) {
432					CTR2(KTR_LOCK,
433					    "mtx_exit: 0x%x interruped 0x%x",
434					    it, it->it_interrupted);
435					intr_thd_fixup(it);
436				}
437			}
438#endif
439			setrunqueue(p);
440			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
441			    m, (void *)m->mtx_lock);
442			mi_switch();
443			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
444			    m, (void *)m->mtx_lock);
445		}
446		mtx_exit(&sched_lock, MTX_SPIN);
447		break;
448	case MTX_SPIN:
449	case MTX_SPIN | MTX_FIRST:
450		if (m->mtx_recurse != 0) {
451			m->mtx_recurse--;
452			return;
453		}
454		MPASS(mtx_owned(m));
455		_release_lock_quick(m);
456		if (type & MTX_FIRST)
457			enable_intr();	/* XXX is this kosher? */
458		else {
459			MPASS(m->mtx_saveintr != 0xbeefface);
460			restore_intr(m->mtx_saveintr);
461		}
462		break;
463	case MTX_SPIN | MTX_TOPHALF:
464		if (m->mtx_recurse != 0) {
465			m->mtx_recurse--;
466			return;
467		}
468		MPASS(mtx_owned(m));
469		_release_lock_quick(m);
470		break;
471	default:
472		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
473	}
474}
475
476#define MV_DESTROY	0	/* validate before destory */
477#define MV_INIT		1	/* validate before init */
478
479#ifdef MUTEX_DEBUG
480
481int mtx_validate __P((struct mtx *, int));
482
483int
484mtx_validate(struct mtx *m, int when)
485{
486	struct mtx *mp;
487	int i;
488	int retval = 0;
489
490	if (m == &all_mtx || cold)
491		return 0;
492
493	mtx_enter(&all_mtx, MTX_DEF);
494/*
495 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
496 * we can re-enable the kernacc() checks.
497 */
498#ifndef __alpha__
499	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
500	    VM_PROT_READ) == 1);
501#endif
502	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
503	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
504#ifndef __alpha__
505		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
506		    VM_PROT_READ) != 1) {
507			panic("mtx_validate: mp=%p mp->mtx_next=%p",
508			    mp, mp->mtx_next);
509		}
510#endif
511		i++;
512		if (i > mtx_cur_cnt) {
513			panic("mtx_validate: too many in chain, known=%d\n",
514			    mtx_cur_cnt);
515		}
516	}
517	MPASS(i == mtx_cur_cnt);
518	switch (when) {
519	case MV_DESTROY:
520		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
521			if (mp == m)
522				break;
523		MPASS(mp == m);
524		break;
525	case MV_INIT:
526		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
527		if (mp == m) {
528			/*
529			 * Not good. This mutex already exists.
530			 */
531			printf("re-initing existing mutex %s\n",
532			    m->mtx_description);
533			MPASS(m->mtx_lock == MTX_UNOWNED);
534			retval = 1;
535		}
536	}
537	mtx_exit(&all_mtx, MTX_DEF);
538	return (retval);
539}
540#endif
541
542void
543mtx_init(struct mtx *m, const char *t, int flag)
544{
545#ifdef MUTEX_DEBUG
546	struct mtx_debug *debug;
547#endif
548
549	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
550#ifdef MUTEX_DEBUG
551	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
552		return;
553	if (flag & MTX_COLD)
554		debug = m->mtx_debug;
555	else
556		debug = NULL;
557	if (debug == NULL) {
558#ifdef DIAGNOSTIC
559		if(cold && bootverbose)
560			printf("malloc'ing mtx_debug while cold for %s\n", t);
561#endif
562
563		/* XXX - should not use DEVBUF */
564		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
565		MPASS(debug != NULL);
566		bzero(debug, sizeof(struct mtx_debug));
567	}
568#endif
569	bzero((void *)m, sizeof *m);
570	TAILQ_INIT(&m->mtx_blocked);
571#ifdef MUTEX_DEBUG
572	m->mtx_debug = debug;
573#endif
574	m->mtx_description = t;
575	m->mtx_lock = MTX_UNOWNED;
576	/* Put on all mutex queue */
577	mtx_enter(&all_mtx, MTX_DEF);
578	m->mtx_next = &all_mtx;
579	m->mtx_prev = all_mtx.mtx_prev;
580	m->mtx_prev->mtx_next = m;
581	all_mtx.mtx_prev = m;
582	if (++mtx_cur_cnt > mtx_max_cnt)
583		mtx_max_cnt = mtx_cur_cnt;
584	mtx_exit(&all_mtx, MTX_DEF);
585	witness_init(m, flag);
586}
587
588void
589mtx_destroy(struct mtx *m)
590{
591
592	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
593#ifdef MUTEX_DEBUG
594	if (m->mtx_next == NULL)
595		panic("mtx_destroy: %p (%s) already destroyed",
596		    m, m->mtx_description);
597
598	if (!mtx_owned(m)) {
599		MPASS(m->mtx_lock == MTX_UNOWNED);
600	} else {
601		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
602	}
603	mtx_validate(m, MV_DESTROY);		/* diagnostic */
604#endif
605
606#ifdef WITNESS
607	if (m->mtx_witness)
608		witness_destroy(m);
609#endif /* WITNESS */
610
611	/* Remove from the all mutex queue */
612	mtx_enter(&all_mtx, MTX_DEF);
613	m->mtx_next->mtx_prev = m->mtx_prev;
614	m->mtx_prev->mtx_next = m->mtx_next;
615#ifdef MUTEX_DEBUG
616	m->mtx_next = m->mtx_prev = NULL;
617	free(m->mtx_debug, M_DEVBUF);
618	m->mtx_debug = NULL;
619#endif
620	mtx_cur_cnt--;
621	mtx_exit(&all_mtx, MTX_DEF);
622}
623
624/*
625 * The non-inlined versions of the mtx_*() functions are always built (above),
626 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
627 * being specified.
628 */
629#if (defined(MUTEX_DEBUG) && defined(WITNESS))
630
631#define WITNESS_COUNT 200
632#define	WITNESS_NCHILDREN 2
633
634int witness_watch = 1;
635
636struct witness {
637	struct witness	*w_next;
638	const char	*w_description;
639	const char	*w_file;
640	int		 w_line;
641	struct witness	*w_morechildren;
642	u_char		 w_childcnt;
643	u_char		 w_Giant_squawked:1;
644	u_char		 w_other_squawked:1;
645	u_char		 w_same_squawked:1;
646	u_char		 w_sleep:1;
647	u_char		 w_spin:1;	/* this is a spin mutex */
648	u_int		 w_level;
649	struct witness	*w_children[WITNESS_NCHILDREN];
650};
651
652struct witness_blessed {
653	char 	*b_lock1;
654	char	*b_lock2;
655};
656
657#ifdef DDB
658/*
659 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
660 * drop into kdebug() when:
661 *	- a lock heirarchy violation occurs
662 *	- locks are held when going to sleep.
663 */
664#ifdef WITNESS_DDB
665int	witness_ddb = 1;
666#else
667int	witness_ddb = 0;
668#endif
669SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
670#endif /* DDB */
671
672#ifdef WITNESS_SKIPSPIN
673int	witness_skipspin = 1;
674#else
675int	witness_skipspin = 0;
676#endif
677SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
678    "");
679
680MUTEX_DECLARE(static,w_mtx);
681static struct witness	*w_free;
682static struct witness	*w_all;
683static int		 w_inited;
684static int		 witness_dead;	/* fatal error, probably no memory */
685
686static struct witness	 w_data[WITNESS_COUNT];
687
688static struct witness	 *enroll __P((const char *description, int flag));
689static int itismychild __P((struct witness *parent, struct witness *child));
690static void removechild __P((struct witness *parent, struct witness *child));
691static int isitmychild __P((struct witness *parent, struct witness *child));
692static int isitmydescendant __P((struct witness *parent, struct witness *child));
693static int dup_ok __P((struct witness *));
694static int blessed __P((struct witness *, struct witness *));
695static void witness_displaydescendants
696    __P((void(*)(const char *fmt, ...), struct witness *));
697static void witness_leveldescendents __P((struct witness *parent, int level));
698static void witness_levelall __P((void));
699static struct witness * witness_get __P((void));
700static void witness_free __P((struct witness *m));
701
702
703static char *ignore_list[] = {
704	"witness lock",
705	NULL
706};
707
708static char *spin_order_list[] = {
709	"sio",
710	"sched lock",
711#ifdef __i386__
712	"clk",
713#endif
714	"callout",
715	/*
716	 * leaf locks
717	 */
718	NULL
719};
720
721static char *order_list[] = {
722	"uidinfo hash", "uidinfo struct", NULL,
723	NULL
724};
725
726static char *dup_list[] = {
727	NULL
728};
729
730static char *sleep_list[] = {
731	"Giant",
732	NULL
733};
734
735/*
736 * Pairs of locks which have been blessed
737 * Don't complain about order problems with blessed locks
738 */
739static struct witness_blessed blessed_list[] = {
740};
741static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
742
743void
744witness_init(struct mtx *m, int flag)
745{
746	m->mtx_witness = enroll(m->mtx_description, flag);
747}
748
749void
750witness_destroy(struct mtx *m)
751{
752	struct mtx *m1;
753	struct proc *p;
754	p = CURPROC;
755	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
756		m1 = LIST_NEXT(m1, mtx_held)) {
757		if (m1 == m) {
758			LIST_REMOVE(m, mtx_held);
759			break;
760		}
761	}
762	return;
763
764}
765
766void
767witness_enter(struct mtx *m, int flags, const char *file, int line)
768{
769	struct witness *w, *w1;
770	struct mtx *m1;
771	struct proc *p;
772	int i;
773#ifdef DDB
774	int go_into_ddb = 0;
775#endif /* DDB */
776
777	w = m->mtx_witness;
778	p = CURPROC;
779
780	if (flags & MTX_SPIN) {
781		if (!w->w_spin)
782			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
783			    " %s:%d", m->mtx_description, file, line);
784		if (m->mtx_recurse != 0)
785			return;
786		mtx_enter(&w_mtx, MTX_SPIN);
787		i = witness_spin_check;
788		if (i != 0 && w->w_level < i) {
789			mtx_exit(&w_mtx, MTX_SPIN);
790			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
791			    " %s:%d already holding %s:%x",
792			    m->mtx_description, w->w_level, file, line,
793			    spin_order_list[ffs(i)-1], i);
794		}
795		PCPU_SET(witness_spin_check, i | w->w_level);
796		mtx_exit(&w_mtx, MTX_SPIN);
797		w->w_file = file;
798		w->w_line = line;
799		m->mtx_line = line;
800		m->mtx_file = file;
801		return;
802	}
803	if (w->w_spin)
804		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
805		    m->mtx_description, file, line);
806
807	if (m->mtx_recurse != 0)
808		return;
809	if (witness_dead)
810		goto out;
811	if (cold || panicstr)
812		goto out;
813
814	if (!mtx_legal2block())
815		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
816			    m->mtx_description, file, line);
817	/*
818	 * Is this the first mutex acquired
819	 */
820	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
821		goto out;
822
823	if ((w1 = m1->mtx_witness) == w) {
824		if (w->w_same_squawked || dup_ok(w))
825			goto out;
826		w->w_same_squawked = 1;
827		printf("acquring duplicate lock of same type: \"%s\"\n",
828			m->mtx_description);
829		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
830		printf(" 2nd @ %s:%d\n", file, line);
831#ifdef DDB
832		go_into_ddb = 1;
833#endif /* DDB */
834		goto out;
835	}
836	MPASS(!mtx_owned(&w_mtx));
837	mtx_enter(&w_mtx, MTX_SPIN);
838	/*
839	 * If we have a known higher number just say ok
840	 */
841	if (witness_watch > 1 && w->w_level > w1->w_level) {
842		mtx_exit(&w_mtx, MTX_SPIN);
843		goto out;
844	}
845	if (isitmydescendant(m1->mtx_witness, w)) {
846		mtx_exit(&w_mtx, MTX_SPIN);
847		goto out;
848	}
849	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
850
851		MPASS(i < 200);
852		w1 = m1->mtx_witness;
853		if (isitmydescendant(w, w1)) {
854			mtx_exit(&w_mtx, MTX_SPIN);
855			if (blessed(w, w1))
856				goto out;
857			if (m1 == &Giant) {
858				if (w1->w_Giant_squawked)
859					goto out;
860				else
861					w1->w_Giant_squawked = 1;
862			} else {
863				if (w1->w_other_squawked)
864					goto out;
865				else
866					w1->w_other_squawked = 1;
867			}
868			printf("lock order reversal\n");
869			printf(" 1st %s last acquired @ %s:%d\n",
870			    w->w_description, w->w_file, w->w_line);
871			printf(" 2nd %p %s @ %s:%d\n",
872			    m1, w1->w_description, w1->w_file, w1->w_line);
873			printf(" 3rd %p %s @ %s:%d\n",
874			    m, w->w_description, file, line);
875#ifdef DDB
876			go_into_ddb = 1;
877#endif /* DDB */
878			goto out;
879		}
880	}
881	m1 = LIST_FIRST(&p->p_heldmtx);
882	if (!itismychild(m1->mtx_witness, w))
883		mtx_exit(&w_mtx, MTX_SPIN);
884
885out:
886#ifdef DDB
887	if (witness_ddb && go_into_ddb)
888		Debugger("witness_enter");
889#endif /* DDB */
890	w->w_file = file;
891	w->w_line = line;
892	m->mtx_line = line;
893	m->mtx_file = file;
894
895	/*
896	 * If this pays off it likely means that a mutex being witnessed
897	 * is acquired in hardclock. Put it in the ignore list. It is
898	 * likely not the mutex this assert fails on.
899	 */
900	MPASS(m->mtx_held.le_prev == NULL);
901	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
902}
903
904void
905witness_exit(struct mtx *m, int flags, const char *file, int line)
906{
907	struct witness *w;
908
909	w = m->mtx_witness;
910
911	if (flags & MTX_SPIN) {
912		if (!w->w_spin)
913			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
914			    " %s:%d", m->mtx_description, file, line);
915		if (m->mtx_recurse != 0)
916			return;
917		mtx_enter(&w_mtx, MTX_SPIN);
918		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
919		mtx_exit(&w_mtx, MTX_SPIN);
920		return;
921	}
922	if (w->w_spin)
923		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
924		    m->mtx_description, file, line);
925
926	if (m->mtx_recurse != 0)
927		return;
928
929	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
930		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
931			    m->mtx_description, file, line);
932	LIST_REMOVE(m, mtx_held);
933	m->mtx_held.le_prev = NULL;
934}
935
936void
937witness_try_enter(struct mtx *m, int flags, const char *file, int line)
938{
939	struct proc *p;
940	struct witness *w = m->mtx_witness;
941
942	if (flags & MTX_SPIN) {
943		if (!w->w_spin)
944			panic("mutex_try_enter: "
945			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
946			    m->mtx_description, file, line);
947		if (m->mtx_recurse != 0)
948			return;
949		mtx_enter(&w_mtx, MTX_SPIN);
950		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
951		mtx_exit(&w_mtx, MTX_SPIN);
952		w->w_file = file;
953		w->w_line = line;
954		m->mtx_line = line;
955		m->mtx_file = file;
956		return;
957	}
958
959	if (w->w_spin)
960		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
961		    m->mtx_description, file, line);
962
963	if (m->mtx_recurse != 0)
964		return;
965
966	w->w_file = file;
967	w->w_line = line;
968	m->mtx_line = line;
969	m->mtx_file = file;
970	p = CURPROC;
971	MPASS(m->mtx_held.le_prev == NULL);
972	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
973}
974
975void
976witness_display(void(*prnt)(const char *fmt, ...))
977{
978	struct witness *w, *w1;
979
980	witness_levelall();
981
982	for (w = w_all; w; w = w->w_next) {
983		if (w->w_file == NULL)
984			continue;
985		for (w1 = w_all; w1; w1 = w1->w_next) {
986			if (isitmychild(w1, w))
987				break;
988		}
989		if (w1 != NULL)
990			continue;
991		/*
992		 * This lock has no anscestors, display its descendants.
993		 */
994		witness_displaydescendants(prnt, w);
995	}
996	prnt("\nMutex which were never acquired\n");
997	for (w = w_all; w; w = w->w_next) {
998		if (w->w_file != NULL)
999			continue;
1000		prnt("%s\n", w->w_description);
1001	}
1002}
1003
1004int
1005witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1006{
1007	struct mtx *m;
1008	struct proc *p;
1009	char **sleep;
1010	int n = 0;
1011
1012	p = CURPROC;
1013	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1014	    m = LIST_NEXT(m, mtx_held)) {
1015		if (m == mtx)
1016			continue;
1017		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1018			if (strcmp(m->mtx_description, *sleep) == 0)
1019				goto next;
1020		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1021			file, line, check_only ? "could sleep" : "sleeping",
1022			m->mtx_description,
1023			m->mtx_witness->w_file, m->mtx_witness->w_line);
1024		n++;
1025	next:
1026	}
1027#ifdef DDB
1028	if (witness_ddb && n)
1029		Debugger("witness_sleep");
1030#endif /* DDB */
1031	return (n);
1032}
1033
1034static struct witness *
1035enroll(const char *description, int flag)
1036{
1037	int i;
1038	struct witness *w, *w1;
1039	char **ignore;
1040	char **order;
1041
1042	if (!witness_watch)
1043		return (NULL);
1044	for (ignore = ignore_list; *ignore != NULL; ignore++)
1045		if (strcmp(description, *ignore) == 0)
1046			return (NULL);
1047
1048	if (w_inited == 0) {
1049		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
1050		for (i = 0; i < WITNESS_COUNT; i++) {
1051			w = &w_data[i];
1052			witness_free(w);
1053		}
1054		w_inited = 1;
1055		for (order = order_list; *order != NULL; order++) {
1056			w = enroll(*order, MTX_DEF);
1057			w->w_file = "order list";
1058			for (order++; *order != NULL; order++) {
1059				w1 = enroll(*order, MTX_DEF);
1060				w1->w_file = "order list";
1061				itismychild(w, w1);
1062				w = w1;
1063    	    	    	}
1064		}
1065	}
1066	if ((flag & MTX_SPIN) && witness_skipspin)
1067		return (NULL);
1068	mtx_enter(&w_mtx, MTX_SPIN);
1069	for (w = w_all; w; w = w->w_next) {
1070		if (strcmp(description, w->w_description) == 0) {
1071			mtx_exit(&w_mtx, MTX_SPIN);
1072			return (w);
1073		}
1074	}
1075	if ((w = witness_get()) == NULL)
1076		return (NULL);
1077	w->w_next = w_all;
1078	w_all = w;
1079	w->w_description = description;
1080	mtx_exit(&w_mtx, MTX_SPIN);
1081	if (flag & MTX_SPIN) {
1082		w->w_spin = 1;
1083
1084		i = 1;
1085		for (order = spin_order_list; *order != NULL; order++) {
1086			if (strcmp(description, *order) == 0)
1087				break;
1088			i <<= 1;
1089		}
1090		if (*order == NULL)
1091			panic("spin lock %s not in order list", description);
1092		w->w_level = i;
1093	}
1094	return (w);
1095}
1096
1097static int
1098itismychild(struct witness *parent, struct witness *child)
1099{
1100	static int recursed;
1101
1102	/*
1103	 * Insert "child" after "parent"
1104	 */
1105	while (parent->w_morechildren)
1106		parent = parent->w_morechildren;
1107
1108	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1109		if ((parent->w_morechildren = witness_get()) == NULL)
1110			return (1);
1111		parent = parent->w_morechildren;
1112	}
1113	MPASS(child != NULL);
1114	parent->w_children[parent->w_childcnt++] = child;
1115	/*
1116	 * now prune whole tree
1117	 */
1118	if (recursed)
1119		return (0);
1120	recursed = 1;
1121	for (child = w_all; child != NULL; child = child->w_next) {
1122		for (parent = w_all; parent != NULL;
1123		    parent = parent->w_next) {
1124			if (!isitmychild(parent, child))
1125				continue;
1126			removechild(parent, child);
1127			if (isitmydescendant(parent, child))
1128				continue;
1129			itismychild(parent, child);
1130		}
1131	}
1132	recursed = 0;
1133	witness_levelall();
1134	return (0);
1135}
1136
1137static void
1138removechild(struct witness *parent, struct witness *child)
1139{
1140	struct witness *w, *w1;
1141	int i;
1142
1143	for (w = parent; w != NULL; w = w->w_morechildren)
1144		for (i = 0; i < w->w_childcnt; i++)
1145			if (w->w_children[i] == child)
1146				goto found;
1147	return;
1148found:
1149	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1150		continue;
1151	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1152	MPASS(w->w_children[i] != NULL);
1153
1154	if (w1->w_childcnt != 0)
1155		return;
1156
1157	if (w1 == parent)
1158		return;
1159	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1160		continue;
1161	w->w_morechildren = 0;
1162	witness_free(w1);
1163}
1164
1165static int
1166isitmychild(struct witness *parent, struct witness *child)
1167{
1168	struct witness *w;
1169	int i;
1170
1171	for (w = parent; w != NULL; w = w->w_morechildren) {
1172		for (i = 0; i < w->w_childcnt; i++) {
1173			if (w->w_children[i] == child)
1174				return (1);
1175		}
1176	}
1177	return (0);
1178}
1179
1180static int
1181isitmydescendant(struct witness *parent, struct witness *child)
1182{
1183	struct witness *w;
1184	int i;
1185	int j;
1186
1187	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1188		MPASS(j < 1000);
1189		for (i = 0; i < w->w_childcnt; i++) {
1190			if (w->w_children[i] == child)
1191				return (1);
1192		}
1193		for (i = 0; i < w->w_childcnt; i++) {
1194			if (isitmydescendant(w->w_children[i], child))
1195				return (1);
1196		}
1197	}
1198	return (0);
1199}
1200
1201void
1202witness_levelall (void)
1203{
1204	struct witness *w, *w1;
1205
1206	for (w = w_all; w; w = w->w_next)
1207		if (!w->w_spin)
1208			w->w_level = 0;
1209	for (w = w_all; w; w = w->w_next) {
1210		if (w->w_spin)
1211			continue;
1212		for (w1 = w_all; w1; w1 = w1->w_next) {
1213			if (isitmychild(w1, w))
1214				break;
1215		}
1216		if (w1 != NULL)
1217			continue;
1218		witness_leveldescendents(w, 0);
1219	}
1220}
1221
1222static void
1223witness_leveldescendents(struct witness *parent, int level)
1224{
1225	int i;
1226	struct witness *w;
1227
1228	if (parent->w_level < level)
1229		parent->w_level = level;
1230	level++;
1231	for (w = parent; w != NULL; w = w->w_morechildren)
1232		for (i = 0; i < w->w_childcnt; i++)
1233			witness_leveldescendents(w->w_children[i], level);
1234}
1235
1236static void
1237witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1238			   struct witness *parent)
1239{
1240	struct witness *w;
1241	int i;
1242	int level = parent->w_level;
1243
1244	prnt("%d", level);
1245	if (level < 10)
1246		prnt(" ");
1247	for (i = 0; i < level; i++)
1248		prnt(" ");
1249	prnt("%s", parent->w_description);
1250	if (parent->w_file != NULL) {
1251		prnt(" -- last acquired @ %s", parent->w_file);
1252#ifndef W_USE_WHERE
1253		prnt(":%d", parent->w_line);
1254#endif
1255		prnt("\n");
1256	}
1257
1258	for (w = parent; w != NULL; w = w->w_morechildren)
1259		for (i = 0; i < w->w_childcnt; i++)
1260			    witness_displaydescendants(prnt, w->w_children[i]);
1261    }
1262
1263static int
1264dup_ok(struct witness *w)
1265{
1266	char **dup;
1267
1268	for (dup = dup_list; *dup!= NULL; dup++)
1269		if (strcmp(w->w_description, *dup) == 0)
1270			return (1);
1271	return (0);
1272}
1273
1274static int
1275blessed(struct witness *w1, struct witness *w2)
1276{
1277	int i;
1278	struct witness_blessed *b;
1279
1280	for (i = 0; i < blessed_count; i++) {
1281		b = &blessed_list[i];
1282		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1283			if (strcmp(w2->w_description, b->b_lock2) == 0)
1284				return (1);
1285			continue;
1286		}
1287		if (strcmp(w1->w_description, b->b_lock2) == 0)
1288			if (strcmp(w2->w_description, b->b_lock1) == 0)
1289				return (1);
1290	}
1291	return (0);
1292}
1293
1294static struct witness *
1295witness_get()
1296{
1297	struct witness *w;
1298
1299	if ((w = w_free) == NULL) {
1300		witness_dead = 1;
1301		mtx_exit(&w_mtx, MTX_SPIN);
1302		printf("witness exhausted\n");
1303		return (NULL);
1304	}
1305	w_free = w->w_next;
1306	bzero(w, sizeof(*w));
1307	return (w);
1308}
1309
1310static void
1311witness_free(struct witness *w)
1312{
1313	w->w_next = w_free;
1314	w_free = w;
1315}
1316
1317void
1318witness_list(struct proc *p)
1319{
1320	struct mtx *m;
1321
1322	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1323	    m = LIST_NEXT(m, mtx_held)) {
1324		printf("\t\"%s\" (%p) locked at %s:%d\n",
1325		    m->mtx_description, m,
1326		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1327	}
1328}
1329
1330void
1331witness_save(struct mtx *m, const char **filep, int *linep)
1332{
1333	*filep = m->mtx_witness->w_file;
1334	*linep = m->mtx_witness->w_line;
1335}
1336
1337void
1338witness_restore(struct mtx *m, const char *file, int line)
1339{
1340	m->mtx_witness->w_file = file;
1341	m->mtx_witness->w_line = line;
1342}
1343
1344#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1345