kern_mutex.c revision 68862
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/kern_mutex.c 68862 2000-11-17 18:09:18Z jake $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56#include <sys/param.h>
57#include <sys/bus.h>
58#include <sys/kernel.h>
59#include <sys/malloc.h>
60#include <sys/proc.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63#include <sys/vmmeter.h>
64#include <sys/ktr.h>
65
66#include <machine/atomic.h>
67#include <machine/bus.h>
68#include <machine/clock.h>
69#include <machine/cpu.h>
70
71#include <ddb/ddb.h>
72
73#include <vm/vm.h>
74#include <vm/vm_extern.h>
75
76#define _KERN_MUTEX_C_		/* Cause non-inlined mtx_*() to be compiled. */
77#include <sys/mutex.h>
78
79/*
80 * Machine independent bits of the mutex implementation
81 */
82/* All mutexes in system (used for debug/panic) */
83#ifdef MUTEX_DEBUG
84static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
85	"All mutexes queue head" };
86static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
87	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
88	{ NULL, NULL }, &all_mtx, &all_mtx };
89#else	/* MUTEX_DEBUG */
90static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
91	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
92	{ NULL, NULL }, &all_mtx, &all_mtx };
93#endif	/* MUTEX_DEBUG */
94
95static int	mtx_cur_cnt;
96static int	mtx_max_cnt;
97
98void	_mtx_enter_giant_def(void);
99void	_mtx_exit_giant_def(void);
100static void propagate_priority(struct proc *) __unused;
101
102#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
103#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
104			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
105
106#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
107#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
108
109/*
110 * XXX Temporary, for use from assembly language
111 */
112
113void
114_mtx_enter_giant_def(void)
115{
116
117	mtx_enter(&Giant, MTX_DEF);
118}
119
120void
121_mtx_exit_giant_def(void)
122{
123
124	mtx_exit(&Giant, MTX_DEF);
125}
126
127static void
128propagate_priority(struct proc *p)
129{
130	int pri = p->p_priority;
131	struct mtx *m = p->p_blocked;
132
133	for (;;) {
134		struct proc *p1;
135
136		p = mtx_owner(m);
137
138		if (p == NULL) {
139			/*
140			 * This really isn't quite right. Really
141			 * ought to bump priority of process that
142			 * next acquires the mutex.
143			 */
144			MPASS(m->mtx_lock == MTX_CONTESTED);
145			return;
146		}
147		MPASS(p->p_magic == P_MAGIC);
148		if (p->p_priority <= pri)
149			return;
150		/*
151		 * If lock holder is actually running, just bump priority.
152		 */
153		if (TAILQ_NEXT(p, p_procq) == NULL) {
154			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
155			SET_PRIO(p, pri);
156			return;
157		}
158		/*
159		 * If on run queue move to new run queue, and
160		 * quit.
161		 */
162		if (p->p_stat == SRUN) {
163			MPASS(p->p_blocked == NULL);
164			remrunqueue(p);
165			SET_PRIO(p, pri);
166			setrunqueue(p);
167			return;
168		}
169
170		/*
171		 * If we aren't blocked on a mutex, give up and quit.
172		 */
173		if (p->p_stat != SMTX) {
174			printf(
175	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
176			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
177			return;
178		}
179
180		/*
181		 * Pick up the mutex that p is blocked on.
182		 */
183		m = p->p_blocked;
184		MPASS(m != NULL);
185
186		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
187		    p->p_comm, m->mtx_description);
188		/*
189		 * Check if the proc needs to be moved up on
190		 * the blocked chain
191		 */
192		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
193		    p1->p_priority <= pri) {
194			if (p1)
195				printf(
196	"XXX: previous process %d(%s) has higher priority\n",
197				    p->p_pid, p->p_comm);
198			else
199				printf("XXX: process at head of run queue\n");
200			continue;
201		}
202
203		/*
204		 * Remove proc from blocked chain
205		 */
206		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
207		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
208			MPASS(p1->p_magic == P_MAGIC);
209			if (p1->p_priority > pri)
210				break;
211		}
212		if (p1)
213			TAILQ_INSERT_BEFORE(p1, p, p_procq);
214		else
215			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
216		CTR4(KTR_LOCK,
217		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
218		    p, p1, m, m->mtx_description);
219	}
220}
221
222void
223mtx_enter_hard(struct mtx *m, int type, int saveintr)
224{
225	struct proc *p = CURPROC;
226
227	KASSERT(p != NULL, ("curproc is NULL in mutex"));
228
229	switch (type) {
230	case MTX_DEF:
231		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
232			m->mtx_recurse++;
233			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
234			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
235			return;
236		}
237		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
238		    m, (void *)m->mtx_lock, (void *)RETIP(m));
239		while (!_obtain_lock(m, p)) {
240			uintptr_t v;
241			struct proc *p1;
242
243			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
244			/*
245			 * check if the lock has been released while
246			 * waiting for the schedlock.
247			 */
248			if ((v = m->mtx_lock) == MTX_UNOWNED) {
249				mtx_exit(&sched_lock, MTX_SPIN);
250				continue;
251			}
252			/*
253			 * The mutex was marked contested on release. This
254			 * means that there are processes blocked on it.
255			 */
256			if (v == MTX_CONTESTED) {
257				p1 = TAILQ_FIRST(&m->mtx_blocked);
258				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
259				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
260				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
261				if (p1->p_priority < p->p_priority) {
262					SET_PRIO(p, p1->p_priority);
263				}
264				mtx_exit(&sched_lock, MTX_SPIN);
265				return;
266			}
267			/*
268			 * If the mutex isn't already contested and
269			 * a failure occurs setting the contested bit the
270			 * mutex was either release or the
271			 * state of the RECURSION bit changed.
272			 */
273			if ((v & MTX_CONTESTED) == 0 &&
274			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
275				               (void *)(v | MTX_CONTESTED))) {
276				mtx_exit(&sched_lock, MTX_SPIN);
277				continue;
278			}
279
280			/* We definitely have to sleep for this lock */
281			mtx_assert(m, MA_NOTOWNED);
282
283#ifdef notyet
284			/*
285			 * If we're borrowing an interrupted thread's VM
286			 * context must clean up before going to sleep.
287			 */
288			if (p->p_flag & (P_ITHD | P_SITHD)) {
289				ithd_t *it = (ithd_t *)p;
290
291				if (it->it_interrupted) {
292					CTR2(KTR_LOCK,
293					    "mtx_enter: 0x%x interrupted 0x%x",
294					    it, it->it_interrupted);
295					intr_thd_fixup(it);
296				}
297			}
298#endif
299
300			/* Put us on the list of procs blocked on this mutex */
301			if (TAILQ_EMPTY(&m->mtx_blocked)) {
302				p1 = (struct proc *)(m->mtx_lock &
303						     MTX_FLAGMASK);
304				LIST_INSERT_HEAD(&p1->p_contested, m,
305						 mtx_contested);
306				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
307			} else {
308				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
309					if (p1->p_priority > p->p_priority)
310						break;
311				if (p1)
312					TAILQ_INSERT_BEFORE(p1, p, p_procq);
313				else
314					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
315							  p_procq);
316			}
317
318			p->p_blocked = m;	/* Who we're blocked on */
319			p->p_stat = SMTX;
320#if 0
321			propagate_priority(p);
322#endif
323			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
324			    p, m, m->mtx_description);
325			mi_switch();
326			CTR3(KTR_LOCK,
327			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
328			    p, m, m->mtx_description);
329			mtx_exit(&sched_lock, MTX_SPIN);
330		}
331		return;
332	case MTX_SPIN:
333	case MTX_SPIN | MTX_FIRST:
334	case MTX_SPIN | MTX_TOPHALF:
335	    {
336		int i = 0;
337
338		if (m->mtx_lock == (uintptr_t)p) {
339			m->mtx_recurse++;
340			return;
341		}
342		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
343		for (;;) {
344			if (_obtain_lock(m, p))
345				break;
346			while (m->mtx_lock != MTX_UNOWNED) {
347				if (i++ < 1000000)
348					continue;
349				if (i++ < 6000000)
350					DELAY (1);
351#ifdef DDB
352				else if (!db_active)
353#else
354				else
355#endif
356					panic(
357				"spin lock %s held by 0x%p for > 5 seconds",
358					    m->mtx_description,
359					    (void *)m->mtx_lock);
360			}
361		}
362
363#ifdef MUTEX_DEBUG
364		if (type != MTX_SPIN)
365			m->mtx_saveintr = 0xbeefface;
366		else
367#endif
368			m->mtx_saveintr = saveintr;
369		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
370		return;
371	    }
372	}
373}
374
375void
376mtx_exit_hard(struct mtx *m, int type)
377{
378	struct proc *p, *p1;
379	struct mtx *m1;
380	int pri;
381
382	p = CURPROC;
383	switch (type) {
384	case MTX_DEF:
385	case MTX_DEF | MTX_NOSWITCH:
386		if (m->mtx_recurse != 0) {
387			if (--(m->mtx_recurse) == 0)
388				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
389			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
390			return;
391		}
392		mtx_enter(&sched_lock, MTX_SPIN);
393		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
394		p1 = TAILQ_FIRST(&m->mtx_blocked);
395		MPASS(p->p_magic == P_MAGIC);
396		MPASS(p1->p_magic == P_MAGIC);
397		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
398		if (TAILQ_EMPTY(&m->mtx_blocked)) {
399			LIST_REMOVE(m, mtx_contested);
400			_release_lock_quick(m);
401			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
402		} else
403			m->mtx_lock = MTX_CONTESTED;
404		pri = MAXPRI;
405		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
406			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
407			if (cp < pri)
408				pri = cp;
409		}
410		if (pri > p->p_nativepri)
411			pri = p->p_nativepri;
412		SET_PRIO(p, pri);
413		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
414		    m, p1);
415		p1->p_blocked = NULL;
416		p1->p_stat = SRUN;
417		setrunqueue(p1);
418		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
419#ifdef notyet
420			if (p->p_flag & (P_ITHD | P_SITHD)) {
421				ithd_t *it = (ithd_t *)p;
422
423				if (it->it_interrupted) {
424					CTR2(KTR_LOCK,
425					    "mtx_exit: 0x%x interruped 0x%x",
426					    it, it->it_interrupted);
427					intr_thd_fixup(it);
428				}
429			}
430#endif
431			setrunqueue(p);
432			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
433			    m, (void *)m->mtx_lock);
434			mi_switch();
435			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
436			    m, (void *)m->mtx_lock);
437		}
438		mtx_exit(&sched_lock, MTX_SPIN);
439		break;
440	case MTX_SPIN:
441	case MTX_SPIN | MTX_FIRST:
442		if (m->mtx_recurse != 0) {
443			m->mtx_recurse--;
444			return;
445		}
446		MPASS(mtx_owned(m));
447		_release_lock_quick(m);
448		if (type & MTX_FIRST)
449			enable_intr();	/* XXX is this kosher? */
450		else {
451			MPASS(m->mtx_saveintr != 0xbeefface);
452			restore_intr(m->mtx_saveintr);
453		}
454		break;
455	case MTX_SPIN | MTX_TOPHALF:
456		if (m->mtx_recurse != 0) {
457			m->mtx_recurse--;
458			return;
459		}
460		MPASS(mtx_owned(m));
461		_release_lock_quick(m);
462		break;
463	default:
464		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
465	}
466}
467
468#define MV_DESTROY	0	/* validate before destory */
469#define MV_INIT		1	/* validate before init */
470
471#ifdef MUTEX_DEBUG
472
473int mtx_validate __P((struct mtx *, int));
474
475int
476mtx_validate(struct mtx *m, int when)
477{
478	struct mtx *mp;
479	int i;
480	int retval = 0;
481
482	if (m == &all_mtx || cold)
483		return 0;
484
485	mtx_enter(&all_mtx, MTX_DEF);
486/*
487 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
488 * we can re-enable the kernacc() checks.
489 */
490#ifndef __alpha__
491	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
492	    VM_PROT_READ) == 1);
493#endif
494	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
495	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
496#ifndef __alpha__
497		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
498		    VM_PROT_READ) != 1) {
499			panic("mtx_validate: mp=%p mp->mtx_next=%p",
500			    mp, mp->mtx_next);
501		}
502#endif
503		i++;
504		if (i > mtx_cur_cnt) {
505			panic("mtx_validate: too many in chain, known=%d\n",
506			    mtx_cur_cnt);
507		}
508	}
509	MPASS(i == mtx_cur_cnt);
510	switch (when) {
511	case MV_DESTROY:
512		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
513			if (mp == m)
514				break;
515		MPASS(mp == m);
516		break;
517	case MV_INIT:
518		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
519		if (mp == m) {
520			/*
521			 * Not good. This mutex already exists.
522			 */
523			printf("re-initing existing mutex %s\n",
524			    m->mtx_description);
525			MPASS(m->mtx_lock == MTX_UNOWNED);
526			retval = 1;
527		}
528	}
529	mtx_exit(&all_mtx, MTX_DEF);
530	return (retval);
531}
532#endif
533
534void
535mtx_init(struct mtx *m, const char *t, int flag)
536{
537#ifdef MUTEX_DEBUG
538	struct mtx_debug *debug;
539#endif
540
541	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
542#ifdef MUTEX_DEBUG
543	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
544		return;
545	if (flag & MTX_COLD)
546		debug = m->mtx_debug;
547	else
548		debug = NULL;
549	if (debug == NULL) {
550#ifdef DIAGNOSTIC
551		if(cold && bootverbose)
552			printf("malloc'ing mtx_debug while cold for %s\n", t);
553#endif
554
555		/* XXX - should not use DEVBUF */
556		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
557		MPASS(debug != NULL);
558		bzero(debug, sizeof(struct mtx_debug));
559	}
560#endif
561	bzero((void *)m, sizeof *m);
562	TAILQ_INIT(&m->mtx_blocked);
563#ifdef MUTEX_DEBUG
564	m->mtx_debug = debug;
565#endif
566	m->mtx_description = t;
567	m->mtx_lock = MTX_UNOWNED;
568	/* Put on all mutex queue */
569	mtx_enter(&all_mtx, MTX_DEF);
570	m->mtx_next = &all_mtx;
571	m->mtx_prev = all_mtx.mtx_prev;
572	m->mtx_prev->mtx_next = m;
573	all_mtx.mtx_prev = m;
574	if (++mtx_cur_cnt > mtx_max_cnt)
575		mtx_max_cnt = mtx_cur_cnt;
576	mtx_exit(&all_mtx, MTX_DEF);
577	witness_init(m, flag);
578}
579
580void
581mtx_destroy(struct mtx *m)
582{
583
584	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
585#ifdef MUTEX_DEBUG
586	if (m->mtx_next == NULL)
587		panic("mtx_destroy: %p (%s) already destroyed",
588		    m, m->mtx_description);
589
590	if (!mtx_owned(m)) {
591		MPASS(m->mtx_lock == MTX_UNOWNED);
592	} else {
593		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
594	}
595	mtx_validate(m, MV_DESTROY);		/* diagnostic */
596#endif
597
598#ifdef WITNESS
599	if (m->mtx_witness)
600		witness_destroy(m);
601#endif /* WITNESS */
602
603	/* Remove from the all mutex queue */
604	mtx_enter(&all_mtx, MTX_DEF);
605	m->mtx_next->mtx_prev = m->mtx_prev;
606	m->mtx_prev->mtx_next = m->mtx_next;
607#ifdef MUTEX_DEBUG
608	m->mtx_next = m->mtx_prev = NULL;
609	free(m->mtx_debug, M_DEVBUF);
610	m->mtx_debug = NULL;
611#endif
612	mtx_cur_cnt--;
613	mtx_exit(&all_mtx, MTX_DEF);
614}
615
616/*
617 * The non-inlined versions of the mtx_*() functions are always built (above),
618 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
619 * being specified.
620 */
621#if (defined(MUTEX_DEBUG) && defined(WITNESS))
622
623#define WITNESS_COUNT 200
624#define	WITNESS_NCHILDREN 2
625
626int witness_watch = 1;
627
628struct witness {
629	struct witness	*w_next;
630	const char	*w_description;
631	const char	*w_file;
632	int		 w_line;
633	struct witness	*w_morechildren;
634	u_char		 w_childcnt;
635	u_char		 w_Giant_squawked:1;
636	u_char		 w_other_squawked:1;
637	u_char		 w_same_squawked:1;
638	u_char		 w_sleep:1;
639	u_char		 w_spin:1;	/* this is a spin mutex */
640	u_int		 w_level;
641	struct witness	*w_children[WITNESS_NCHILDREN];
642};
643
644struct witness_blessed {
645	char 	*b_lock1;
646	char	*b_lock2;
647};
648
649#ifdef DDB
650/*
651 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
652 * drop into kdebug() when:
653 *	- a lock heirarchy violation occurs
654 *	- locks are held when going to sleep.
655 */
656#ifdef WITNESS_DDB
657int	witness_ddb = 1;
658#else
659int	witness_ddb = 0;
660#endif
661SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
662#endif /* DDB */
663
664#ifdef WITNESS_SKIPSPIN
665int	witness_skipspin = 1;
666#else
667int	witness_skipspin = 0;
668#endif
669SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
670    "");
671
672MUTEX_DECLARE(static,w_mtx);
673static struct witness	*w_free;
674static struct witness	*w_all;
675static int		 w_inited;
676static int		 witness_dead;	/* fatal error, probably no memory */
677
678static struct witness	 w_data[WITNESS_COUNT];
679
680static struct witness	 *enroll __P((const char *description, int flag));
681static int itismychild __P((struct witness *parent, struct witness *child));
682static void removechild __P((struct witness *parent, struct witness *child));
683static int isitmychild __P((struct witness *parent, struct witness *child));
684static int isitmydescendant __P((struct witness *parent, struct witness *child));
685static int dup_ok __P((struct witness *));
686static int blessed __P((struct witness *, struct witness *));
687static void witness_displaydescendants
688    __P((void(*)(const char *fmt, ...), struct witness *));
689static void witness_leveldescendents __P((struct witness *parent, int level));
690static void witness_levelall __P((void));
691static struct witness * witness_get __P((void));
692static void witness_free __P((struct witness *m));
693
694
695static char *ignore_list[] = {
696	"witness lock",
697	NULL
698};
699
700static char *spin_order_list[] = {
701	"sched lock",
702	"sio",
703#ifdef __i386__
704	"clk",
705#endif
706	/*
707	 * leaf locks
708	 */
709	NULL
710};
711
712static char *order_list[] = {
713	NULL
714};
715
716static char *dup_list[] = {
717	NULL
718};
719
720static char *sleep_list[] = {
721	"Giant",
722	NULL
723};
724
725/*
726 * Pairs of locks which have been blessed
727 * Don't complain about order problems with blessed locks
728 */
729static struct witness_blessed blessed_list[] = {
730};
731static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
732
733void
734witness_init(struct mtx *m, int flag)
735{
736	m->mtx_witness = enroll(m->mtx_description, flag);
737}
738
739void
740witness_destroy(struct mtx *m)
741{
742	struct mtx *m1;
743	struct proc *p;
744	p = CURPROC;
745	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
746		m1 = LIST_NEXT(m1, mtx_held)) {
747		if (m1 == m) {
748			LIST_REMOVE(m, mtx_held);
749			break;
750		}
751	}
752	return;
753
754}
755
756void
757witness_enter(struct mtx *m, int flags, const char *file, int line)
758{
759	struct witness *w, *w1;
760	struct mtx *m1;
761	struct proc *p;
762	int i;
763#ifdef DDB
764	int go_into_ddb = 0;
765#endif /* DDB */
766
767	w = m->mtx_witness;
768	p = CURPROC;
769
770	if (flags & MTX_SPIN) {
771		if (!w->w_spin)
772			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
773			    " %s:%d", m->mtx_description, file, line);
774		if (m->mtx_recurse != 0)
775			return;
776		mtx_enter(&w_mtx, MTX_SPIN);
777		i = witness_spin_check;
778		if (i != 0 && w->w_level < i) {
779			mtx_exit(&w_mtx, MTX_SPIN);
780			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
781			    " %s:%d already holding %s:%x",
782			    m->mtx_description, w->w_level, file, line,
783			    spin_order_list[ffs(i)-1], i);
784		}
785		PCPU_SET(witness_spin_check, i | w->w_level);
786		mtx_exit(&w_mtx, MTX_SPIN);
787		return;
788	}
789	if (w->w_spin)
790		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
791		    m->mtx_description, file, line);
792
793	if (m->mtx_recurse != 0)
794		return;
795	if (witness_dead)
796		goto out;
797	if (cold || panicstr)
798		goto out;
799
800	if (!mtx_legal2block())
801		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
802			    m->mtx_description, file, line);
803	/*
804	 * Is this the first mutex acquired
805	 */
806	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
807		goto out;
808
809	if ((w1 = m1->mtx_witness) == w) {
810		if (w->w_same_squawked || dup_ok(w))
811			goto out;
812		w->w_same_squawked = 1;
813		printf("acquring duplicate lock of same type: \"%s\"\n",
814			m->mtx_description);
815		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
816		printf(" 2nd @ %s:%d\n", file, line);
817#ifdef DDB
818		go_into_ddb = 1;
819#endif /* DDB */
820		goto out;
821	}
822	MPASS(!mtx_owned(&w_mtx));
823	mtx_enter(&w_mtx, MTX_SPIN);
824	/*
825	 * If we have a known higher number just say ok
826	 */
827	if (witness_watch > 1 && w->w_level > w1->w_level) {
828		mtx_exit(&w_mtx, MTX_SPIN);
829		goto out;
830	}
831	if (isitmydescendant(m1->mtx_witness, w)) {
832		mtx_exit(&w_mtx, MTX_SPIN);
833		goto out;
834	}
835	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
836
837		MPASS(i < 200);
838		w1 = m1->mtx_witness;
839		if (isitmydescendant(w, w1)) {
840			mtx_exit(&w_mtx, MTX_SPIN);
841			if (blessed(w, w1))
842				goto out;
843			if (m1 == &Giant) {
844				if (w1->w_Giant_squawked)
845					goto out;
846				else
847					w1->w_Giant_squawked = 1;
848			} else {
849				if (w1->w_other_squawked)
850					goto out;
851				else
852					w1->w_other_squawked = 1;
853			}
854			printf("lock order reversal\n");
855			printf(" 1st %s last acquired @ %s:%d\n",
856			    w->w_description, w->w_file, w->w_line);
857			printf(" 2nd %p %s @ %s:%d\n",
858			    m1, w1->w_description, w1->w_file, w1->w_line);
859			printf(" 3rd %p %s @ %s:%d\n",
860			    m, w->w_description, file, line);
861#ifdef DDB
862			go_into_ddb = 1;
863#endif /* DDB */
864			goto out;
865		}
866	}
867	m1 = LIST_FIRST(&p->p_heldmtx);
868	if (!itismychild(m1->mtx_witness, w))
869		mtx_exit(&w_mtx, MTX_SPIN);
870
871out:
872#ifdef DDB
873	if (witness_ddb && go_into_ddb)
874		Debugger("witness_enter");
875#endif /* DDB */
876	w->w_file = file;
877	w->w_line = line;
878	m->mtx_line = line;
879	m->mtx_file = file;
880
881	/*
882	 * If this pays off it likely means that a mutex being witnessed
883	 * is acquired in hardclock. Put it in the ignore list. It is
884	 * likely not the mutex this assert fails on.
885	 */
886	MPASS(m->mtx_held.le_prev == NULL);
887	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
888}
889
890void
891witness_exit(struct mtx *m, int flags, const char *file, int line)
892{
893	struct witness *w;
894
895	w = m->mtx_witness;
896
897	if (flags & MTX_SPIN) {
898		if (!w->w_spin)
899			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
900			    " %s:%d", m->mtx_description, file, line);
901		if (m->mtx_recurse != 0)
902			return;
903		mtx_enter(&w_mtx, MTX_SPIN);
904		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
905		mtx_exit(&w_mtx, MTX_SPIN);
906		return;
907	}
908	if (w->w_spin)
909		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
910		    m->mtx_description, file, line);
911
912	if (m->mtx_recurse != 0)
913		return;
914
915	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
916		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
917			    m->mtx_description, file, line);
918	LIST_REMOVE(m, mtx_held);
919	m->mtx_held.le_prev = NULL;
920}
921
922void
923witness_try_enter(struct mtx *m, int flags, const char *file, int line)
924{
925	struct proc *p;
926	struct witness *w = m->mtx_witness;
927
928	if (flags & MTX_SPIN) {
929		if (!w->w_spin)
930			panic("mutex_try_enter: "
931			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
932			    m->mtx_description, file, line);
933		if (m->mtx_recurse != 0)
934			return;
935		mtx_enter(&w_mtx, MTX_SPIN);
936		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
937		mtx_exit(&w_mtx, MTX_SPIN);
938		return;
939	}
940
941	if (w->w_spin)
942		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
943		    m->mtx_description, file, line);
944
945	if (m->mtx_recurse != 0)
946		return;
947
948	w->w_file = file;
949	w->w_line = line;
950	m->mtx_line = line;
951	m->mtx_file = file;
952	p = CURPROC;
953	MPASS(m->mtx_held.le_prev == NULL);
954	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
955}
956
957void
958witness_display(void(*prnt)(const char *fmt, ...))
959{
960	struct witness *w, *w1;
961
962	witness_levelall();
963
964	for (w = w_all; w; w = w->w_next) {
965		if (w->w_file == NULL)
966			continue;
967		for (w1 = w_all; w1; w1 = w1->w_next) {
968			if (isitmychild(w1, w))
969				break;
970		}
971		if (w1 != NULL)
972			continue;
973		/*
974		 * This lock has no anscestors, display its descendants.
975		 */
976		witness_displaydescendants(prnt, w);
977	}
978	prnt("\nMutex which were never acquired\n");
979	for (w = w_all; w; w = w->w_next) {
980		if (w->w_file != NULL)
981			continue;
982		prnt("%s\n", w->w_description);
983	}
984}
985
986int
987witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
988{
989	struct mtx *m;
990	struct proc *p;
991	char **sleep;
992	int n = 0;
993
994	p = CURPROC;
995	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
996	    m = LIST_NEXT(m, mtx_held)) {
997		if (m == mtx)
998			continue;
999		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1000			if (strcmp(m->mtx_description, *sleep) == 0)
1001				goto next;
1002		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1003			file, line, check_only ? "could sleep" : "sleeping",
1004			m->mtx_description,
1005			m->mtx_witness->w_file, m->mtx_witness->w_line);
1006		n++;
1007	next:
1008	}
1009#ifdef DDB
1010	if (witness_ddb && n)
1011		Debugger("witness_sleep");
1012#endif /* DDB */
1013	return (n);
1014}
1015
1016static struct witness *
1017enroll(const char *description, int flag)
1018{
1019	int i;
1020	struct witness *w, *w1;
1021	char **ignore;
1022	char **order;
1023
1024	if (!witness_watch)
1025		return (NULL);
1026	for (ignore = ignore_list; *ignore != NULL; ignore++)
1027		if (strcmp(description, *ignore) == 0)
1028			return (NULL);
1029
1030	if (w_inited == 0) {
1031		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
1032		for (i = 0; i < WITNESS_COUNT; i++) {
1033			w = &w_data[i];
1034			witness_free(w);
1035		}
1036		w_inited = 1;
1037		for (order = order_list; *order != NULL; order++) {
1038			w = enroll(*order, MTX_DEF);
1039			w->w_file = "order list";
1040			for (order++; *order != NULL; order++) {
1041				w1 = enroll(*order, MTX_DEF);
1042				w1->w_file = "order list";
1043				itismychild(w, w1);
1044				w = w1;
1045    	    	    	}
1046		}
1047	}
1048	if ((flag & MTX_SPIN) && witness_skipspin)
1049		return (NULL);
1050	mtx_enter(&w_mtx, MTX_SPIN);
1051	for (w = w_all; w; w = w->w_next) {
1052		if (strcmp(description, w->w_description) == 0) {
1053			mtx_exit(&w_mtx, MTX_SPIN);
1054			return (w);
1055		}
1056	}
1057	if ((w = witness_get()) == NULL)
1058		return (NULL);
1059	w->w_next = w_all;
1060	w_all = w;
1061	w->w_description = description;
1062	mtx_exit(&w_mtx, MTX_SPIN);
1063	if (flag & MTX_SPIN) {
1064		w->w_spin = 1;
1065
1066		i = 1;
1067		for (order = spin_order_list; *order != NULL; order++) {
1068			if (strcmp(description, *order) == 0)
1069				break;
1070			i <<= 1;
1071		}
1072		if (*order == NULL)
1073			panic("spin lock %s not in order list", description);
1074		w->w_level = i;
1075	}
1076	return (w);
1077}
1078
1079static int
1080itismychild(struct witness *parent, struct witness *child)
1081{
1082	static int recursed;
1083
1084	/*
1085	 * Insert "child" after "parent"
1086	 */
1087	while (parent->w_morechildren)
1088		parent = parent->w_morechildren;
1089
1090	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1091		if ((parent->w_morechildren = witness_get()) == NULL)
1092			return (1);
1093		parent = parent->w_morechildren;
1094	}
1095	MPASS(child != NULL);
1096	parent->w_children[parent->w_childcnt++] = child;
1097	/*
1098	 * now prune whole tree
1099	 */
1100	if (recursed)
1101		return (0);
1102	recursed = 1;
1103	for (child = w_all; child != NULL; child = child->w_next) {
1104		for (parent = w_all; parent != NULL;
1105		    parent = parent->w_next) {
1106			if (!isitmychild(parent, child))
1107				continue;
1108			removechild(parent, child);
1109			if (isitmydescendant(parent, child))
1110				continue;
1111			itismychild(parent, child);
1112		}
1113	}
1114	recursed = 0;
1115	witness_levelall();
1116	return (0);
1117}
1118
1119static void
1120removechild(struct witness *parent, struct witness *child)
1121{
1122	struct witness *w, *w1;
1123	int i;
1124
1125	for (w = parent; w != NULL; w = w->w_morechildren)
1126		for (i = 0; i < w->w_childcnt; i++)
1127			if (w->w_children[i] == child)
1128				goto found;
1129	return;
1130found:
1131	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1132		continue;
1133	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1134	MPASS(w->w_children[i] != NULL);
1135
1136	if (w1->w_childcnt != 0)
1137		return;
1138
1139	if (w1 == parent)
1140		return;
1141	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1142		continue;
1143	w->w_morechildren = 0;
1144	witness_free(w1);
1145}
1146
1147static int
1148isitmychild(struct witness *parent, struct witness *child)
1149{
1150	struct witness *w;
1151	int i;
1152
1153	for (w = parent; w != NULL; w = w->w_morechildren) {
1154		for (i = 0; i < w->w_childcnt; i++) {
1155			if (w->w_children[i] == child)
1156				return (1);
1157		}
1158	}
1159	return (0);
1160}
1161
1162static int
1163isitmydescendant(struct witness *parent, struct witness *child)
1164{
1165	struct witness *w;
1166	int i;
1167	int j;
1168
1169	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1170		MPASS(j < 1000);
1171		for (i = 0; i < w->w_childcnt; i++) {
1172			if (w->w_children[i] == child)
1173				return (1);
1174		}
1175		for (i = 0; i < w->w_childcnt; i++) {
1176			if (isitmydescendant(w->w_children[i], child))
1177				return (1);
1178		}
1179	}
1180	return (0);
1181}
1182
1183void
1184witness_levelall (void)
1185{
1186	struct witness *w, *w1;
1187
1188	for (w = w_all; w; w = w->w_next)
1189		if (!w->w_spin)
1190			w->w_level = 0;
1191	for (w = w_all; w; w = w->w_next) {
1192		if (w->w_spin)
1193			continue;
1194		for (w1 = w_all; w1; w1 = w1->w_next) {
1195			if (isitmychild(w1, w))
1196				break;
1197		}
1198		if (w1 != NULL)
1199			continue;
1200		witness_leveldescendents(w, 0);
1201	}
1202}
1203
1204static void
1205witness_leveldescendents(struct witness *parent, int level)
1206{
1207	int i;
1208	struct witness *w;
1209
1210	if (parent->w_level < level)
1211		parent->w_level = level;
1212	level++;
1213	for (w = parent; w != NULL; w = w->w_morechildren)
1214		for (i = 0; i < w->w_childcnt; i++)
1215			witness_leveldescendents(w->w_children[i], level);
1216}
1217
1218static void
1219witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1220			   struct witness *parent)
1221{
1222	struct witness *w;
1223	int i;
1224	int level = parent->w_level;
1225
1226	prnt("%d", level);
1227	if (level < 10)
1228		prnt(" ");
1229	for (i = 0; i < level; i++)
1230		prnt(" ");
1231	prnt("%s", parent->w_description);
1232	if (parent->w_file != NULL) {
1233		prnt(" -- last acquired @ %s", parent->w_file);
1234#ifndef W_USE_WHERE
1235		prnt(":%d", parent->w_line);
1236#endif
1237		prnt("\n");
1238	}
1239
1240	for (w = parent; w != NULL; w = w->w_morechildren)
1241		for (i = 0; i < w->w_childcnt; i++)
1242			    witness_displaydescendants(prnt, w->w_children[i]);
1243    }
1244
1245static int
1246dup_ok(struct witness *w)
1247{
1248	char **dup;
1249
1250	for (dup = dup_list; *dup!= NULL; dup++)
1251		if (strcmp(w->w_description, *dup) == 0)
1252			return (1);
1253	return (0);
1254}
1255
1256static int
1257blessed(struct witness *w1, struct witness *w2)
1258{
1259	int i;
1260	struct witness_blessed *b;
1261
1262	for (i = 0; i < blessed_count; i++) {
1263		b = &blessed_list[i];
1264		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1265			if (strcmp(w2->w_description, b->b_lock2) == 0)
1266				return (1);
1267			continue;
1268		}
1269		if (strcmp(w1->w_description, b->b_lock2) == 0)
1270			if (strcmp(w2->w_description, b->b_lock1) == 0)
1271				return (1);
1272	}
1273	return (0);
1274}
1275
1276static struct witness *
1277witness_get()
1278{
1279	struct witness *w;
1280
1281	if ((w = w_free) == NULL) {
1282		witness_dead = 1;
1283		mtx_exit(&w_mtx, MTX_SPIN);
1284		printf("witness exhausted\n");
1285		return (NULL);
1286	}
1287	w_free = w->w_next;
1288	bzero(w, sizeof(*w));
1289	return (w);
1290}
1291
1292static void
1293witness_free(struct witness *w)
1294{
1295	w->w_next = w_free;
1296	w_free = w;
1297}
1298
1299void
1300witness_list(struct proc *p)
1301{
1302	struct mtx *m;
1303
1304	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1305	    m = LIST_NEXT(m, mtx_held)) {
1306		printf("\t\"%s\" (%p) locked at %s:%d\n",
1307		    m->mtx_description, m,
1308		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1309	}
1310}
1311
1312void
1313witness_save(struct mtx *m, const char **filep, int *linep)
1314{
1315	*filep = m->mtx_witness->w_file;
1316	*linep = m->mtx_witness->w_line;
1317}
1318
1319void
1320witness_restore(struct mtx *m, const char *file, int line)
1321{
1322	m->mtx_witness->w_file = file;
1323	m->mtx_witness->w_line = line;
1324}
1325
1326#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1327