subr_turnstile.c revision 74016
1284193Sdelphij/*-
2284193Sdelphij * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3175296Sobrien *
4175296Sobrien * Redistribution and use in source and binary forms, with or without
5175296Sobrien * modification, are permitted provided that the following conditions
6175296Sobrien * are met:
7175296Sobrien * 1. Redistributions of source code must retain the above copyright
8175296Sobrien *    notice, this list of conditions and the following disclaimer.
9175296Sobrien * 2. Redistributions in binary form must reproduce the above copyright
10226048Sobrien *    notice, this list of conditions and the following disclaimer in the
11267843Sdelphij *    documentation and/or other materials provided with the distribution.
12226048Sobrien * 3. Berkeley Software Design Inc's name may not be used to endorse or
13226048Sobrien *    promote products derived from this software without specific prior
14226048Sobrien *    written permission.
15226048Sobrien *
16226048Sobrien * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17175296Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18175296Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19284193Sdelphij * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20226048Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21226048Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22175296Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23175296Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24226048Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25186690Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26226048Sobrien * SUCH DAMAGE.
27175296Sobrien *
2868349Sobrien *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29175296Sobrien *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
3068349Sobrien * $FreeBSD: head/sys/kern/subr_turnstile.c 74016 2001-03-09 07:24:17Z jhb $
31175296Sobrien */
32175296Sobrien
3368349Sobrien/*
3468349Sobrien * Machine independent bits of mutex implementation and implementation of
35186690Sobrien * `witness' structure & related debugging routines.
3668349Sobrien */
37175296Sobrien
3868349Sobrien/*
39175296Sobrien *	Main Entry: witness
4068349Sobrien *	Pronunciation: 'wit-n&s
41175296Sobrien *	Function: noun
4268349Sobrien *	Etymology: Middle English witnesse, from Old English witnes knowledge,
4368349Sobrien *	    testimony, witness, from 2wit
4468349Sobrien *	Date: before 12th century
45175296Sobrien *	1 : attestation of a fact or event : TESTIMONY
4668349Sobrien *	2 : one that gives evidence; specifically : one who testifies in
47175296Sobrien *	    a cause or before a judicial tribunal
4868349Sobrien *	3 : one asked to be present at a transaction so as to be able to
49191736Sobrien *	    testify to its having taken place
50226048Sobrien *	4 : one who has personal knowledge of something
51175296Sobrien *	5 a : something serving as evidence or proof : SIGN
5268349Sobrien *	  b : public affirmation by word or example of usually
53175296Sobrien *	      religious faith or conviction <the heroic witness to divine
54191736Sobrien *	      life -- Pilot>
55226048Sobrien *	6 capitalized : a member of the Jehovah's Witnesses
56175296Sobrien */
5768349Sobrien
5868349Sobrien#include "opt_ddb.h"
59186690Sobrien#include "opt_witness.h"
60175296Sobrien
61186690Sobrien#include <sys/param.h>
62191736Sobrien#include <sys/bus.h>
63226048Sobrien#include <sys/kernel.h>
64175296Sobrien#include <sys/malloc.h>
65191736Sobrien#include <sys/proc.h>
66226048Sobrien#include <sys/sysctl.h>
67191736Sobrien#include <sys/systm.h>
68226048Sobrien#include <sys/vmmeter.h>
69175296Sobrien#include <sys/ktr.h>
7068349Sobrien
71175296Sobrien#include <machine/atomic.h>
7268349Sobrien#include <machine/bus.h>
7368349Sobrien#include <machine/clock.h>
7468349Sobrien#include <machine/cpu.h>
7568349Sobrien
7668349Sobrien#include <ddb/ddb.h>
7768349Sobrien
78226048Sobrien#include <vm/vm.h>
79175296Sobrien#include <vm/vm_extern.h>
80175296Sobrien
81186690Sobrien#include <sys/mutex.h>
8268349Sobrien
8368349Sobrien/*
84175296Sobrien * The WITNESS-enabled mutex debug structure.
85191736Sobrien */
86175296Sobrien#ifdef WITNESS
87175296Sobrienstruct mtx_debug {
8868349Sobrien	struct witness	*mtxd_witness;
89175296Sobrien	LIST_ENTRY(mtx)	mtxd_held;
9068349Sobrien	const char	*mtxd_file;
91191736Sobrien	int		mtxd_line;
92226048Sobrien};
93175296Sobrien
94191736Sobrien#define mtx_held	mtx_debug->mtxd_held
95226048Sobrien#define	mtx_file	mtx_debug->mtxd_file
96226048Sobrien#define	mtx_line	mtx_debug->mtxd_line
9768349Sobrien#define	mtx_witness	mtx_debug->mtxd_witness
98175296Sobrien#endif	/* WITNESS */
99226048Sobrien
100175296Sobrien/*
10168349Sobrien * Internal utility macros.
10268349Sobrien */
10374784Sobrien#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
10474784Sobrien
105175296Sobrien#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106186690Sobrien	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107175296Sobrien
108226048Sobrien#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
109226048Sobrien
110186690Sobrien/*
111159764Sobrien * Early WITNESS-enabled declarations.
112175296Sobrien */
113186690Sobrien#ifdef WITNESS
114175296Sobrien
11568349Sobrien/*
11668349Sobrien * Internal WITNESS routines which must be prototyped early.
11768349Sobrien *
11868349Sobrien * XXX: When/if witness code is cleaned up, it would be wise to place all
11968349Sobrien *	witness prototyping early in this file.
12068349Sobrien */
12168349Sobrienstatic void witness_init(struct mtx *, int flag);
12268349Sobrienstatic void witness_destroy(struct mtx *);
12368349Sobrienstatic void witness_display(void(*)(const char *fmt, ...));
12468349Sobrien
125191736SobrienMALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
126226048Sobrien
127175296Sobrien/* All mutexes in system (used for debug/panic) */
128191736Sobrienstatic struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
129226048Sobrien
130175296Sobrien/*
13168349Sobrien * This global is set to 0 once it becomes safe to use the witness code.
13268349Sobrien */
13368349Sobrienstatic int witness_cold = 1;
134175296Sobrien
13568349Sobrien#else	/* WITNESS */
13668349Sobrien
13768349Sobrien/* XXX XXX XXX
13868349Sobrien * flag++ is sleazoid way of shuting up warning
13968349Sobrien */
140175296Sobrien#define witness_init(m, flag) flag++
14168349Sobrien#define witness_destroy(m)
142175296Sobrien#define witness_try_enter(m, t, f, l)
14368349Sobrien#endif	/* WITNESS */
14468349Sobrien
14568349Sobrien/*
146186690Sobrien * All mutex locks in system are kept on the all_mtx list.
147226048Sobrien */
148226048Sobrienstatic struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
14968349Sobrien	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
150175296Sobrien	{ NULL, NULL }, &all_mtx, &all_mtx,
15168349Sobrien#ifdef WITNESS
152175296Sobrien	&all_mtx_debug
153191736Sobrien#else
154175296Sobrien	NULL
15568349Sobrien#endif
15668349Sobrien	 };
15768349Sobrien
15868349Sobrien/*
159191736Sobrien * Global variables for book keeping.
160175296Sobrien */
16168349Sobrienstatic int	mtx_cur_cnt;
162175296Sobrienstatic int	mtx_max_cnt;
16368349Sobrien
164186690Sobrien/*
165226048Sobrien * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
166175296Sobrien */
167175296Sobrienchar	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
168267843Sdelphijchar	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
169267843Sdelphijchar	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
170267843Sdelphijchar	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
171267843Sdelphij
172226048Sobrien/*
17368349Sobrien * Prototypes for non-exported routines.
174226048Sobrien *
175226048Sobrien * NOTE: Prototypes for witness routines are placed at the bottom of the file.
176226048Sobrien */
177226048Sobrienstatic void	propagate_priority(struct proc *);
178226048Sobrien
17968349Sobrienstatic void
180175296Sobrienpropagate_priority(struct proc *p)
181175296Sobrien{
182175296Sobrien	int pri = p->p_pri.pri_level;
183267843Sdelphij	struct mtx *m = p->p_blocked;
184267843Sdelphij
185267843Sdelphij	mtx_assert(&sched_lock, MA_OWNED);
186267843Sdelphij	for (;;) {
187226048Sobrien		struct proc *p1;
188175296Sobrien
189175296Sobrien		p = mtx_owner(m);
190226048Sobrien
191226048Sobrien		if (p == NULL) {
192226048Sobrien			/*
193175296Sobrien			 * This really isn't quite right. Really
194175296Sobrien			 * ought to bump priority of process that
195175296Sobrien			 * next acquires the mutex.
196226048Sobrien			 */
197226048Sobrien			MPASS(m->mtx_lock == MTX_CONTESTED);
198226048Sobrien			return;
199191736Sobrien		}
200191736Sobrien
201191736Sobrien		MPASS(p->p_magic == P_MAGIC);
202191736Sobrien		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
203191736Sobrien		if (p->p_pri.pri_level <= pri)
204234250Sobrien			return;
205191736Sobrien
206191736Sobrien		/*
207175296Sobrien		 * Bump this process' priority.
208191736Sobrien		 */
209175296Sobrien		SET_PRIO(p, pri);
210191736Sobrien
211175296Sobrien		/*
212191736Sobrien		 * If lock holder is actually running, just bump priority.
213175296Sobrien		 */
214191736Sobrien		if (p->p_oncpu != NOCPU) {
215175296Sobrien			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
216226048Sobrien			return;
217226048Sobrien		}
218226048Sobrien
219226048Sobrien#ifndef SMP
220226048Sobrien		/*
221226048Sobrien		 * For UP, we check to see if p is curproc (this shouldn't
222191736Sobrien		 * ever happen however as it would mean we are in a deadlock.)
223175296Sobrien		 */
224191736Sobrien		KASSERT(p != curproc, ("Deadlock detected"));
22568349Sobrien#endif
226191736Sobrien
227175296Sobrien		/*
22868349Sobrien		 * If on run queue move to new run queue, and
229191736Sobrien		 * quit.
230175296Sobrien		 */
231175296Sobrien		if (p->p_stat == SRUN) {
232234250Sobrien			MPASS(p->p_blocked == NULL);
233234250Sobrien			remrunqueue(p);
234234250Sobrien			setrunqueue(p);
235234250Sobrien			return;
236234250Sobrien		}
237234250Sobrien
238234250Sobrien		/*
239234250Sobrien		 * If we aren't blocked on a mutex, we should be.
240234250Sobrien		 */
241234250Sobrien		KASSERT(p->p_stat == SMTX, (
242234250Sobrien		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
243234250Sobrien		    p->p_pid, p->p_comm, p->p_stat,
244234250Sobrien		    m->mtx_description));
245226048Sobrien
246159764Sobrien		/*
247226048Sobrien		 * Pick up the mutex that p is blocked on.
248226048Sobrien		 */
249175296Sobrien		m = p->p_blocked;
250159764Sobrien		MPASS(m != NULL);
251226048Sobrien
25268349Sobrien		/*
253226048Sobrien		 * Check if the proc needs to be moved up on
254226048Sobrien		 * the blocked chain
255191736Sobrien		 */
256175296Sobrien		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
257226048Sobrien			continue;
258226048Sobrien		}
259175296Sobrien
260175296Sobrien		p1 = TAILQ_PREV(p, procqueue, p_procq);
261175296Sobrien		if (p1->p_pri.pri_level <= pri) {
262226048Sobrien			continue;
263226048Sobrien		}
264226048Sobrien
265186690Sobrien		/*
266191736Sobrien		 * Remove proc from blocked chain and determine where
267186690Sobrien		 * it should be moved up to.  Since we know that p1 has
268186690Sobrien		 * a lower priority than p, we know that at least one
269226048Sobrien		 * process in the chain has a lower priority and that
270186690Sobrien		 * p1 will thus not be NULL after the loop.
271267843Sdelphij		 */
272267843Sdelphij		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
273267843Sdelphij		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
274226048Sobrien			MPASS(p1->p_magic == P_MAGIC);
275267843Sdelphij			if (p1->p_pri.pri_level > pri)
276267843Sdelphij				break;
277267843Sdelphij		}
278267843Sdelphij
279267843Sdelphij		MPASS(p1 != NULL);
280267843Sdelphij		TAILQ_INSERT_BEFORE(p1, p, p_procq);
281226048Sobrien		CTR4(KTR_LOCK,
282133359Sobrien		    "propagate_priority: p %p moved before %p on [%p] %s",
283175296Sobrien		    p, p1, m, m->mtx_description);
284133359Sobrien	}
285159764Sobrien}
286226048Sobrien
287159764Sobrien/*
288226048Sobrien * The important part of mtx_trylock{,_flags}()
289186690Sobrien * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
290186690Sobrien * if we're called, it's because we know we don't already own this lock.
291226048Sobrien */
292226048Sobrienint
293226048Sobrien_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
294226048Sobrien{
295226048Sobrien	int rval;
296111658Sobrien
297111658Sobrien	MPASS(curproc != NULL);
298111658Sobrien
299226048Sobrien	/*
300133359Sobrien	 * _mtx_trylock does not accept MTX_NOSWITCH option.
301226048Sobrien	 */
302133359Sobrien	KASSERT((opts & MTX_NOSWITCH) == 0,
303175296Sobrien	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
304133359Sobrien
305175296Sobrien	rval = _obtain_lock(m, curproc);
306133359Sobrien
307284193Sdelphij#ifdef WITNESS
308284193Sdelphij	if (rval && m->mtx_witness != NULL) {
309284193Sdelphij		/*
310284193Sdelphij		 * We do not handle recursion in _mtx_trylock; see the
311284193Sdelphij		 * note at the top of the routine.
312284193Sdelphij		 */
313284193Sdelphij		KASSERT(!mtx_recursed(m),
314284193Sdelphij		    ("mtx_trylock() called on a recursed mutex"));
315284193Sdelphij		witness_try_enter(m, (opts | m->mtx_flags), file, line);
316284193Sdelphij	}
317226048Sobrien#endif	/* WITNESS */
318133359Sobrien
319133359Sobrien	if ((opts & MTX_QUIET) == 0)
320175296Sobrien		CTR5(KTR_LOCK, "TRY_LOCK %s [%p] result=%d at %s:%d",
321133359Sobrien		    m->mtx_description, m, rval, file, line);
322226048Sobrien
32368349Sobrien	return rval;
324175296Sobrien}
32568349Sobrien
326175296Sobrien/*
32768349Sobrien * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
32868349Sobrien *
32968349Sobrien * We call this if the lock is either contested (i.e. we need to go to
33068349Sobrien * sleep waiting for it), or if we need to recurse on it.
331175296Sobrien */
33268349Sobrienvoid
333175296Sobrien_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
33468349Sobrien{
33568349Sobrien	struct proc *p = curproc;
33668349Sobrien
33768349Sobrien	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
338175296Sobrien		m->mtx_recurse++;
33968349Sobrien		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
340175296Sobrien		if ((opts & MTX_QUIET) == 0)
34168349Sobrien			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
342226048Sobrien		return;
343133359Sobrien	}
344226048Sobrien
345133359Sobrien	if ((opts & MTX_QUIET) == 0)
346226048Sobrien		CTR4(KTR_LOCK,
347175296Sobrien		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
348175296Sobrien		    m->mtx_description, (void *)m->mtx_lock, file, line);
349226048Sobrien
350226048Sobrien	while (!_obtain_lock(m, p)) {
351175296Sobrien		uintptr_t v;
352226048Sobrien		struct proc *p1;
353267843Sdelphij
354175296Sobrien		mtx_lock_spin(&sched_lock);
355133359Sobrien		/*
356175296Sobrien		 * Check if the lock has been released while spinning for
357175296Sobrien		 * the sched_lock.
358186690Sobrien		 */
359175296Sobrien		if ((v = m->mtx_lock) == MTX_UNOWNED) {
360186690Sobrien			mtx_unlock_spin(&sched_lock);
361175296Sobrien			continue;
362186690Sobrien		}
363175296Sobrien
364175296Sobrien		/*
36568349Sobrien		 * The mutex was marked contested on release. This means that
366226048Sobrien		 * there are processes blocked on it.
367186690Sobrien		 */
368159764Sobrien		if (v == MTX_CONTESTED) {
369175296Sobrien			p1 = TAILQ_FIRST(&m->mtx_blocked);
370159764Sobrien			MPASS(p1 != NULL);
371175296Sobrien			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
372175296Sobrien
373175296Sobrien			if (p1->p_pri.pri_level < p->p_pri.pri_level)
374226048Sobrien				SET_PRIO(p, p1->p_pri.pri_level);
375175296Sobrien			mtx_unlock_spin(&sched_lock);
376226048Sobrien			return;
377226048Sobrien		}
378226048Sobrien
379226048Sobrien		/*
380226048Sobrien		 * If the mutex isn't already contested and a failure occurs
381159764Sobrien		 * setting the contested bit, the mutex was either released
382226048Sobrien		 * or the state of the MTX_RECURSED bit changed.
383186690Sobrien		 */
384175296Sobrien		if ((v & MTX_CONTESTED) == 0 &&
385226048Sobrien		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
386226048Sobrien			(void *)(v | MTX_CONTESTED))) {
387175296Sobrien			mtx_unlock_spin(&sched_lock);
388226048Sobrien			continue;
389226048Sobrien		}
390175296Sobrien
391159764Sobrien		/*
392175296Sobrien		 * We deffinately must sleep for this lock.
393159764Sobrien		 */
394175296Sobrien		mtx_assert(m, MA_NOTOWNED);
395175296Sobrien
396226048Sobrien#ifdef notyet
397226048Sobrien		/*
398175296Sobrien		 * If we're borrowing an interrupted thread's VM context, we
399284193Sdelphij		 * must clean up before going to sleep.
400175296Sobrien		 */
40168349Sobrien		if (p->p_ithd != NULL) {
40268349Sobrien			struct ithd *it = p->p_ithd;
403191736Sobrien
404175296Sobrien			if (it->it_interrupted) {
40568349Sobrien				if ((opts & MTX_QUIET) == 0)
406191736Sobrien					CTR2(KTR_LOCK,
407175296Sobrien				    "_mtx_lock_sleep: %p interrupted %p",
408175296Sobrien					    it, it->it_interrupted);
409191736Sobrien				intr_thd_fixup(it);
41068349Sobrien			}
41168349Sobrien		}
41268349Sobrien#endif
41368349Sobrien
414191736Sobrien		/*
415226048Sobrien		 * Put us on the list of threads blocked on this mutex.
416175296Sobrien		 */
417175296Sobrien		if (TAILQ_EMPTY(&m->mtx_blocked)) {
41868349Sobrien			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
419191736Sobrien			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
420226048Sobrien			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
421175296Sobrien		} else {
422175296Sobrien			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
42368349Sobrien				if (p1->p_pri.pri_level > p->p_pri.pri_level)
424133359Sobrien					break;
425133359Sobrien			if (p1)
426191736Sobrien				TAILQ_INSERT_BEFORE(p1, p, p_procq);
42768349Sobrien			else
428175296Sobrien				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
429175296Sobrien		}
43068349Sobrien
431191736Sobrien		/*
43268349Sobrien		 * Save who we're blocked on.
433175296Sobrien		 */
434175296Sobrien		p->p_blocked = m;
43568349Sobrien		p->p_mtxname = m->mtx_description;
436191736Sobrien		p->p_stat = SMTX;
43768349Sobrien		propagate_priority(p);
438226048Sobrien
439191736Sobrien		if ((opts & MTX_QUIET) == 0)
440226048Sobrien			CTR3(KTR_LOCK,
441175296Sobrien			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
44268349Sobrien			    m->mtx_description);
443191736Sobrien
444226048Sobrien		mi_switch();
445175296Sobrien
446175296Sobrien		if ((opts & MTX_QUIET) == 0)
44768349Sobrien			CTR3(KTR_LOCK,
44868349Sobrien			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
44968349Sobrien			  p, m, m->mtx_description);
45068349Sobrien
451191736Sobrien		mtx_unlock_spin(&sched_lock);
45268349Sobrien	}
453175296Sobrien
45468349Sobrien	return;
45568349Sobrien}
45668349Sobrien
45768349Sobrien/*
458175296Sobrien * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
45968349Sobrien *
46068349Sobrien * This is only called if we need to actually spin for the lock. Recursion
461191736Sobrien * is handled inline.
462226048Sobrien */
463175296Sobrienvoid
464191736Sobrien_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
465103373Sobrien	       int line)
46668349Sobrien{
467103373Sobrien	int i = 0;
468226048Sobrien
469103373Sobrien	if ((opts & MTX_QUIET) == 0)
470103373Sobrien		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
471175296Sobrien
472103373Sobrien	for (;;) {
473103373Sobrien		if (_obtain_lock(m, curproc))
474103373Sobrien			break;
475175296Sobrien
476133359Sobrien		while (m->mtx_lock != MTX_UNOWNED) {
47768349Sobrien			if (i++ < 1000000)
47868349Sobrien				continue;
47968349Sobrien			if (i++ < 6000000)
48068349Sobrien				DELAY(1);
48168349Sobrien#ifdef DDB
48268349Sobrien			else if (!db_active)
48368349Sobrien#else
48468349Sobrien			else
48568349Sobrien#endif
48668349Sobrien			panic("spin lock %s held by %p for > 5 seconds",
48768349Sobrien			    m->mtx_description, (void *)m->mtx_lock);
48868349Sobrien		}
489103373Sobrien	}
49068349Sobrien
491175296Sobrien	m->mtx_saveintr = mtx_intr;
49268349Sobrien	if ((opts & MTX_QUIET) == 0)
493103373Sobrien		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
49468349Sobrien
495175296Sobrien	return;
496175296Sobrien}
497191736Sobrien
498191736Sobrien/*
499191736Sobrien * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
500175296Sobrien *
501103373Sobrien * We are only called here if the lock is recursed or contested (i.e. we
50268349Sobrien * need to wake up a blocked thread).
503186690Sobrien */
50468349Sobrienvoid
505175296Sobrien_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
50668349Sobrien{
507226048Sobrien	struct proc *p, *p1;
508226048Sobrien	struct mtx *m1;
50969216Sobrien	int pri;
510175296Sobrien
51168349Sobrien	p = curproc;
51268349Sobrien
51368349Sobrien	if (mtx_recursed(m)) {
51468349Sobrien		if (--(m->mtx_recurse) == 0)
515226048Sobrien			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
516226048Sobrien		if ((opts & MTX_QUIET) == 0)
517226048Sobrien			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
518226048Sobrien		return;
519226048Sobrien	}
520175296Sobrien
521226048Sobrien	mtx_lock_spin(&sched_lock);
522226048Sobrien	if ((opts & MTX_QUIET) == 0)
523226048Sobrien		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
524226048Sobrien
525175296Sobrien	p1 = TAILQ_FIRST(&m->mtx_blocked);
52669216Sobrien	MPASS(p->p_magic == P_MAGIC);
527226048Sobrien	MPASS(p1->p_magic == P_MAGIC);
528226048Sobrien
529175296Sobrien	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
530226048Sobrien
531226048Sobrien	if (TAILQ_EMPTY(&m->mtx_blocked)) {
532226048Sobrien		LIST_REMOVE(m, mtx_contested);
533175296Sobrien		_release_lock_quick(m);
534186690Sobrien		if ((opts & MTX_QUIET) == 0)
53568349Sobrien			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
536175296Sobrien	} else
537226048Sobrien		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
538226048Sobrien
539226048Sobrien	pri = PRI_MAX;
54068349Sobrien	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
54168349Sobrien		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
542175296Sobrien		if (cp < pri)
543226048Sobrien			pri = cp;
544226048Sobrien	}
545226048Sobrien
546226048Sobrien	if (pri > p->p_pri.pri_native)
547226048Sobrien		pri = p->p_pri.pri_native;
548226048Sobrien	SET_PRIO(p, pri);
549186690Sobrien
550186690Sobrien	if ((opts & MTX_QUIET) == 0)
551191736Sobrien		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
552186690Sobrien		    m, p1);
553186690Sobrien
554111658Sobrien	p1->p_blocked = NULL;
555186690Sobrien	p1->p_stat = SRUN;
556175296Sobrien	setrunqueue(p1);
55769216Sobrien
55869216Sobrien	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
559226048Sobrien#ifdef notyet
560175296Sobrien		if (p->p_ithd != NULL) {
56168349Sobrien			struct ithd *it = p->p_ithd;
562226048Sobrien
56368349Sobrien			if (it->it_interrupted) {
564226048Sobrien				if ((opts & MTX_QUIET) == 0)
56568349Sobrien					CTR2(KTR_LOCK,
566175296Sobrien				    "_mtx_unlock_sleep: %p interrupted %p",
56769216Sobrien					    it, it->it_interrupted);
568226048Sobrien				intr_thd_fixup(it);
569226048Sobrien			}
570226048Sobrien		}
571175296Sobrien#endif
572186690Sobrien		setrunqueue(p);
573226048Sobrien		if ((opts & MTX_QUIET) == 0)
574226048Sobrien			CTR2(KTR_LOCK,
575226048Sobrien			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
576267843Sdelphij			    (void *)m->mtx_lock);
577267843Sdelphij
578267843Sdelphij		mi_switch();
579267843Sdelphij		if ((opts & MTX_QUIET) == 0)
580226048Sobrien			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
581175296Sobrien			    m, (void *)m->mtx_lock);
582226048Sobrien	}
583267843Sdelphij
584267843Sdelphij	mtx_unlock_spin(&sched_lock);
585267843Sdelphij
586267843Sdelphij	return;
587267843Sdelphij}
588267843Sdelphij
589175296Sobrien/*
590267843Sdelphij * All the unlocking of MTX_SPIN locks is done inline.
591267843Sdelphij * See the _rel_spin_lock() macro for the details.
592175296Sobrien */
593226048Sobrien
594267843Sdelphij/*
595267843Sdelphij * The backing function for the INVARIANTS-enabled mtx_assert()
596267843Sdelphij */
597175296Sobrien#ifdef INVARIANT_SUPPORT
598226048Sobrienvoid
599175296Sobrien_mtx_assert(struct mtx *m, int what, const char *file, int line)
600226048Sobrien{
601175296Sobrien	switch (what) {
602226048Sobrien	case MA_OWNED:
603226048Sobrien	case MA_OWNED | MA_RECURSED:
604226048Sobrien	case MA_OWNED | MA_NOTRECURSED:
605226048Sobrien		if (!mtx_owned(m))
606267843Sdelphij			panic("mutex %s not owned at %s:%d",
607267843Sdelphij			    m->mtx_description, file, line);
608267843Sdelphij		if (mtx_recursed(m)) {
609267843Sdelphij			if ((what & MA_NOTRECURSED) != 0)
610267843Sdelphij				panic("mutex %s recursed at %s:%d",
611267843Sdelphij				    m->mtx_description, file, line);
612267843Sdelphij		} else if ((what & MA_RECURSED) != 0) {
613267843Sdelphij			panic("mutex %s unrecursed at %s:%d",
614267843Sdelphij			    m->mtx_description, file, line);
615267843Sdelphij		}
616267843Sdelphij		break;
617267843Sdelphij	case MA_NOTOWNED:
618267843Sdelphij		if (mtx_owned(m))
619267843Sdelphij			panic("mutex %s owned at %s:%d",
620267843Sdelphij			    m->mtx_description, file, line);
621267843Sdelphij		break;
622267843Sdelphij	default:
623267843Sdelphij		panic("unknown mtx_assert at %s:%d", file, line);
624267843Sdelphij	}
625267843Sdelphij}
626267843Sdelphij#endif
627267843Sdelphij
628267843Sdelphij/*
629267843Sdelphij * The MUTEX_DEBUG-enabled mtx_validate()
630175296Sobrien */
63168349Sobrien#define MV_DESTROY	0	/* validate before destory */
63268349Sobrien#define MV_INIT		1	/* validate before init */
633226048Sobrien
63468349Sobrien#ifdef MUTEX_DEBUG
635226048Sobrien
636int mtx_validate __P((struct mtx *, int));
637
638int
639mtx_validate(struct mtx *m, int when)
640{
641	struct mtx *mp;
642	int i;
643	int retval = 0;
644
645#ifdef WITNESS
646	if (witness_cold)
647		return 0;
648#endif
649	if (m == &all_mtx || cold)
650		return 0;
651
652	mtx_lock(&all_mtx);
653/*
654 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
655 * we can re-enable the kernacc() checks.
656 */
657#ifndef __alpha__
658	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
659	    VM_PROT_READ) == 1);
660#endif
661	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
662	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
663#ifndef __alpha__
664		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
665		    VM_PROT_READ) != 1) {
666			panic("mtx_validate: mp=%p mp->mtx_next=%p",
667			    mp, mp->mtx_next);
668		}
669#endif
670		i++;
671		if (i > mtx_cur_cnt) {
672			panic("mtx_validate: too many in chain, known=%d\n",
673			    mtx_cur_cnt);
674		}
675	}
676	MPASS(i == mtx_cur_cnt);
677	switch (when) {
678	case MV_DESTROY:
679		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
680			if (mp == m)
681				break;
682		MPASS(mp == m);
683		break;
684	case MV_INIT:
685		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
686		if (mp == m) {
687			/*
688			 * Not good. This mutex already exists.
689			 */
690			printf("re-initing existing mutex %s\n",
691			    m->mtx_description);
692			MPASS(m->mtx_lock == MTX_UNOWNED);
693			retval = 1;
694		}
695	}
696	mtx_unlock(&all_mtx);
697	return (retval);
698}
699#endif
700
701/*
702 * Mutex initialization routine; initialize lock `m' of type contained in
703 * `opts' with options contained in `opts' and description `description.'
704 * Place on "all_mtx" queue.
705 */
706void
707mtx_init(struct mtx *m, const char *description, int opts)
708{
709
710	if ((opts & MTX_QUIET) == 0)
711		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
712
713#ifdef MUTEX_DEBUG
714	/* Diagnostic and error correction */
715	if (mtx_validate(m, MV_INIT))
716		return;
717#endif
718
719	bzero((void *)m, sizeof *m);
720	TAILQ_INIT(&m->mtx_blocked);
721
722#ifdef WITNESS
723	if (!witness_cold) {
724		m->mtx_debug = malloc(sizeof(struct mtx_debug),
725		    M_WITNESS, M_NOWAIT | M_ZERO);
726		MPASS(m->mtx_debug != NULL);
727	}
728#endif
729
730	m->mtx_description = description;
731	m->mtx_flags = opts;
732	m->mtx_lock = MTX_UNOWNED;
733
734	/* Put on all mutex queue */
735	mtx_lock(&all_mtx);
736	m->mtx_next = &all_mtx;
737	m->mtx_prev = all_mtx.mtx_prev;
738	m->mtx_prev->mtx_next = m;
739	all_mtx.mtx_prev = m;
740	if (++mtx_cur_cnt > mtx_max_cnt)
741		mtx_max_cnt = mtx_cur_cnt;
742	mtx_unlock(&all_mtx);
743
744#ifdef WITNESS
745	if (!witness_cold)
746		witness_init(m, opts);
747#endif
748}
749
750/*
751 * Remove lock `m' from all_mtx queue.
752 */
753void
754mtx_destroy(struct mtx *m)
755{
756
757#ifdef WITNESS
758	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
759	    __FUNCTION__));
760#endif
761
762	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
763
764#ifdef MUTEX_DEBUG
765	if (m->mtx_next == NULL)
766		panic("mtx_destroy: %p (%s) already destroyed",
767		    m, m->mtx_description);
768
769	if (!mtx_owned(m)) {
770		MPASS(m->mtx_lock == MTX_UNOWNED);
771	} else {
772		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
773	}
774
775	/* diagnostic */
776	mtx_validate(m, MV_DESTROY);
777#endif
778
779#ifdef WITNESS
780	if (m->mtx_witness)
781		witness_destroy(m);
782#endif /* WITNESS */
783
784	/* Remove from the all mutex queue */
785	mtx_lock(&all_mtx);
786	m->mtx_next->mtx_prev = m->mtx_prev;
787	m->mtx_prev->mtx_next = m->mtx_next;
788
789#ifdef MUTEX_DEBUG
790	m->mtx_next = m->mtx_prev = NULL;
791#endif
792
793#ifdef WITNESS
794	free(m->mtx_debug, M_WITNESS);
795	m->mtx_debug = NULL;
796#endif
797
798	mtx_cur_cnt--;
799	mtx_unlock(&all_mtx);
800}
801
802
803/*
804 * The WITNESS-enabled diagnostic code.
805 */
806#ifdef WITNESS
807static void
808witness_fixup(void *dummy __unused)
809{
810	struct mtx *mp;
811
812	/*
813	 * We have to release Giant before initializing its witness
814	 * structure so that WITNESS doesn't get confused.
815	 */
816	mtx_unlock(&Giant);
817	mtx_assert(&Giant, MA_NOTOWNED);
818
819	mtx_lock(&all_mtx);
820
821	/* Iterate through all mutexes and finish up mutex initialization. */
822	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
823
824		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
825		    M_WITNESS, M_NOWAIT | M_ZERO);
826		MPASS(mp->mtx_debug != NULL);
827
828		witness_init(mp, mp->mtx_flags);
829	}
830	mtx_unlock(&all_mtx);
831
832	/* Mark the witness code as being ready for use. */
833	atomic_store_rel_int(&witness_cold, 0);
834
835	mtx_lock(&Giant);
836}
837SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
838
839#define WITNESS_COUNT 200
840#define	WITNESS_NCHILDREN 2
841
842int witness_watch = 1;
843
844struct witness {
845	struct witness	*w_next;
846	const char	*w_description;
847	const char	*w_file;
848	int		 w_line;
849	struct witness	*w_morechildren;
850	u_char		 w_childcnt;
851	u_char		 w_Giant_squawked:1;
852	u_char		 w_other_squawked:1;
853	u_char		 w_same_squawked:1;
854	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
855	u_int		 w_level;
856	struct witness	*w_children[WITNESS_NCHILDREN];
857};
858
859struct witness_blessed {
860	char 	*b_lock1;
861	char	*b_lock2;
862};
863
864#ifdef DDB
865/*
866 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
867 * drop into kdebug() when:
868 *	- a lock heirarchy violation occurs
869 *	- locks are held when going to sleep.
870 */
871int	witness_ddb;
872#ifdef WITNESS_DDB
873TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
874#else
875TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
876#endif
877SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
878#endif /* DDB */
879
880int	witness_skipspin;
881#ifdef WITNESS_SKIPSPIN
882TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
883#else
884TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
885#endif
886SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
887    "");
888
889/*
890 * Witness-enabled globals
891 */
892static struct mtx	w_mtx;
893static struct witness	*w_free;
894static struct witness	*w_all;
895static int		 w_inited;
896static int		 witness_dead;	/* fatal error, probably no memory */
897
898static struct witness	 w_data[WITNESS_COUNT];
899
900/*
901 * Internal witness routine prototypes
902 */
903static struct witness *enroll(const char *description, int flag);
904static int itismychild(struct witness *parent, struct witness *child);
905static void removechild(struct witness *parent, struct witness *child);
906static int isitmychild(struct witness *parent, struct witness *child);
907static int isitmydescendant(struct witness *parent, struct witness *child);
908static int dup_ok(struct witness *);
909static int blessed(struct witness *, struct witness *);
910static void
911    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
912static void witness_leveldescendents(struct witness *parent, int level);
913static void witness_levelall(void);
914static struct witness * witness_get(void);
915static void witness_free(struct witness *m);
916
917static char *ignore_list[] = {
918	"witness lock",
919	NULL
920};
921
922static char *spin_order_list[] = {
923#if defined(__i386__) && defined (SMP)
924	"com",
925#endif
926	"sio",
927#ifdef __i386__
928	"cy",
929#endif
930	"ng_node",
931	"ng_worklist",
932	"ithread table lock",
933	"ithread list lock",
934	"sched lock",
935#ifdef __i386__
936	"clk",
937#endif
938	"callout",
939	/*
940	 * leaf locks
941	 */
942#ifdef SMP
943#ifdef __i386__
944	"ap boot",
945	"imen",
946#endif
947	"smp rendezvous",
948#endif
949	NULL
950};
951
952static char *order_list[] = {
953	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
954	    "uidinfo struct", NULL,
955	NULL
956};
957
958static char *dup_list[] = {
959	"process lock",
960	NULL
961};
962
963static char *sleep_list[] = {
964	"Giant",
965	NULL
966};
967
968/*
969 * Pairs of locks which have been blessed
970 * Don't complain about order problems with blessed locks
971 */
972static struct witness_blessed blessed_list[] = {
973};
974static int blessed_count =
975	sizeof(blessed_list) / sizeof(struct witness_blessed);
976
977static void
978witness_init(struct mtx *m, int flag)
979{
980	m->mtx_witness = enroll(m->mtx_description, flag);
981}
982
983static void
984witness_destroy(struct mtx *m)
985{
986	struct mtx *m1;
987	struct proc *p;
988	p = curproc;
989	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
990		if (m1 == m) {
991			LIST_REMOVE(m, mtx_held);
992			break;
993		}
994	}
995	return;
996
997}
998
999static void
1000witness_display(void(*prnt)(const char *fmt, ...))
1001{
1002	struct witness *w, *w1;
1003	int level, found;
1004
1005	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1006	witness_levelall();
1007
1008	/*
1009	 * First, handle sleep mutexes which have been acquired at least
1010	 * once.
1011	 */
1012	prnt("Sleep mutexes:\n");
1013	for (w = w_all; w; w = w->w_next) {
1014		if (w->w_file == NULL || w->w_spin)
1015			continue;
1016		for (w1 = w_all; w1; w1 = w1->w_next) {
1017			if (isitmychild(w1, w))
1018				break;
1019		}
1020		if (w1 != NULL)
1021			continue;
1022		/*
1023		 * This lock has no anscestors, display its descendants.
1024		 */
1025		witness_displaydescendants(prnt, w);
1026	}
1027
1028	/*
1029	 * Now do spin mutexes which have been acquired at least once.
1030	 */
1031	prnt("\nSpin mutexes:\n");
1032	level = 0;
1033	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1034		found = 0;
1035		for (w = w_all; w; w = w->w_next) {
1036			if (w->w_file == NULL || !w->w_spin)
1037				continue;
1038			if (w->w_level == 1 << level) {
1039				witness_displaydescendants(prnt, w);
1040				level++;
1041				found = 1;
1042			}
1043		}
1044		if (found == 0)
1045			level++;
1046	}
1047
1048	/*
1049	 * Finally, any mutexes which have not been acquired yet.
1050	 */
1051	prnt("\nMutexes which were never acquired:\n");
1052	for (w = w_all; w; w = w->w_next) {
1053		if (w->w_file != NULL)
1054			continue;
1055		prnt("%s\n", w->w_description);
1056	}
1057}
1058
1059void
1060witness_enter(struct mtx *m, int flags, const char *file, int line)
1061{
1062	struct witness *w, *w1;
1063	struct mtx *m1;
1064	struct proc *p;
1065	int i;
1066#ifdef DDB
1067	int go_into_ddb = 0;
1068#endif /* DDB */
1069
1070	if (witness_cold || m->mtx_witness == NULL || panicstr)
1071		return;
1072	w = m->mtx_witness;
1073	p = curproc;
1074
1075	if (flags & MTX_SPIN) {
1076		if ((m->mtx_flags & MTX_SPIN) == 0)
1077			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1078			    " %s:%d", m->mtx_description, file, line);
1079		if (mtx_recursed(m)) {
1080			if ((m->mtx_flags & MTX_RECURSE) == 0)
1081				panic("mutex_enter: recursion on non-recursive"
1082				    " mutex %s @ %s:%d", m->mtx_description,
1083				    file, line);
1084			return;
1085		}
1086		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1087		i = PCPU_GET(witness_spin_check);
1088		if (i != 0 && w->w_level < i) {
1089			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1090			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1091			    " %s:%d already holding %s:%x",
1092			    m->mtx_description, w->w_level, file, line,
1093			    spin_order_list[ffs(i)-1], i);
1094		}
1095		PCPU_SET(witness_spin_check, i | w->w_level);
1096		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1097		p->p_spinlocks++;
1098		MPASS(p->p_spinlocks > 0);
1099		w->w_file = file;
1100		w->w_line = line;
1101		m->mtx_line = line;
1102		m->mtx_file = file;
1103		return;
1104	}
1105	if ((m->mtx_flags & MTX_SPIN) != 0)
1106		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1107		    m->mtx_description, file, line);
1108
1109	if (mtx_recursed(m)) {
1110		if ((m->mtx_flags & MTX_RECURSE) == 0)
1111			panic("mutex_enter: recursion on non-recursive"
1112			    " mutex %s @ %s:%d", m->mtx_description,
1113			    file, line);
1114		return;
1115	}
1116	if (witness_dead)
1117		goto out;
1118	if (cold)
1119		goto out;
1120
1121	if (p->p_spinlocks != 0)
1122		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1123			    m->mtx_description, file, line);
1124	/*
1125	 * Is this the first mutex acquired
1126	 */
1127	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1128		goto out;
1129
1130	if ((w1 = m1->mtx_witness) == w) {
1131		if (w->w_same_squawked || dup_ok(w))
1132			goto out;
1133		w->w_same_squawked = 1;
1134		printf("acquring duplicate lock of same type: \"%s\"\n",
1135			m->mtx_description);
1136		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1137		printf(" 2nd @ %s:%d\n", file, line);
1138#ifdef DDB
1139		go_into_ddb = 1;
1140#endif /* DDB */
1141		goto out;
1142	}
1143	MPASS(!mtx_owned(&w_mtx));
1144	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1145	/*
1146	 * If we have a known higher number just say ok
1147	 */
1148	if (witness_watch > 1 && w->w_level > w1->w_level) {
1149		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1150		goto out;
1151	}
1152	if (isitmydescendant(m1->mtx_witness, w)) {
1153		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1154		goto out;
1155	}
1156	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1157
1158		MPASS(i < 200);
1159		w1 = m1->mtx_witness;
1160		if (isitmydescendant(w, w1)) {
1161			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1162			if (blessed(w, w1))
1163				goto out;
1164			if (m1 == &Giant) {
1165				if (w1->w_Giant_squawked)
1166					goto out;
1167				else
1168					w1->w_Giant_squawked = 1;
1169			} else {
1170				if (w1->w_other_squawked)
1171					goto out;
1172				else
1173					w1->w_other_squawked = 1;
1174			}
1175			printf("lock order reversal\n");
1176			printf(" 1st %s last acquired @ %s:%d\n",
1177			    w->w_description, w->w_file, w->w_line);
1178			printf(" 2nd %p %s @ %s:%d\n",
1179			    m1, w1->w_description, w1->w_file, w1->w_line);
1180			printf(" 3rd %p %s @ %s:%d\n",
1181			    m, w->w_description, file, line);
1182#ifdef DDB
1183			go_into_ddb = 1;
1184#endif /* DDB */
1185			goto out;
1186		}
1187	}
1188	m1 = LIST_FIRST(&p->p_heldmtx);
1189	if (!itismychild(m1->mtx_witness, w))
1190		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1191
1192out:
1193#ifdef DDB
1194	if (witness_ddb && go_into_ddb)
1195		Debugger("witness_enter");
1196#endif /* DDB */
1197	w->w_file = file;
1198	w->w_line = line;
1199	m->mtx_line = line;
1200	m->mtx_file = file;
1201
1202	/*
1203	 * If this pays off it likely means that a mutex being witnessed
1204	 * is acquired in hardclock. Put it in the ignore list. It is
1205	 * likely not the mutex this assert fails on.
1206	 */
1207	MPASS(m->mtx_held.le_prev == NULL);
1208	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1209}
1210
1211void
1212witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1213{
1214	struct proc *p;
1215	struct witness *w = m->mtx_witness;
1216
1217	if (witness_cold)
1218		return;
1219	if (panicstr)
1220		return;
1221	if (flags & MTX_SPIN) {
1222		if ((m->mtx_flags & MTX_SPIN) == 0)
1223			panic("mutex_try_enter: "
1224			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1225			    m->mtx_description, file, line);
1226		if (mtx_recursed(m)) {
1227			if ((m->mtx_flags & MTX_RECURSE) == 0)
1228				panic("mutex_try_enter: recursion on"
1229				    " non-recursive mutex %s @ %s:%d",
1230				    m->mtx_description, file, line);
1231			return;
1232		}
1233		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1234		PCPU_SET(witness_spin_check,
1235		    PCPU_GET(witness_spin_check) | w->w_level);
1236		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1237		w->w_file = file;
1238		w->w_line = line;
1239		m->mtx_line = line;
1240		m->mtx_file = file;
1241		return;
1242	}
1243
1244	if ((m->mtx_flags & MTX_SPIN) != 0)
1245		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1246		    m->mtx_description, file, line);
1247
1248	if (mtx_recursed(m)) {
1249		if ((m->mtx_flags & MTX_RECURSE) == 0)
1250			panic("mutex_try_enter: recursion on non-recursive"
1251			    " mutex %s @ %s:%d", m->mtx_description, file,
1252			    line);
1253		return;
1254	}
1255	w->w_file = file;
1256	w->w_line = line;
1257	m->mtx_line = line;
1258	m->mtx_file = file;
1259	p = curproc;
1260	MPASS(m->mtx_held.le_prev == NULL);
1261	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1262}
1263
1264void
1265witness_exit(struct mtx *m, int flags, const char *file, int line)
1266{
1267	struct witness *w;
1268	struct proc *p;
1269
1270	if (witness_cold || m->mtx_witness == NULL || panicstr)
1271		return;
1272	w = m->mtx_witness;
1273	p = curproc;
1274
1275	if (flags & MTX_SPIN) {
1276		if ((m->mtx_flags & MTX_SPIN) == 0)
1277			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1278			    " %s:%d", m->mtx_description, file, line);
1279		if (mtx_recursed(m)) {
1280			if ((m->mtx_flags & MTX_RECURSE) == 0)
1281				panic("mutex_exit: recursion on non-recursive"
1282				    " mutex %s @ %s:%d", m->mtx_description,
1283				    file, line);
1284			return;
1285		}
1286		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1287		PCPU_SET(witness_spin_check,
1288		    PCPU_GET(witness_spin_check) & ~w->w_level);
1289		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1290		MPASS(p->p_spinlocks > 0);
1291		p->p_spinlocks--;
1292		return;
1293	}
1294	if ((m->mtx_flags & MTX_SPIN) != 0)
1295		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1296		    m->mtx_description, file, line);
1297
1298	if (mtx_recursed(m)) {
1299		if ((m->mtx_flags & MTX_RECURSE) == 0)
1300			panic("mutex_exit: recursion on non-recursive"
1301			    " mutex %s @ %s:%d", m->mtx_description,
1302			    file, line);
1303		return;
1304	}
1305
1306	if ((flags & MTX_NOSWITCH) == 0 && p->p_spinlocks != 0 && !cold)
1307		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1308			    m->mtx_description, file, line);
1309	LIST_REMOVE(m, mtx_held);
1310	m->mtx_held.le_prev = NULL;
1311}
1312
1313int
1314witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1315{
1316	struct mtx *m;
1317	struct proc *p;
1318	char **sleep;
1319	int n = 0;
1320
1321	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1322	p = curproc;
1323	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1324		if (m == mtx)
1325			continue;
1326		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1327			if (strcmp(m->mtx_description, *sleep) == 0)
1328				goto next;
1329		if (n == 0)
1330			printf("Whee!\n");
1331		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1332			file, line, check_only ? "could sleep" : "sleeping",
1333			m->mtx_description,
1334			m->mtx_witness->w_file, m->mtx_witness->w_line);
1335		n++;
1336	next:
1337	}
1338#ifdef DDB
1339	if (witness_ddb && n)
1340		Debugger("witness_sleep");
1341#endif /* DDB */
1342	return (n);
1343}
1344
1345static struct witness *
1346enroll(const char *description, int flag)
1347{
1348	int i;
1349	struct witness *w, *w1;
1350	char **ignore;
1351	char **order;
1352
1353	if (!witness_watch)
1354		return (NULL);
1355	for (ignore = ignore_list; *ignore != NULL; ignore++)
1356		if (strcmp(description, *ignore) == 0)
1357			return (NULL);
1358
1359	if (w_inited == 0) {
1360		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1361		for (i = 0; i < WITNESS_COUNT; i++) {
1362			w = &w_data[i];
1363			witness_free(w);
1364		}
1365		w_inited = 1;
1366		for (order = order_list; *order != NULL; order++) {
1367			w = enroll(*order, MTX_DEF);
1368			w->w_file = "order list";
1369			for (order++; *order != NULL; order++) {
1370				w1 = enroll(*order, MTX_DEF);
1371				w1->w_file = "order list";
1372				itismychild(w, w1);
1373				w = w1;
1374    	    	    	}
1375		}
1376	}
1377	if ((flag & MTX_SPIN) && witness_skipspin)
1378		return (NULL);
1379	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1380	for (w = w_all; w; w = w->w_next) {
1381		if (strcmp(description, w->w_description) == 0) {
1382			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1383			return (w);
1384		}
1385	}
1386	if ((w = witness_get()) == NULL)
1387		return (NULL);
1388	w->w_next = w_all;
1389	w_all = w;
1390	w->w_description = description;
1391	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1392	if (flag & MTX_SPIN) {
1393		w->w_spin = 1;
1394
1395		i = 1;
1396		for (order = spin_order_list; *order != NULL; order++) {
1397			if (strcmp(description, *order) == 0)
1398				break;
1399			i <<= 1;
1400		}
1401		if (*order == NULL)
1402			panic("spin lock %s not in order list", description);
1403		w->w_level = i;
1404	}
1405
1406	return (w);
1407}
1408
1409static int
1410itismychild(struct witness *parent, struct witness *child)
1411{
1412	static int recursed;
1413
1414	/*
1415	 * Insert "child" after "parent"
1416	 */
1417	while (parent->w_morechildren)
1418		parent = parent->w_morechildren;
1419
1420	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1421		if ((parent->w_morechildren = witness_get()) == NULL)
1422			return (1);
1423		parent = parent->w_morechildren;
1424	}
1425	MPASS(child != NULL);
1426	parent->w_children[parent->w_childcnt++] = child;
1427	/*
1428	 * now prune whole tree
1429	 */
1430	if (recursed)
1431		return (0);
1432	recursed = 1;
1433	for (child = w_all; child != NULL; child = child->w_next) {
1434		for (parent = w_all; parent != NULL;
1435		    parent = parent->w_next) {
1436			if (!isitmychild(parent, child))
1437				continue;
1438			removechild(parent, child);
1439			if (isitmydescendant(parent, child))
1440				continue;
1441			itismychild(parent, child);
1442		}
1443	}
1444	recursed = 0;
1445	witness_levelall();
1446	return (0);
1447}
1448
1449static void
1450removechild(struct witness *parent, struct witness *child)
1451{
1452	struct witness *w, *w1;
1453	int i;
1454
1455	for (w = parent; w != NULL; w = w->w_morechildren)
1456		for (i = 0; i < w->w_childcnt; i++)
1457			if (w->w_children[i] == child)
1458				goto found;
1459	return;
1460found:
1461	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1462		continue;
1463	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1464	MPASS(w->w_children[i] != NULL);
1465
1466	if (w1->w_childcnt != 0)
1467		return;
1468
1469	if (w1 == parent)
1470		return;
1471	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1472		continue;
1473	w->w_morechildren = 0;
1474	witness_free(w1);
1475}
1476
1477static int
1478isitmychild(struct witness *parent, struct witness *child)
1479{
1480	struct witness *w;
1481	int i;
1482
1483	for (w = parent; w != NULL; w = w->w_morechildren) {
1484		for (i = 0; i < w->w_childcnt; i++) {
1485			if (w->w_children[i] == child)
1486				return (1);
1487		}
1488	}
1489	return (0);
1490}
1491
1492static int
1493isitmydescendant(struct witness *parent, struct witness *child)
1494{
1495	struct witness *w;
1496	int i;
1497	int j;
1498
1499	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1500		MPASS(j < 1000);
1501		for (i = 0; i < w->w_childcnt; i++) {
1502			if (w->w_children[i] == child)
1503				return (1);
1504		}
1505		for (i = 0; i < w->w_childcnt; i++) {
1506			if (isitmydescendant(w->w_children[i], child))
1507				return (1);
1508		}
1509	}
1510	return (0);
1511}
1512
1513void
1514witness_levelall (void)
1515{
1516	struct witness *w, *w1;
1517
1518	for (w = w_all; w; w = w->w_next)
1519		if (!(w->w_spin))
1520			w->w_level = 0;
1521	for (w = w_all; w; w = w->w_next) {
1522		if (w->w_spin)
1523			continue;
1524		for (w1 = w_all; w1; w1 = w1->w_next) {
1525			if (isitmychild(w1, w))
1526				break;
1527		}
1528		if (w1 != NULL)
1529			continue;
1530		witness_leveldescendents(w, 0);
1531	}
1532}
1533
1534static void
1535witness_leveldescendents(struct witness *parent, int level)
1536{
1537	int i;
1538	struct witness *w;
1539
1540	if (parent->w_level < level)
1541		parent->w_level = level;
1542	level++;
1543	for (w = parent; w != NULL; w = w->w_morechildren)
1544		for (i = 0; i < w->w_childcnt; i++)
1545			witness_leveldescendents(w->w_children[i], level);
1546}
1547
1548static void
1549witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1550			   struct witness *parent)
1551{
1552	struct witness *w;
1553	int i;
1554	int level;
1555
1556	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1557
1558	prnt("%d", level);
1559	if (level < 10)
1560		prnt(" ");
1561	for (i = 0; i < level; i++)
1562		prnt(" ");
1563	prnt("%s", parent->w_description);
1564	if (parent->w_file != NULL)
1565		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1566		    parent->w_line);
1567
1568	for (w = parent; w != NULL; w = w->w_morechildren)
1569		for (i = 0; i < w->w_childcnt; i++)
1570			    witness_displaydescendants(prnt, w->w_children[i]);
1571    }
1572
1573static int
1574dup_ok(struct witness *w)
1575{
1576	char **dup;
1577
1578	for (dup = dup_list; *dup!= NULL; dup++)
1579		if (strcmp(w->w_description, *dup) == 0)
1580			return (1);
1581	return (0);
1582}
1583
1584static int
1585blessed(struct witness *w1, struct witness *w2)
1586{
1587	int i;
1588	struct witness_blessed *b;
1589
1590	for (i = 0; i < blessed_count; i++) {
1591		b = &blessed_list[i];
1592		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1593			if (strcmp(w2->w_description, b->b_lock2) == 0)
1594				return (1);
1595			continue;
1596		}
1597		if (strcmp(w1->w_description, b->b_lock2) == 0)
1598			if (strcmp(w2->w_description, b->b_lock1) == 0)
1599				return (1);
1600	}
1601	return (0);
1602}
1603
1604static struct witness *
1605witness_get()
1606{
1607	struct witness *w;
1608
1609	if ((w = w_free) == NULL) {
1610		witness_dead = 1;
1611		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1612		printf("witness exhausted\n");
1613		return (NULL);
1614	}
1615	w_free = w->w_next;
1616	bzero(w, sizeof(*w));
1617	return (w);
1618}
1619
1620static void
1621witness_free(struct witness *w)
1622{
1623	w->w_next = w_free;
1624	w_free = w;
1625}
1626
1627int
1628witness_list(struct proc *p)
1629{
1630	struct mtx *m;
1631	int nheld;
1632
1633	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1634	nheld = 0;
1635	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1636		printf("\t\"%s\" (%p) locked at %s:%d\n",
1637		    m->mtx_description, m,
1638		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1639		nheld++;
1640	}
1641
1642	return (nheld);
1643}
1644
1645#ifdef DDB
1646
1647DB_SHOW_COMMAND(mutexes, db_witness_list)
1648{
1649
1650	witness_list(curproc);
1651}
1652
1653DB_SHOW_COMMAND(witness, db_witness_display)
1654{
1655
1656	witness_display(db_printf);
1657}
1658#endif
1659
1660void
1661witness_save(struct mtx *m, const char **filep, int *linep)
1662{
1663
1664	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1665	if (m->mtx_witness == NULL)
1666		return;
1667
1668	*filep = m->mtx_witness->w_file;
1669	*linep = m->mtx_witness->w_line;
1670}
1671
1672void
1673witness_restore(struct mtx *m, const char *file, int line)
1674{
1675
1676	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1677	if (m->mtx_witness == NULL)
1678		return;
1679
1680	m->mtx_witness->w_file = file;
1681	m->mtx_witness->w_line = line;
1682}
1683
1684#endif	/* WITNESS */
1685