kern_mutex.c revision 104157
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/kern_mutex.c 104157 2002-09-29 23:04:34Z julian $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/* XXXKSE This test will change. */
#define	thread_running(td)						\
	((td)->td_kse != NULL && (td)->td_kse->ke_oncpu != NOCPU)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

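/*
 * propagate_priority() implements priority inheritance: starting from a
 * newly blocked thread, walk the chain of mutex owners, raising the
 * priority of each owner that is holding up a higher-priority thread and
 * re-sorting any run queue or blocked chain the owner sits on.
 */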
static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(!TD_IS_SLEEPING(td), ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this should
		 * never happen, however, as it would mean we are in a
		 * deadlock).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 * We should have a special call to do this more efficiently.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			td->td_priority = pri;
			setrunqueue(td);
			return;
		}
		/*
		 * Adjust for any other cases.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(TD_ON_MUTEX(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char *name;
	const char *file;
	int line;
#define MPROF_MAX 0
#define MPROF_TOT 1
#define MPROF_CNT 2
#define MPROF_AVG 3
	uintmax_t counter[4];
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define NUM_MPROF_BUFFERS 1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define MPROF_HASH_SIZE 1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded"));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
	    "max", "total", "count", "average", "name");
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%12ju %12ju %12ju %12ju %s:%d (%s)\n",
		    mprof_buf[i].counter[MPROF_MAX] / 1000,
		    mprof_buf[i].counter[MPROF_TOT] / 1000,
		    mprof_buf[i].counter[MPROF_CNT],
		    mprof_buf[i].counter[MPROF_AVG] / 1000,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
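/*
 * Illustrative calling pattern (a sketch, not code from this file; the
 * mutex and counter names are hypothetical).  Callers normally reach these
 * functions through the mtx_lock()/mtx_unlock() macros:
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	foo_count++;			(protected by foo_mtx)
 *	mtx_unlock(&foo_mtx);
 */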
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename; strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
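		/*
		 * Hash the stripped file name and line number into the
		 * profiling table.  The simple multiplicative hash below
		 * is intended to spread typical path strings across the
		 * table, whose size (MPROF_HASH_SIZE, 1009) is prime.
		 */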
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before
		 */
		if ((now - acqtime) > mpp->counter[MPROF_MAX])
			mpp->counter[MPROF_MAX] = now - acqtime;
		mpp->counter[MPROF_TOT] += now - acqtime;
		mpp->counter[MPROF_CNT] += 1;
		mpp->counter[MPROF_AVG] =
		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
#ifdef KTR
	int cont_logged = 0;
#endif

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && thread_running(owner)) {
			mtx_unlock_spin(&sched_lock);
			while (mtx_owner(m) == owner && thread_running(owner)) {
#ifdef __i386__
				ia32_pause();
#endif
			}
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}
#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		TD_SET_MUTEX(td);
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
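			/*
			 * Spin tightly for the first ~10 million
			 * iterations, then back off to a 1us DELAY() per
			 * iteration; after that (unless DDB is active),
			 * conclude the holder is stuck and panic.
			 */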
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	TD_CLR_MUTEX(td1);
	if (TD_CAN_RUN(td1))
		setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}

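/*
 * Illustrative use of the MTX_SYSINIT() macro, which arranges for
 * mtx_sysinit() above to run at startup (a sketch; the "bar" names are
 * hypothetical; compare the real mprof_mtx use earlier in this file):
 *
 *	static struct mtx bar_mtx;
 *	MTX_SYSINIT(bar, &bar_mtx, "bar lock", MTX_DEF);
 */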
/*
 * Mutex initialization routine; initialize lock `m' of the type and with
 * the options contained in `opts', under the name `name.'  The optional
 * lock type `type' is used as a general lock category name for use with
 * witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
 * passed in as a flag here because if the corresponding mtx_init() was
 * called with MTX_QUIET set, then it will already be set in the mutex's
 * flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working with regard to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */
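/*
 * Typical calling pattern (an illustrative sketch, using the real
 * kern_giant_file variable declared below):
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_file);
 *	... operate on the file subsystem ...
 *	mtx_unlock_giant(s);
 */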

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return(1);
	}
	return(0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}