subr_turnstile.c revision 97139
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 97139 2002-05-22 20:32:39Z jhb $
 */

/*
 * Machine-independent bits of the mutex implementation.
 */

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
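
/*
 * Illustrative note (an editor's sketch, not from the original source):
 * a mutex packs its state into the single word mtx_lock.  The word is
 * either MTX_UNOWNED or the owning thread pointer with MTX_RECURSED and
 * MTX_CONTESTED OR'ed into its low bits, which is why mtx_owner() masks
 * with MTX_FLAGMASK before using the value as a pointer.  A hypothetical
 * reader of the lock word might decode it as:
 *
 *	uintptr_t v = m->mtx_lock;
 *	struct thread *owner = (struct thread *)(v & MTX_FLAGMASK);
 *	int contested = (v & MTX_CONTESTED) != 0;
 */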

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right.  We really
			 * ought to bump the priority of the thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(td->td_proc->p_stat != SSLEEP,
		    ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * Bump this thread's priority.
		 */
		td->td_priority = pri;

		/*
		 * If the lock holder is actually running, just bump its
		 * priority.
		 */
		/* XXXKSE this test is not sufficient */
		if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) {
			MPASS(td->td_proc->p_stat == SRUN
			|| td->td_proc->p_stat == SZOMB
			|| td->td_proc->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this should
		 * never happen, however, as it would mean we are
		 * deadlocked.)
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (td->td_proc->p_stat == SRUN) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			setrunqueue(td);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(td->td_proc->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}
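
/*
 * Worked example (an editor's sketch, not from the original source):
 * suppose a low priority thread L owns mutex A, a medium priority
 * thread M owns mutex B and is blocked on A, and a high priority
 * thread H now blocks on B.  propagate_priority(H) walks the chain
 *
 *	H -> B -> M -> A -> L
 *
 * lending H's priority first to M and then to L, so that L cannot be
 * preempted by anything between H and L in priority while it holds the
 * lock H ultimately waits on.  Without this walk, classic priority
 * inversion would let M run ahead of L indefinitely while H waits.
 */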

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char *name;
	const char *file;
	int line;
#define MPROF_MAX 0
#define MPROF_TOT 1
#define MPROF_CNT 2
#define MPROF_AVG 3
	u_int64_t counter[4];
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define NUM_MPROF_BUFFERS 1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define MPROF_HASH_SIZE 1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
	    "max", "total", "count", "average", "name");
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%12llu %12llu %12llu %12llu %s:%d (%s)\n",
		    mprof_buf[i].counter[MPROF_MAX] / 1000,
		    mprof_buf[i].counter[MPROF_TOT] / 1000,
		    mprof_buf[i].counter[MPROF_CNT],
		    mprof_buf[i].counter[MPROF_AVG] / 1000,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
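
/*
 * Usage sketch (an editor's note, not from the original source): with a
 * kernel built with MUTEX_PROFILING, hold times are gathered once the
 * facility is switched on and can be read back from userland via the
 * sysctl nodes declared above:
 *
 *	sysctl debug.mutex.prof.enable=1
 *	(run a workload)
 *	sysctl debug.mutex.prof.stats
 *
 * Times are reported in microseconds (the counters are kept in
 * nanoseconds and divided by 1000 above), keyed by the file:line of the
 * mtx_lock() call that acquired the mutex.
 */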
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->acqtime == 0) {
		m->file = file;
		m->line = line;
		m->acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->acqtime;
		m->acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = file; strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = line, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == line && strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = line;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if ((now - acqtime) > mpp->counter[MPROF_MAX])
			mpp->counter[MPROF_MAX] = now - acqtime;
		mpp->counter[MPROF_TOT] += now - acqtime;
		mpp->counter[MPROF_CNT] += 1;
		mpp->counter[MPROF_AVG] =
		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
#if defined(SMP) || LOCK_DEBUG > 0
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
#if defined(SMP) || LOCK_DEBUG > 0
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}
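
/*
 * Usage sketch (an editor's note, not from the original source): spin
 * mutexes are for code that cannot sleep, such as interrupt handlers and
 * the scheduler itself, and acquiring one implicitly enters a critical
 * section.  A hypothetical consumer might look like:
 *
 *	static struct mtx foo_spin;
 *
 *	mtx_init(&foo_spin, "foo spin", NULL, MTX_SPIN);
 *	mtx_lock_spin(&foo_spin);
 *	(touch state shared with an interrupt handler)
 *	mtx_unlock_spin(&foo_spin);
 */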

/*
 * The important part of mtx_trylock{,_flags}().
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume
 * that if we're called, it's because we know we don't already own this
 * lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
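
/*
 * Usage sketch (an editor's note, not from the original source): because
 * mtx_trylock() never blocks, it is useful for opportunistic locking:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		(fast path, foo_mtx now held)
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		(defer or retry without blocking)
 *	}
 *
 * The return value is nonzero on success and must be checked; the
 * caller must not already own the mutex.
 */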

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && owner->td_kse != NULL &&
		    owner->td_kse->ke_oncpu != NOCPU) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		td->td_proc->p_stat = SMTX;
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}
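
/*
 * Illustrative note (an editor's sketch, not from the original source):
 * the sleep path above is driven entirely by transitions of the lock
 * word, roughly:
 *
 *	MTX_UNOWNED      -> owner            (uncontested acquire)
 *	owner            -> owner|CONTESTED  (loser marks the lock)
 *	owner|CONTESTED  -> MTX_CONTESTED    (release with waiters)
 *	MTX_CONTESTED    -> owner|CONTESTED  (waiter claims the lock)
 *
 * All transitions except the hand-offs performed under sched_lock use
 * atomic compare-and-set, so a failed cmpset simply means the word
 * changed underneath us and the loop retries.
 */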

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	td1->td_proc->p_stat = SRUN;
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(td);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}
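
/*
 * Illustrative note (an editor's sketch, not from the original source):
 * on unlock the releasing thread recomputes its own priority from
 * scratch rather than undoing a single donation.  It may still hold
 * other contested mutexes, so the walk over td_contested above takes the
 * highest priority (lowest value) among the head waiters of every mutex
 * it still holds, clamps that against td_base_pri, and then yields the
 * CPU if the thread it just woke now outranks it.
 */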

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
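
/*
 * Usage sketch (an editor's note, not from the original source): code
 * that relies on a caller-held lock typically documents the requirement
 * with an assertion at the top of the function:
 *
 *	static void
 *	foo_modify(struct foo_softc *sc)
 *	{
 *
 *		mtx_assert(&sc->sc_mtx, MA_OWNED);
 *		(modify state protected by sc_mtx)
 *	}
 *
 * The checks compile away unless the kernel is built with INVARIANTS
 * and INVARIANT_SUPPORT.
 */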

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
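
/*
 * Usage sketch (an editor's note, not from the original source):
 * MTX_SYSINIT() arranges for mtx_sysinit() to run during boot so that
 * file-scope mutexes are initialized before first use, exactly as the
 * mprof_mtx declaration above does:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo, &foo_mtx, "foo lock", MTX_DEF);
 */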

/*
 * Mutex initialization routine; initialize lock `m' with the type and
 * options contained in `opts' and name `name.'  The optional lock type
 * `type' is used as a general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}
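
/*
 * Usage sketch (an editor's note, not from the original source): the
 * `type' argument groups distinct locks into one witness lock-order
 * class.  A hypothetical driver with one mutex per device instance
 * might use:
 *
 *	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "foo softc",
 *	    MTX_DEF);
 *	...
 *	mtx_destroy(&sc->sc_mtx);	(on detach)
 *
 * so every instance shares the "foo softc" ordering class while keeping
 * a unique name for debugging output.
 */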

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a flag
 * here because if the corresponding mtx_init() was called with MTX_QUIET
 * set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn Giant
 * on and off around certain subsystems.  The default values for the
 * sysctls are set to what developers believe is stable and working in
 * regards to the Giant pushdown.  Developers should not turn off Giant
 * via these sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL,
    "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
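
/*
 * Usage sketch (an editor's note, not from the original source): a
 * subsystem that may or may not still need Giant brackets its calls
 * like so:
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_proc);
 *	(operate on the process)
 *	mtx_unlock_giant(s);
 *
 * Passing the cookie through guarantees the unlock matches whatever
 * decision the lock side made, even if the sysctl changes in between.
 */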