/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 111879 2003-03-04 20:30:30Z jhb $
 */

/*
 * Machine-independent bits of the mutex implementation.
 */

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

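/*
 * The lock word holds either MTX_UNOWNED or the owning thread pointer,
 * with mutex state flags (MTX_RECURSED, MTX_CONTESTED) packed into the
 * low MTX_FLAGMASK bits; masking those off recovers the owner.
 */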
#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/* XXXKSE This test will change. */
#define	thread_running(td)						\
	((td)->td_kse != NULL && (td)->td_kse->ke_oncpu != NOCPU)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

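/*
 * Propagate td's priority down the chain of mutex owners blocking it.
 * Each pass lends the priority to the current owner: a running owner is
 * simply bumped, an owner on a run queue is requeued at the new priority,
 * and a blocked owner is repositioned in its mutex's priority-sorted
 * blocked list before the walk continues with that mutex's owner.
 */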
static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right.  We really ought
			 * to bump the priority of the thread that next
			 * acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(!TD_IS_SLEEPING(td), ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * If the lock holder is actually running, just bump its
		 * priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this shouldn't
		 * ever happen, however, as it would mean we are in a
		 * deadlock).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on a run queue, move to the new run queue and quit.
		 * XXXKSE this gets a lot more complicated under threads,
		 * but try anyhow.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			sched_prio(td, pri);
			return;
		}
		/*
		 * Adjust for any other cases.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(TD_ON_LOCK(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char	*name;
	const char	*file;
	int		line;
	uintmax_t	cnt_max;
	uintmax_t	cnt_tot;
	uintmax_t	cnt_cur;
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define	NUM_MPROF_BUFFERS	1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define	MPROF_HASH_SIZE		1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
/* SWAG: sbuf size = avg stat. line size * number of locks */
#define	MPROF_SBUF_SIZE		(256 * 400)

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;
	static int multiplier = 1;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

retry_sbufops:
	sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	sbuf_printf(sb, "%6s %12s %11s %5s %s\n",
	    "max", "total", "count", "avg", "name");
	/*
	 * XXX this spinlock seems to be by far the largest perpetrator
	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
	 * even before I pessimized it further by moving the average
	 * computation here).
	 */
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i) {
		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %s:%d (%s)\n",
		    mprof_buf[i].cnt_max / 1000,
		    mprof_buf[i].cnt_tot / 1000,
		    mprof_buf[i].cnt_cur,
		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
			mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
		if (sbuf_overflowed(sb)) {
			mtx_unlock_spin(&mprof_mtx);
			sbuf_delete(sb);
			multiplier++;
			goto retry_sbufops;
		}
	}
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
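
/*
 * A usage sketch (illustrative): with MUTEX_PROFILING compiled in, the
 * data can be inspected from userland via the sysctls defined above, e.g.:
 *
 *	sysctl debug.mutex.prof.enable=1
 *	sysctl debug.mutex.prof.stats
 */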
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
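
/*
 * A minimal usage sketch (illustrative; `foo_mtx' is a hypothetical
 * consumer lock, not something defined in this file):
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	... critical section ...
 *	mtx_unlock(&foo_mtx);
 *	mtx_destroy(&foo_mtx);
 */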
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename;
		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if (now - acqtime > mpp->cnt_max)
			mpp->cnt_max = now - acqtime;
		mpp->cnt_tot += now - acqtime;
		mpp->cnt_cur++;
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	KASSERT(!mtx_owned(m),
	    ("mtx_trylock() called on a mutex already owned"));

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval)
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);

	return (rval);
}
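
/*
 * A sketch of the expected mtx_trylock() pattern (illustrative; foo_mtx
 * is hypothetical):
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... foo_mtx is now held ...
 *		mtx_unlock(&foo_mtx);
 *	}
 */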

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
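
/*
 * In outline, the loop below: takes sched_lock; re-reads the lock word in
 * case the lock was released while we spun for sched_lock; takes over a
 * lock left in the bare MTX_CONTESTED state directly; otherwise sets
 * MTX_CONTESTED, optionally spins adaptively while the owner runs on
 * another CPU, and finally queues the thread by priority, propagates its
 * priority to the owner, and context switches via mi_switch().
 */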
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
#ifdef KTR
	int cont_logged = 0;
#endif

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && thread_running(owner)) {
			mtx_unlock_spin(&sched_lock);
			while (mtx_owner(m) == owner && thread_running(owner)) {
#ifdef __i386__
				ia32_pause();
#endif
			}
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_lockq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		}
#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_lockname = m->mtx_object.lo_name;
		TD_SET_LOCK(td);
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
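
/*
 * The spin below backs off in stages: roughly the first ten million
 * iterations only pause, subsequent iterations add a DELAY(1), and once
 * the counter passes sixty million we assume the lock is wedged and
 * panic (unless DDB is active).
 */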
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
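
/*
 * In outline: dequeue the highest-priority waiter, hand the lock over in
 * the MTX_CONTESTED state (or release it outright if the queue drained),
 * recompute our own priority from any mutexes we still hold contested,
 * make the waiter runnable, and switch out if it now outranks us.
 */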
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	TD_CLR_LOCK(td1);
	if (!TD_CAN_RUN(td1)) {
		mtx_unlock_spin(&sched_lock);
		return;
	}
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
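
/*
 * Typical assertion usage (illustrative; foo_mtx is hypothetical):
 *
 *	mtx_assert(&foo_mtx, MA_OWNED);
 *	mtx_assert(&foo_mtx, MA_NOTOWNED);
 */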

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
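
/*
 * For example (illustrative; foo and foo_mtx are hypothetical), a lock
 * can be initialized automatically during boot with:
 *
 *	MTX_SYSINIT(foo, &foo_mtx, "foo", MTX_DEF);
 *
 * The mprof_mtx MTX_SYSINIT above is an in-file instance of this.
 */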

/*
 * Mutex initialization routine; initialize lock `m' with options contained
 * in `opts' and name `name.'  The optional lock type `type' is used as a
 * general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a
 * flag here because if the corresponding mtx_init() was called with
 * MTX_QUIET set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Set up thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */
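
/*
 * A sketch of the expected calling pattern (illustrative):
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_file);
 *	... code whose need for Giant is controlled by the sysctl ...
 *	mtx_unlock_giant(s);
 */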

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return(1);
	}
	return(0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
103685564Sdillon}
1037