subr_turnstile.c revision 111880
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 111880 2003-03-04 20:32:41Z jhb $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/* XXXKSE This test will change. */
#define	thread_running(td)						\
	((td)->td_kse != NULL && (td)->td_kse->ke_oncpu != NOCPU)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

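/*
 * propagate_priority() implements priority inheritance for blocked
 * threads: starting from the thread td that has just blocked, walk the
 * chain of lock owners, lending td's priority to each owner with a
 * (numerically) higher priority value and re-sorting each blocked owner
 * on the priority-ordered mtx_blocked queue it sleeps on, until an
 * owner that is running or on a run queue is reached.
 */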
static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right.  We really ought
			 * to bump the priority of the thread that next
			 * acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(!TD_IS_SLEEPING(td), ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this should
		 * never happen, however, as it would mean we are in a
		 * deadlock).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue, move to new run queue and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			sched_prio(td, pri);
			return;
		}
		/*
		 * Adjust for any other cases.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(TD_ON_LOCK(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char	*name;
	const char	*file;
	int		line;
	uintmax_t	cnt_max;
	uintmax_t	cnt_tot;
	uintmax_t	cnt_cur;
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define	NUM_MPROF_BUFFERS	1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define	MPROF_HASH_SIZE		1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
/* SWAG: sbuf size = avg stat. line size * number of locks */
#define	MPROF_SBUF_SIZE		(256 * 400)

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;
	static int multiplier = 1;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

retry_sbufops:
	sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	sbuf_printf(sb, "%6s %12s %11s %5s %s\n",
	    "max", "total", "count", "avg", "name");
	/*
	 * XXX this spinlock seems to be by far the largest perpetrator
	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
	 * even before I pessimized it further by moving the average
	 * computation here).
	 */
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i) {
		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %s:%d (%s)\n",
		    mprof_buf[i].cnt_max / 1000,
		    mprof_buf[i].cnt_tot / 1000,
		    mprof_buf[i].cnt_cur,
		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
			mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
		if (sbuf_overflowed(sb)) {
			mtx_unlock_spin(&mprof_mtx);
			sbuf_delete(sb);
			multiplier++;
			goto retry_sbufops;
		}
	}
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
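/*
 * Usage sketch: on a kernel compiled with MUTEX_PROFILING, setting
 * debug.mutex.prof.enable=1 via sysctl(8) starts data collection, and
 * "sysctl debug.mutex.prof.stats" reads back one line per (file, line)
 * acquisition site.  The max, total and avg columns are reported in
 * microseconds; dump_mutex_prof_stats() above divides the nanosecond
 * counters by 1000.
 */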
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
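/*
 * In the common kernel build the mtx_lock()/mtx_unlock() macros expand
 * to inline acquire/release fast paths that only fall into the
 * _mtx_*_sleep() and _mtx_*_spin() functions below on contention; the
 * _mtx_*_flags() functions here are the out-of-line equivalents.  This
 * is a sketch of the arrangement; see sys/sys/mutex.h for the
 * authoritative macro definitions.
 */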
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename;
		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if (now - acqtime > mpp->cnt_max)
			mpp->cnt_max = now - acqtime;
		mpp->cnt_tot += now - acqtime;
		mpp->cnt_cur++;
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}

/*
 * The important part of mtx_trylock{,_flags}().
 *
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume
 * that if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	KASSERT(!mtx_owned(m),
	    ("mtx_trylock() called on a mutex already owned"));

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval)
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);

	return (rval);
}

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
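/*
 * Outline of the loop below: retry _obtain_lock(), and under sched_lock
 * either (a) retry if the lock was released meanwhile, (b) take over a
 * lock handed off in the MTX_CONTESTED state, (c) optionally spin while
 * the owner is running on another CPU (ADAPTIVE_MUTEXES), or (d) queue
 * curthread on m->mtx_blocked in priority order, propagate its priority
 * to the owner, and switch away until the releasing thread wakes us.
 */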
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
	struct thread *td1;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
	uintptr_t v;
#ifdef KTR
	int cont_logged = 0;
#endif

	if (mtx_owned(m)) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		mtx_lock_spin(&sched_lock);
		v = m->mtx_lock;

		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if (v == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && thread_running(owner)) {
			mtx_unlock_spin(&sched_lock);
			while (mtx_owner(m) == owner && thread_running(owner)) {
#ifdef __i386__
				ia32_pause();
#endif
			}
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_lockq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		}
#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_lockname = m->mtx_object.lo_name;
		TD_SET_LOCK(td);
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
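/*
 * Note that the wait loop below runs outside of a critical section
 * (bracketed by critical_exit()/critical_enter()) so that interrupts
 * can still be serviced while we spin, and that it panics if the lock
 * does not come free after roughly 60000000 polls (the "> 5 seconds"
 * message below), unless the kernel debugger is active.
 */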
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
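/*
 * Outline of the wakeup path below: remove the highest-priority waiter
 * from m->mtx_blocked, either hand the lock off in the MTX_CONTESTED
 * state or release it outright when the queue drains, recompute our
 * own priority from the mutexes we still hold contested, and put the
 * waiter on a run queue, switching out voluntarily if it now has a
 * higher priority than we do.
 */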
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	TD_CLR_LOCK(td1);
	if (!TD_CAN_RUN(td1)) {
		mtx_unlock_spin(&sched_lock);
		return;
	}
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}

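/*
 * For example, the mutex profiling lock above is set up with
 * MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock",
 * MTX_SPIN | MTX_QUIET), which registers a SYSINIT that hands a
 * struct mtx_args to mtx_sysinit() during boot.
 */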
/*
 * Mutex initialization routine; initialize lock `m' with options
 * contained in `opts' and name `name.'  The optional lock type `type'
 * is used as a general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

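/*
 * Example of a typical caller (a sketch; the "foo" names are
 * hypothetical):
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	...
 *	mtx_destroy(&foo_mtx);
 *
 * The system mutexes in mutex_init() below are set up the same way.
 */
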
/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a
 * flag here because if the corresponding mtx_init() was called with
 * MTX_QUIET set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Set up thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */
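/*
 * Sketch of the expected calling pattern (hypothetical caller):
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_proc);
 *	... subsystem code that may or may not need Giant ...
 *	mtx_unlock_giant(s);
 */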

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return(1);
	}
	return(0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
1039