/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 109654 2003-01-21 20:33:27Z des $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/* XXXKSE This test will change. */
#define	thread_running(td)						\
	((td)->td_kse != NULL && (td)->td_kse->ke_oncpu != NOCPU)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right.  We really ought
			 * to bump the priority of the thread that next
			 * acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(!TD_IS_SLEEPING(td), ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this shouldn't
		 * ever happen, however, as it would mean we are deadlocked).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on a run queue, move to the new run queue and quit.
		 * XXXKSE this gets a lot more complicated under threads,
		 * but try anyhow.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			sched_prio(td, pri);
			return;
		}
		/*
		 * Adjust for any other cases.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(TD_ON_LOCK(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char	*name;
	const char	*file;
	int		line;
	uintmax_t	cnt_max;
	uintmax_t	cnt_tot;
	uintmax_t	cnt_cur;
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define	NUM_MPROF_BUFFERS	1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define	MPROF_HASH_SIZE		1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%6s %12s %11s %5s %s\n",
	    "max", "total", "count", "avg", "name");
	/*
	 * XXX this spinlock seems to be by far the largest perpetrator
	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
	 * even before I pessimized it further by moving the average
	 * computation here).
	 */
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %s:%d (%s)\n",
		    mprof_buf[i].cnt_max / 1000,
		    mprof_buf[i].cnt_tot / 1000,
		    mprof_buf[i].cnt_cur,
		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
			mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
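
/*
 * Example (an illustrative sketch, not part of the original file): on a
 * kernel built with `options MUTEX_PROFILING', the knobs above can be
 * exercised from userland roughly like this:
 *
 *	sysctl debug.mutex.prof.enable=1	(start recording hold times)
 *	... run a workload ...
 *	sysctl debug.mutex.prof.stats	(dumped via dump_mutex_prof_stats())
 */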
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
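
/*
 * Illustrative caller's view (a sketch with a hypothetical lock, not code
 * from this file): client code normally reaches these functions through
 * the mtx_lock()/mtx_unlock() macros from <sys/mutex.h>:
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	... access the data protected by foo_mtx ...
 *	mtx_unlock(&foo_mtx);
 */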
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename; strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if (now - acqtime > mpp->cnt_max)
			mpp->cnt_max = now - acqtime;
		mpp->cnt_tot += now - acqtime;
		mpp->cnt_cur++;
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
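
/*
 * Example mtx_trylock() usage (an illustrative sketch with a hypothetical
 * lock, not code from this file); the caller must be prepared for failure:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... got the lock, do the work ...
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		... back off or defer the work ...
 *	}
 */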

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
#ifdef KTR
	int cont_logged = 0;
#endif

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release.  This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && thread_running(owner)) {
			mtx_unlock_spin(&sched_lock);
			while (mtx_owner(m) == owner && thread_running(owner)) {
#ifdef __i386__
				ia32_pause();
#endif
			}
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_lockq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		}
#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_lockname = m->mtx_object.lo_name;
		TD_SET_LOCK(td);
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock.  Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}
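
/*
 * Spin mutexes are taken through the spin variants of the lock macros;
 * an illustrative sketch (hypothetical lock, not code from this file):
 *
 *	static struct mtx foo_spin;
 *
 *	mtx_init(&foo_spin, "foo spin", NULL, MTX_SPIN);
 *	mtx_lock_spin(&foo_spin);	(spins, never sleeps)
 *	... keep the critical section very short ...
 *	mtx_unlock_spin(&foo_spin);
 */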

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	TD_CLR_LOCK(td1);
	if (!TD_CAN_RUN(td1)) {
		mtx_unlock_spin(&sched_lock);
		return;
	}
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
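
/*
 * Example mtx_assert() usage (illustrative, with a hypothetical lock):
 * a routine that requires its caller to hold a mutex can document and
 * enforce that contract under INVARIANTS:
 *
 *	mtx_assert(&foo_mtx, MA_OWNED);
 *	mtx_assert(&foo_mtx, MA_OWNED | MA_NOTRECURSED);
 *	mtx_assert(&foo_mtx, MA_NOTOWNED);
 */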

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
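
/*
 * Example MTX_SYSINIT() usage (illustrative, with a hypothetical lock):
 * the macro arranges for mtx_sysinit() to run during boot, as is done
 * for mprof_mtx above, so the lock is ready before first use:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo, &foo_mtx, "foo", MTX_DEF);
 */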

/*
 * Mutex initialization routine; initialize lock `m' with options contained
 * in `opts' and name `name.'  The optional lock type `type' is used as a
 * general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}
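
/*
 * Example mtx_init()/mtx_destroy() usage (illustrative, with a
 * hypothetical lock): a non-NULL `type' lets witness group distinct
 * locks into one class, and MTX_DEF selects an ordinary sleep mutex:
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", "foo subsystem lock", MTX_DEF);
 *	...
 *	mtx_destroy(&foo_mtx);
 */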

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a flag
 * here because if the corresponding mtx_init() was called with MTX_QUIET
 * set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Set up thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working with regard to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
1029