subr_turnstile.c revision 93702
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 93702 2002-04-02 22:19:16Z jhb $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
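
/*
 * The mtx_lock word holds either MTX_UNOWNED or the owning thread
 * pointer with the flag bits (MTX_RECURSED, MTX_CONTESTED) kept in the
 * low bits of the word; mtx_owner() masks the flags off with
 * MTX_FLAGMASK to recover the thread pointer.
 */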

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right.  We really ought
			 * to bump the priority of the thread that next
			 * acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(td->td_proc->p_stat != SSLEEP,
		    ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * Bump this thread's priority.
		 */
		td->td_priority = pri;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		/* XXXKSE this test is not sufficient */
		if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) {
			MPASS(td->td_proc->p_stat == SRUN
			|| td->td_proc->p_stat == SZOMB
			|| td->td_proc->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this should
		 * never happen, however, as it would mean we are in a
		 * deadlock).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (td->td_proc->p_stat == SRUN) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			setrunqueue(td);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(td->td_proc->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char *name;
	const char *file;
	int line;
#define MPROF_MAX 0
#define MPROF_TOT 1
#define MPROF_CNT 2
#define MPROF_AVG 3
	u_int64_t counter[4];
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define NUM_MPROF_BUFFERS 4096
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define MPROF_HASH_SIZE 32771
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
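
/*
 * Records are looked up by open addressing: the file/line hash picks a
 * starting slot in mprof_hash and the lookup in _mtx_unlock_flags()
 * probes linearly until it finds a matching or an empty slot.  Keeping
 * NUM_MPROF_BUFFERS below MPROF_HASH_SIZE guarantees that an empty slot
 * always exists, so the probe terminates.
 */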

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;

static void
mprof_init(void *arg __unused)
{
	mtx_init(&mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);
}
SYSINIT(mprofinit, SI_SUB_LOCK, SI_ORDER_ANY, mprof_init, NULL);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded"));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
	    "max", "total", "count", "average", "name");
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%12llu %12llu %12llu %12llu %s:%d (%s)\n",
		    mprof_buf[i].counter[MPROF_MAX] / 1000,
		    mprof_buf[i].counter[MPROF_TOT] / 1000,
		    mprof_buf[i].counter[MPROF_CNT],
		    mprof_buf[i].counter[MPROF_AVG] / 1000,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
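
/*
 * Typical usage from userland, assuming a kernel built with
 * MUTEX_PROFILING:
 *
 *	sysctl debug.mutex.prof.enable=1
 *	... exercise the workload of interest ...
 *	sysctl debug.mutex.prof.stats
 *
 * Hold times are recorded in nanoseconds and reported in microseconds.
 */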
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->acqtime == 0) {
		m->file = file;
		m->line = line;
		m->acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash, n;

		now = nanoseconds();
		acqtime = m->acqtime;
		m->acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = file; strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = line, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		n = hash;
		while ((mpp = mprof_hash[n]) != NULL) {
			if (mpp->line == line && strcmp(mpp->file, p) == 0)
				break;
			n = (n + 1) % MPROF_HASH_SIZE;
		}
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = line;
			mutex_prof_collisions += n - hash;
			++mutex_prof_records;
			mprof_hash[n] = mpp;	/* insert at the empty slot the probe found */
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if ((now - acqtime) > mpp->counter[MPROF_MAX])
			mpp->counter[MPROF_MAX] = now - acqtime;
		mpp->counter[MPROF_TOT] += now - acqtime;
		mpp->counter[MPROF_CNT] += 1;
		mpp->counter[MPROF_AVG] =
		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_spin_lock(m);
}
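
/*
 * Consumers normally reach the four functions above through the
 * mtx_lock(), mtx_unlock(), mtx_lock_spin() and mtx_unlock_spin()
 * macros, which supply the file and line of the call site.  A sketch of
 * the usual pairing, with `foo_mtx' as a hypothetical sleep mutex:
 *
 *	mtx_lock(&foo_mtx);
 *	... access data protected by foo_mtx ...
 *	mtx_unlock(&foo_mtx);
 */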

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
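
/*
 * A sketch of typical mtx_trylock() use, again with a hypothetical
 * `foo_mtx':
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... the lock was free and is now owned ...
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		... the lock was held; take the fallback path ...
 *	}
 */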

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		td->td_proc->p_stat = SMTX;
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000)
				continue;
			if (i++ < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
			panic("spin lock %s held by %p for > 5 seconds",
			    m->mtx_object.lo_name, (void *)m->mtx_lock);
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}
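
/*
 * The loop above spins without delay for roughly the first ten million
 * iterations, backs off with a DELAY(1) per iteration after that, and
 * finally panics (unless the kernel debugger is active), on the theory
 * that a spin lock held that long indicates a deadlock.
 */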

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	td1->td_proc->p_stat = SRUN;
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(td);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}
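
/*
 * Note the priority handling above: after the highest-priority waiter
 * is chosen, the releasing thread recomputes its own priority as the
 * best (numerically lowest) priority among the front waiters of the
 * mutexes it still holds contested, and never leaves itself with a
 * priority worse than its own base priority.  This unwinds priority
 * that was inherited solely on behalf of the lock just released.
 */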

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
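
/*
 * A typical assertion at the top of a routine that requires its caller
 * to hold a given mutex (`foo_mtx' is illustrative):
 *
 *	mtx_assert(&foo_mtx, MA_OWNED);
 */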

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, margs->ma_opts);
}
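
/*
 * A sketch of the intended use, assuming the (name, mtx, desc, opts)
 * argument order of the MTX_SYSINIT() macro declared in <sys/mutex.h>;
 * the names are hypothetical:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo_mtx, &foo_mtx, "foo mutex", MTX_DEF);
 */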

/*
 * Mutex initialization routine; initialize lock `m' with the type and
 * options contained in `opts' and with the description `description.'
 */
void
mtx_init(struct mtx *m, const char *description, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", description, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = description;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}
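
/*
 * A minimal mutex life cycle, with illustrative names:
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo mutex", MTX_DEF);
 *	...
 *	mtx_destroy(&foo_mtx);
 */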

/*
 * Tear down lock `m'.  We don't allow MTX_QUIET to be passed in as a
 * flag here because if the corresponding mtx_init() was called with
 * MTX_QUIET set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
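
/*
 * The intended pattern, per the comment above, with kern_giant_proc as
 * the example sysctl variable:
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_proc);
 *	... code that may still require Giant ...
 *	mtx_unlock_giant(s);
 */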
93485564Sdillon
935