subr_turnstile.c revision 93692
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 93692 2002-04-02 20:44:30Z jhb $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
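
/*
 * mtx_lock encodes the owning thread pointer and the MTX_* flag bits
 * (e.g. MTX_RECURSED, MTX_CONTESTED) in a single word: thread structures
 * are aligned, so the low bits of the pointer are free for flags.
 * Masking with MTX_FLAGMASK clears the flag bits, which is how
 * mtx_owner() recovers the thread pointer.
 */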

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * Bump this thread's priority.
		 */
		td->td_priority = pri;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		/* XXXKSE this test is not sufficient */
		if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) {
			MPASS(td->td_proc->p_stat == SRUN
			|| td->td_proc->p_stat == SZOMB
			|| td->td_proc->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this should
		 * never happen, as it would mean we are in a deadlock).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue, move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (td->td_proc->p_stat == SRUN) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			setrunqueue(td);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(td->td_proc->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}
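
/*
 * Illustrative scenario: with this scheduler a smaller td_priority value
 * means a higher priority.  If a priority-10 thread blocks on a mutex
 * owned by a priority-50 thread, propagate_priority() above lends the
 * owner priority 10, and keeps walking the td_blocked chain if that
 * owner is itself blocked on another mutex, so the lock holder cannot
 * be starved by intermediate-priority threads.
 */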

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char *name;
	const char *file;
	int line;
#define MPROF_MAX 0
#define MPROF_TOT 1
#define MPROF_CNT 2
#define MPROF_AVG 3
	u_int64_t counter[4];
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define NUM_MPROF_BUFFERS 4096
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define MPROF_HASH_SIZE 32771
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;

static void
mprof_init(void *arg __unused)
{
	mtx_init(&mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);
}
SYSINIT(mprofinit, SI_SUB_LOCK, SI_ORDER_ANY, mprof_init, NULL);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded"));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
	    "max", "total", "count", "average", "name");
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%12llu %12llu %12llu %12llu %s:%d (%s)\n",
		    mprof_buf[i].counter[MPROF_MAX] / 1000,
		    mprof_buf[i].counter[MPROF_TOT] / 1000,
		    mprof_buf[i].counter[MPROF_CNT],
		    mprof_buf[i].counter[MPROF_AVG] / 1000,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
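
/*
 * Example (illustrative): on a kernel compiled with MUTEX_PROFILING, the
 * statistics above can be enabled and read back from userland with
 * sysctl(8):
 *
 *	sysctl debug.mutex.prof.enable=1
 *	sysctl debug.mutex.prof.stats
 */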
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->acqtime == 0) {
		m->file = file;
		m->line = line;
		m->acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash, n;

		now = nanoseconds();
		acqtime = m->acqtime;
		m->acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = file; strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = line, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		n = hash;
		while ((mpp = mprof_hash[n]) != NULL) {
			if (mpp->line == line && strcmp(mpp->file, p) == 0)
				break;
			n = (n + 1) % MPROF_HASH_SIZE;
		}
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = line;
			mutex_prof_collisions += n - hash;
			++mutex_prof_records;
			mprof_hash[hash] = mpp;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before
		 */
		if ((now - acqtime) > mpp->counter[MPROF_MAX])
			mpp->counter[MPROF_MAX] = now - acqtime;
		mpp->counter[MPROF_TOT] += now - acqtime;
		mpp->counter[MPROF_CNT] += 1;
		mpp->counter[MPROF_AVG] =
		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_spin_lock(m);
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
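
/*
 * Example usage (illustrative; "foo_mtx" is a hypothetical mutex): callers
 * go through the mtx_trylock() wrapper and must check the return value:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		...
 *		mtx_unlock(&foo_mtx);
 *	}
 */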

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		td->td_proc->p_stat = SMTX;
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000)
				continue;
			if (i++ < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	td1->td_proc->p_stat = SRUN;
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(td);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
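
/*
 * Example (illustrative; "foo_mtx" is hypothetical): code that relies on a
 * lock being held can document and enforce that assumption with:
 *
 *	mtx_assert(&foo_mtx, MA_OWNED);
 *
 * The check is only compiled in when the kernel is built with INVARIANTS.
 */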

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, margs->ma_opts);
}
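
/*
 * Example (illustrative; "foo_mtx" is hypothetical): mtx_sysinit() is
 * normally reached via the MTX_SYSINIT() macro from <sys/mutex.h>, which
 * registers the mutex for initialization during boot; the exact macro
 * signature is assumed here:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo_mtx, &foo_mtx, "foo lock", MTX_DEF);
 */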

/*
 * Mutex initialization routine; initialize lock `m' with type and options
 * contained in `opts' and description `description.'
 */
void
mtx_init(struct mtx *m, const char *description, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", description, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = description;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a
 * flag here because if the corresponding mtx_init() was called with
 * MTX_QUIET set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}
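
/*
 * Example lifecycle (illustrative; "foo_mtx" is hypothetical):
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo lock", MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	...
 *	mtx_unlock(&foo_mtx);
 *	mtx_destroy(&foo_mtx);
 */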

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
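
/*
 * Example (illustrative): a caller wrapping a file-related subsystem saves
 * the return value and hands it back to mtx_unlock_giant():
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_file);
 *	...
 *	mtx_unlock_giant(s);
 */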
90842421Syokota