/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_mutex.c 144806 2005-04-08 14:14:09Z glebius $");

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"
#include "opt_mprof.h"
#include "opt_mutex_wake_all.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Force MUTEX_WAKE_ALL for now.
 * Single thread wakeup needs fixes to avoid race conditions with
 * priority inheritance.
 */
#ifndef MUTEX_WAKE_ALL
#define MUTEX_WAKE_ALL
#endif

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
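
/*
 * Illustrative sketch (not part of the implementation): the macros above
 * rely on the lock word encoding used throughout this file.  mtx_lock
 * holds either the MTX_UNOWNED sentinel or the owning thread pointer,
 * with the pointer's low bits borrowed for flags such as MTX_RECURSED
 * and MTX_CONTESTED; masking with MTX_FLAGMASK strips the flags and
 * recovers the owner:
 *
 *	uintptr_t v = m->mtx_lock;
 *	struct thread *owner = (struct thread *)(v & MTX_FLAGMASK);
 */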

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char	*name;
	const char	*file;
	int		line;
	uintmax_t	cnt_max;
	uintmax_t	cnt_tot;
	uintmax_t	cnt_cur;
	uintmax_t	cnt_contest_holding;
	uintmax_t	cnt_contest_locking;
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#ifdef MPROF_BUFFERS
#define NUM_MPROF_BUFFERS	MPROF_BUFFERS
#else
#define	NUM_MPROF_BUFFERS	1000
#endif
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#ifndef MPROF_HASH_SIZE
#define	MPROF_HASH_SIZE		1009
#endif
#if NUM_MPROF_BUFFERS >= MPROF_HASH_SIZE
#error MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE
#endif
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
/* SWAG: sbuf size = avg stat. line size * number of locks */
#define MPROF_SBUF_SIZE		(256 * 400)

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;
	static int multiplier = 1;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

retry_sbufops:
	sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	sbuf_printf(sb, "\n%6s %12s %11s %5s %12s %12s %s\n",
	    "max", "total", "count", "avg", "cnt_hold", "cnt_lock", "name");
	/*
	 * XXX this spinlock seems to be by far the largest perpetrator
	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
	 * even before I pessimized it further by moving the average
	 * computation here).
	 */
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i) {
		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %12ju %12ju %s:%d (%s)\n",
		    mprof_buf[i].cnt_max / 1000,
		    mprof_buf[i].cnt_tot / 1000,
		    mprof_buf[i].cnt_cur,
		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
			mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
		    mprof_buf[i].cnt_contest_holding,
		    mprof_buf[i].cnt_contest_locking,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
		if (sbuf_overflowed(sb)) {
			mtx_unlock_spin(&mprof_mtx);
			sbuf_delete(sb);
			multiplier++;
			goto retry_sbufops;
		}
	}
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
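
/*
 * Illustrative usage from userland (assuming a kernel built with
 * MUTEX_PROFILING), driving the sysctl knobs declared above:
 *
 *	sysctl debug.mutex.prof.enable=1	# start recording holdtimes
 *	sysctl debug.mutex.prof.stats		# dump the table produced above
 *	sysctl debug.mutex.prof.reset=1		# clear all records
 */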
230130803Smarcel
23119370Spststatic int
23219370Spstreset_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
23319370Spst{
23498944Sobrien	int error, v;
23519370Spst
23619370Spst	if (first_free_mprof_buf == 0)
23719370Spst		return (0);
23819370Spst
23919370Spst	v = 0;
24019370Spst	error = sysctl_handle_int(oidp, &v, 0, req);
24119370Spst	if (error)
242130803Smarcel		return (error);
243130803Smarcel	if (req->newptr == NULL)
244130803Smarcel		return (error);
245130803Smarcel	if (v == 0)
246130803Smarcel		return (0);
247130803Smarcel
248130803Smarcel	mtx_lock_spin(&mprof_mtx);
249130803Smarcel	bzero(mprof_buf, sizeof(*mprof_buf) * first_free_mprof_buf);
250130803Smarcel	bzero(mprof_hash, sizeof(struct mtx *) * MPROF_HASH_SIZE);
251130803Smarcel	first_free_mprof_buf = 0;
252130803Smarcel	mtx_unlock_spin(&mprof_mtx);
253130803Smarcel	return (0);
254130803Smarcel}
255130803SmarcelSYSCTL_PROC(_debug_mutex_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
256130803Smarcel    NULL, 0, reset_mutex_prof_stats, "I", "Reset mutex profiling statistics");
257130803Smarcel#endif
258130803Smarcel
259130803Smarcel/*
26019370Spst * Function versions of the inlined __mtx_* macros.  These are used by
26119370Spst * modules and can also be called from assembly language if needed.
26219370Spst */
26319370Spstvoid
26498944Sobrien_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
26519370Spst{
26619370Spst
26719370Spst	MPASS(curthread != NULL);
26898944Sobrien	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
26919370Spst	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
27019370Spst	    file, line));
27119370Spst	WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
27219370Spst	    file, line);
27398944Sobrien	_get_sleep_lock(m, curthread, opts, file, line);
27498944Sobrien	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
27519370Spst	    line);
27619370Spst	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
27719370Spst#ifdef MUTEX_PROFILING
27819370Spst	/* don't reset the timer when/if recursing */
27919370Spst	if (m->mtx_acqtime == 0) {
28019370Spst		m->mtx_filename = file;
28119370Spst		m->mtx_lineno = line;
28219370Spst		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
28319370Spst		++mutex_prof_acquisitions;
28419370Spst	}
28519370Spst#endif
28619370Spst}
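
/*
 * Illustrative sketch (hypothetical "foo" driver, not part of this file):
 * typical consumer usage of the sleep mutex interface.  The mtx_lock()
 * and mtx_unlock() macros normally inline the fast path; these
 * _mtx_*_flags() function versions back them for modules:
 *
 *	struct foo_softc {
 *		struct mtx	sc_mtx;
 *		int		sc_count;
 *	};
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_count++;
 *	mtx_unlock(&sc->sc_mtx);
 */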

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename;
		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if (now - acqtime > mpp->cnt_max)
			mpp->cnt_max = now - acqtime;
		mpp->cnt_tot += now - acqtime;
		mpp->cnt_cur++;
		/*
		 * There's a small race, really we should cmpxchg
		 * 0 with the current value, but that would bill
		 * the contention to the wrong lock instance if
		 * it followed this also.
		 */
		mpp->cnt_contest_holding += m->mtx_contest_holding;
		m->mtx_contest_holding = 0;
		mpp->cnt_contest_locking += m->mtx_contest_locking;
		m->mtx_contest_locking = 0;
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
	_rel_spin_lock(m);
}
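
/*
 * Illustrative sketch (hypothetical names, not part of this file): spin
 * mutexes are for code that must not sleep, e.g. data shared with
 * interrupt handlers; acquiring one blocks interrupts on the local CPU
 * via spinlock_enter() in the inlined acquisition path:
 *
 *	static struct mtx foo_intr_mtx;
 *	mtx_init(&foo_intr_mtx, "foo intr", NULL, MTX_SPIN);
 *
 *	mtx_lock_spin(&foo_intr_mtx);
 *	... touch state shared with the interrupt handler ...
 *	mtx_unlock_spin(&foo_intr_mtx);
 */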

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  If this function is called on a mutex that
 * is already owned, it will recursively acquire the lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	if (mtx_owned(m) && (m->mtx_object.lo_flags & LO_RECURSABLE) != 0) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		rval = 1;
	} else
		rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval)
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);

	return (rval);
}
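
/*
 * Illustrative sketch (hypothetical names, not part of this file): the
 * usual consumer pattern for the mtx_trylock() macro backed by this
 * function is to back off instead of blocking, e.g. to dodge a lock
 * order reversal while another lock is already held:
 *
 *	if (mtx_trylock(&foo_mtx) == 0)
 *		return (EWOULDBLOCK);
 *	... critical section ...
 *	mtx_unlock(&foo_mtx);
 */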

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, struct thread *td, int opts, const char *file,
    int line)
{
#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
	uintptr_t v;
#ifdef KTR
	int cont_logged = 0;
#endif
#ifdef MUTEX_PROFILING
	int contested;
#endif

	if (mtx_owned(m)) {
		KASSERT((m->mtx_object.lo_flags & LO_RECURSABLE) != 0,
	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
		    m->mtx_object.lo_name, file, line));
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

#ifdef MUTEX_PROFILING
	contested = 0;
#endif
	while (!_obtain_lock(m, td)) {
#ifdef MUTEX_PROFILING
		contested = 1;
		atomic_add_int(&m->mtx_contest_holding, 1);
#endif
		turnstile_lock(&m->mtx_object);
		v = m->mtx_lock;

		/*
		 * Check if the lock has been released while spinning for
		 * the turnstile chain lock.
		 */
		if (v == MTX_UNOWNED) {
			turnstile_release(&m->mtx_object);
			cpu_spinwait();
			continue;
		}

#ifdef MUTEX_WAKE_ALL
		MPASS(v != MTX_CONTESTED);
#else
		/*
		 * The mutex was marked contested on release. This means that
		 * there are other threads blocked on it.  Grab ownership of
		 * it and propagate its priority to the current thread if
		 * necessary.
		 */
		if (v == MTX_CONTESTED) {
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;
			turnstile_claim(&m->mtx_object);
			break;
		}
#endif

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			turnstile_release(&m->mtx_object);
			cpu_spinwait();
			continue;
		}

#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
#ifdef ADAPTIVE_GIANT
		if (TD_IS_RUNNING(owner)) {
#else
		if (m != &Giant && TD_IS_RUNNING(owner)) {
#endif
			turnstile_release(&m->mtx_object);
			while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) {
				cpu_spinwait();
			}
			continue;
		}
#endif	/* SMP && !NO_ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Block on the turnstile.
		 */
		turnstile_wait(&m->mtx_object, mtx_owner(m));
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
#ifdef MUTEX_PROFILING
	if (contested)
		m->mtx_contest_locking++;
	m->mtx_contest_holding = 0;
#endif
	return;
}

#ifdef SMP
/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, struct thread *td, int opts, const char *file,
    int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, td))
			break;

		/* Give interrupts a chance while we spin. */
		spinlock_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
				cpu_spinwait();
				continue;
			}
			if (i < 60000000)
				DELAY(1);
			else if (!kdb_active) {
				printf("spin lock %s held by %p for > 5 seconds\n",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef WITNESS
				witness_display_spinlock(&m->mtx_object,
				    mtx_owner(m));
#endif
				panic("spin lock held too long");
			}
			cpu_spinwait();
		}
		spinlock_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}
#endif /* SMP */

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct turnstile *ts;
#ifndef PREEMPTION
	struct thread *td, *td1;
#endif

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	turnstile_lock(&m->mtx_object);
	ts = turnstile_lookup(&m->mtx_object);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
	if (ts == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		turnstile_release(&m->mtx_object);
		return;
	}
#else
	MPASS(ts != NULL);
#endif
#ifndef PREEMPTION
	/* XXX */
	td1 = turnstile_head(ts);
#endif
#ifdef MUTEX_WAKE_ALL
	turnstile_broadcast(ts);
	_release_lock_quick(m);
#else
	if (turnstile_signal(ts)) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else {
		m->mtx_lock = MTX_CONTESTED;
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested",
			    m);
	}
#endif
	turnstile_unpend(ts);

#ifndef PREEMPTION
	/*
	 * XXX: This is just a hack until preemption is done.  However,
	 * once preemption is done we need to either wrap the
	 * turnstile_signal() and release of the actual lock in an
	 * extra critical section or change the preemption code to
	 * always just set a flag and never do instant-preempts.
	 */
	td = curthread;
	if (td->td_critnest > 0 || td1->td_priority >= td->td_priority)
		return;
	mtx_lock_spin(&sched_lock);
	if (!TD_IS_RUNNING(td1)) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		mi_switch(SW_INVOL, NULL);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}
	mtx_unlock_spin(&sched_lock);
#endif

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
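
/*
 * Illustrative sketch (hypothetical "foo" code, not part of this file):
 * consumers typically encode their locking protocol by asserting it at
 * the top of functions that require the lock to be held:
 *
 *	static void
 *	foo_update(struct foo_softc *sc)
 *	{
 *
 *		mtx_assert(&sc->sc_mtx, MA_OWNED);
 *		sc->sc_count++;
 *	}
 */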

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
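
/*
 * Illustrative sketch (hypothetical lock, not part of this file): the
 * MTX_SYSINIT() macro arranges for mtx_sysinit() to run during boot, so
 * a subsystem can declare a mutex that is initialized automatically,
 * exactly as mprof_mtx is set up above:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo, &foo_mtx, "foo lock", MTX_DEF);
 */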

/*
 * Mutex initialization routine; initialize lock `m' with the type and
 * options contained in `opts' and the name `name.'  The optional `type'
 * string is used as a general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex \"%s\" %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a flag
 * here because if the corresponding mtx_init() was called with MTX_QUIET
 * set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}
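
/*
 * Illustrative sketch (hypothetical driver, not part of this file): the
 * usual mtx_init()/mtx_destroy() pairing in attach/detach, passing a
 * shared `type' string so witness treats every instance of the
 * per-device lock as one lock category:
 *
 *	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "foo softc",
 *	    MTX_DEF);
 *	...
 *	mtx_destroy(&sc->sc_mtx);
 */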

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/* Setup turnstiles so that sleep mutexes work. */
	init_turnstiles();

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}
902