/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_mutex.c 193035 2009-05-29 13:56:34Z jhb $");

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"
#include "opt_global.h"
#include "opt_kdtrace.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
#include <sys/lock_profile.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <fs/devfs/devfs_int.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
#define	ADAPTIVE_MUTEXES
#endif

/*
 * Internal utility macros.
 */
#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)

#define	mtx_owner(m)	((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))

static void	assert_mtx(struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_mtx(struct lock_object *lock);
#endif
static void	lock_mtx(struct lock_object *lock, int how);
static void	lock_spin(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_mtx(struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_mtx(struct lock_object *lock);
static int	unlock_spin(struct lock_object *lock);

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	.lc_name = "sleep mutex",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
	.lc_assert = assert_mtx,
#ifdef DDB
	.lc_ddb_show = db_show_mtx,
#endif
	.lc_lock = lock_mtx,
	.lc_unlock = unlock_mtx,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_mtx,
#endif
};
struct lock_class lock_class_mtx_spin = {
	.lc_name = "spin mutex",
	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
	.lc_assert = assert_mtx,
#ifdef DDB
	.lc_ddb_show = db_show_mtx,
#endif
	.lc_lock = lock_spin,
	.lc_unlock = unlock_spin,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_mtx,
#endif
};

/*
 * System-wide mutexes
 */
struct mtx blocked_lock;
struct mtx Giant;

void
assert_mtx(struct lock_object *lock, int what)
{

	mtx_assert((struct mtx *)lock, what);
}

void
lock_mtx(struct lock_object *lock, int how)
{

	mtx_lock((struct mtx *)lock);
}

void
lock_spin(struct lock_object *lock, int how)
{

	panic("spin locks can only use msleep_spin");
}

int
unlock_mtx(struct lock_object *lock)
{
	struct mtx *m;

	m = (struct mtx *)lock;
	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
	mtx_unlock(m);
	return (0);
}

int
unlock_spin(struct lock_object *lock)
{

	panic("spin locks can only use msleep_spin");
}

#ifdef KDTRACE_HOOKS
int
owner_mtx(struct lock_object *lock, struct thread **owner)
{
	struct mtx *m = (struct mtx *)lock;

	*owner = mtx_owner(m);
	return (mtx_unowned(m) == 0);
}
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
	    file, line));
	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line, NULL);

	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
	    file, line));
	curthread->td_locks--;
	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);

	if (m->mtx_recurse == 0)
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_UNLOCK_RELEASE, m);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->lock_object.lo_name, file, line));
	if (mtx_owned(m))
		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
		    m->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line, NULL);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->lock_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);

	_rel_spin_lock(m);
}
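
/*
 * Usage sketch (illustrative only; `sc' and `sc_intr_mtx' are hypothetical
 * names, not part of this file).  Spin mutexes protect data shared with
 * primary interrupt context and are normally taken through the
 * mtx_lock_spin()/mtx_unlock_spin() inlines that the functions above back:
 *
 *	mtx_lock_spin(&sc->sc_intr_mtx);
 *	... touch state shared with the interrupt handler ...
 *	mtx_unlock_spin(&sc->sc_intr_mtx);
 */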

/*
 * The important part of mtx_trylock{,_flags}().
 * Tries to acquire lock `m.'  If this function is called on a mutex that
 * the caller already owns and that is marked recursable, it will
 * recursively acquire the lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	int rval;

	MPASS(curthread != NULL);
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
	    file, line));

	if (mtx_owned(m) && (m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		rval = 1;
	} else
		rval = _obtain_lock(m, (uintptr_t)curthread);

	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
		if (m->mtx_recurse == 0)
			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE,
			    m, contested, waittime, file, line);
	}

	return (rval);
}
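
/*
 * Usage sketch (illustrative; `foo_mtx' is a hypothetical lock): callers
 * normally go through the mtx_trylock() macro and must drop the lock on
 * success:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... short critical section ...
 *		mtx_unlock(&foo_mtx);
 *	}
 */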

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
    int line)
{
	struct turnstile *ts;
	uintptr_t v;
#ifdef ADAPTIVE_MUTEXES
	volatile struct thread *owner;
#endif
#ifdef KTR
	int cont_logged = 0;
#endif
#ifdef LOCK_PROFILING
	int contested = 0;
	uint64_t waittime = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (mtx_owned(m)) {
		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
		    m->lock_object.lo_name, file, line));
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->lock_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	lock_profile_obtain_lock_failed(&m->lock_object,
	    &contested, &waittime);
	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
		/*
		 * If the owner is running on another CPU, spin until the
		 * owner stops running or the state of the lock changes.
		 */
		v = m->mtx_lock;
		if (v != MTX_UNOWNED) {
			owner = (struct thread *)(v & ~MTX_FLAGMASK);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&m->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, m, owner);
				while (mtx_owner(m) == owner &&
				    TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		}
#endif

		ts = turnstile_trywait(&m->lock_object);
		v = m->mtx_lock;

		/*
		 * Check if the lock has been released while spinning for
		 * the turnstile chain lock.
		 */
		if (v == MTX_UNOWNED) {
			turnstile_cancel(ts);
			cpu_spinwait();
			continue;
		}

#ifdef ADAPTIVE_MUTEXES
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		owner = (struct thread *)(v & ~MTX_FLAGMASK);
		if (TD_IS_RUNNING(owner)) {
			turnstile_cancel(ts);
			cpu_spinwait();
			continue;
		}
#endif

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
			turnstile_cancel(ts);
			cpu_spinwait();
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    (void *)tid, file, line, m->lock_object.lo_name,
			    WITNESS_FILE(&m->lock_object),
			    WITNESS_LINE(&m->lock_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Block on the turnstile.
		 */
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
	}
#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->lock_object.lo_name, (void *)tid, file, line);
	}
#endif
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, m, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_MTX_LOCK_BLOCK, m, sleep_time);

	/*
	 * Only record the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt));
#endif
}

static void
_mtx_lock_spin_failed(struct mtx *m)
{
	struct thread *td;

	td = mtx_owner(m);

	/* If the mutex is unlocked, try again. */
	if (td == NULL)
		return;

	printf("spin lock %p (%s) held by %p (tid %d) too long\n",
	    m, m->lock_object.lo_name, td, td->td_tid);
#ifdef WITNESS
	witness_display_spinlock(&m->lock_object, td);
#endif
	panic("spin lock held too long");
}

#ifdef SMP
/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file,
    int line)
{
	int i = 0;
#ifdef LOCK_PROFILING
	int contested = 0;
	uint64_t waittime = 0;
#endif

	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
	while (!_obtain_lock(m, tid)) {

		/* Give interrupts a chance while we spin. */
		spinlock_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
				cpu_spinwait();
				continue;
			}
			if (i < 60000000 || kdb_active || panicstr != NULL)
				DELAY(1);
			else
				_mtx_lock_spin_failed(m);
			cpu_spinwait();
		}
		spinlock_enter();
	}

	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m,
	    contested, waittime, (file), (line));
	LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i);
}
#endif /* SMP */

void
_thread_lock_flags(struct thread *td, int opts, const char *file, int line)
{
	struct mtx *m;
	uintptr_t tid;
	int i;
#ifdef LOCK_PROFILING
	int contested = 0;
	uint64_t waittime = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
#endif

	i = 0;
	tid = (uintptr_t)curthread;
	for (;;) {
retry:
		spinlock_enter();
		m = td->td_lock;
		KASSERT(m->mtx_lock != MTX_DESTROYED,
		    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
		KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
		    ("thread_lock() of sleep mutex %s @ %s:%d",
		    m->lock_object.lo_name, file, line));
		if (mtx_owned(m))
			KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n",
			    m->lock_object.lo_name, file, line));
		WITNESS_CHECKORDER(&m->lock_object,
		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
		while (!_obtain_lock(m, tid)) {
#ifdef KDTRACE_HOOKS
			spin_cnt++;
#endif
			if (m->mtx_lock == tid) {
				m->mtx_recurse++;
				break;
			}
			lock_profile_obtain_lock_failed(&m->lock_object,
			    &contested, &waittime);
			/* Give interrupts a chance while we spin. */
			spinlock_exit();
			while (m->mtx_lock != MTX_UNOWNED) {
				if (i++ < 10000000)
					cpu_spinwait();
				else if (i < 60000000 ||
				    kdb_active || panicstr != NULL)
					DELAY(1);
				else
					_mtx_lock_spin_failed(m);
				cpu_spinwait();
				if (m != td->td_lock)
					goto retry;
			}
			spinlock_enter();
		}
		if (m == td->td_lock)
			break;
		_rel_spin_lock(m);	/* does spinlock_exit() */
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
	}
	if (m->mtx_recurse == 0)
		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE,
		    m, contested, waittime, (file), (line));
	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
	LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt);
}

struct mtx *
thread_lock_block(struct thread *td)
{
	struct mtx *lock;

	spinlock_enter();
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	lock = td->td_lock;
	td->td_lock = &blocked_lock;
	mtx_unlock_spin(lock);

	return (lock);
}

void
thread_lock_unblock(struct thread *td, struct mtx *new)
{
	mtx_assert(new, MA_OWNED);
	MPASS(td->td_lock == &blocked_lock);
	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
	spinlock_exit();
}

void
thread_lock_set(struct thread *td, struct mtx *new)
{
	struct mtx *lock;

	mtx_assert(new, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	lock = td->td_lock;
	td->td_lock = new;
	mtx_unlock_spin(lock);
}
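
/*
 * Handoff sketch (illustrative, not verbatim scheduler code): a caller
 * that migrates a thread can park td_lock on blocked_lock while the
 * thread is in transit, then publish the new container lock:
 *
 *	old = thread_lock_block(td);	(td->td_lock is now &blocked_lock)
 *	... move td under the lock `new', which the caller owns ...
 *	thread_lock_unblock(td, new);	(td->td_lock is now `new')
 */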

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct turnstile *ts;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->lock_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	/*
	 * We have to lock the chain before the turnstile so this turnstile
	 * can be removed from the hash list if it is empty.
	 */
	turnstile_chain_lock(&m->lock_object);
	ts = turnstile_lookup(&m->lock_object);
	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
	MPASS(ts != NULL);
	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
	_release_lock_quick(m);

	/*
	 * This turnstile is no longer associated with the mutex.  We can
	 * unlock the chain lock so a new turnstile may take its place.
	 */
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&m->lock_object);
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL || dumping)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->lock_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->lock_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->lock_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->lock_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
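
/*
 * Usage sketch (illustrative; `foo_mtx' and its description string are
 * hypothetical): MTX_SYSINIT() registers a SYSINIT that calls mtx_sysinit()
 * above with a filled-in struct mtx_args, so the mutex is initialized
 * automatically at boot:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo_mtx, &foo_mtx, "foo lock", MTX_DEF);
 */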

/*
 * Mutex initialization routine; initialize lock `m' with the class (spin
 * or sleep) and options given in `opts' and with name `name.'  The
 * optional lock type `type' is used as a general lock category name for
 * use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_class *class;
	int flags;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
		MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	/* Determine lock class and lock flags. */
	if (opts & MTX_SPIN)
		class = &lock_class_mtx_spin;
	else
		class = &lock_class_mtx_sleep;
	flags = 0;
	if (opts & MTX_QUIET)
		flags |= LO_QUIET;
	if (opts & MTX_RECURSE)
		flags |= LO_RECURSABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		flags |= LO_DUPOK;
	if (opts & MTX_NOPROFILE)
		flags |= LO_NOPROFILE;

	/* Initialize mutex. */
	m->mtx_lock = MTX_UNOWNED;
	m->mtx_recurse = 0;

	lock_init(&m->lock_object, class, name, type, flags);
}
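
/*
 * Usage sketch (illustrative; `foo_mtx' is a hypothetical lock): the
 * common lifecycle for a sleep mutex initialized by hand:
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	... access data protected by foo_mtx ...
 *	mtx_unlock(&foo_mtx);
 *	mtx_destroy(&foo_mtx);
 */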

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a flag
 * here because if the corresponding mtx_init() was called with MTX_QUIET
 * set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Perform the non-mtx related part of mtx_unlock_spin(). */
		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin)
			spinlock_exit();
		else
			curthread->td_locks--;

		lock_profile_release_lock(&m->lock_object);
		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	m->mtx_lock = MTX_DESTROYED;
	lock_destroy(&m->lock_object);
}
/*
 * Initialize the mutex code and system mutexes.  This is called from the
 * MD startup code prior to mi_startup().  The per-CPU data space needs to
 * be set up before this is called.
 */
void
mutex_init(void)
{

	/* Set up turnstiles so that sleep mutexes work. */
	init_turnstiles();

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
	mtx_lock(&Giant);
}

#ifdef DDB
void
db_show_mtx(struct lock_object *lock)
{
	struct thread *td;
	struct mtx *m;

	m = (struct mtx *)lock;

	db_printf(" flags: {");
	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
		db_printf("SPIN");
	else
		db_printf("DEF");
	if (m->lock_object.lo_flags & LO_RECURSABLE)
		db_printf(", RECURSE");
	if (m->lock_object.lo_flags & LO_DUPOK)
		db_printf(", DUPOK");
	db_printf("}\n");
	db_printf(" state: {");
	if (mtx_unowned(m))
		db_printf("UNOWNED");
	else if (mtx_destroyed(m))
		db_printf("DESTROYED");
	else {
		db_printf("OWNED");
		if (m->mtx_lock & MTX_CONTESTED)
			db_printf(", CONTESTED");
		if (m->mtx_lock & MTX_RECURSED)
			db_printf(", RECURSED");
	}
	db_printf("}\n");
	if (!mtx_unowned(m) && !mtx_destroyed(m)) {
		td = mtx_owner(m);
		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (mtx_recursed(m))
			db_printf(" recursed: %d\n", m->mtx_recurse);
	}
}
#endif