/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 90418 2002-02-09 00:12:53Z jhb $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <sys/ktr.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

#define SET_PRIO(td, pri)	(td)->td_ksegrp->kg_pri.pri_level = (pri)

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	struct ksegrp *kg = td->td_ksegrp;
	int pri = kg->kg_pri.pri_level;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right.  Really
			 * ought to bump priority of thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}
		kg = td->td_ksegrp;

		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(td->td_proc->p_stat != SSLEEP,
		    ("sleeping thread owns a mutex"));
		if (kg->kg_pri.pri_level <= pri)	/* lower is higher priority */
			return;

		/*
		 * Bump this thread's priority.
		 */
		SET_PRIO(td, pri);

		/*
		 * If the lock holder is actually running, just bump its
		 * priority.
		 */
		/* XXXKSE this test is not sufficient */
		if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) {
			MPASS(td->td_proc->p_stat == SRUN ||
			    td->td_proc->p_stat == SZOMB ||
			    td->td_proc->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * On UP, td should never be curthread here, as that would
		 * mean we are deadlocked.
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If td is on a run queue, move it to the new run queue and
		 * quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (td->td_proc->p_stat == SRUN) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			setrunqueue(td);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(td->td_proc->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_ksegrp->kg_pri.pri_level <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_ksegrp->kg_pri.pri_level > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}
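
/*
 * An illustrative sketch, not part of the implementation: with a
 * low-priority thread L holding mutex `m' and a high-priority thread H
 * blocked on it, propagate_priority() lends H's priority to L (and to
 * whatever L is in turn blocked on) so a medium-priority thread cannot
 * starve H while L holds the lock:
 *
 *	mtx_lock(&m);		(L acquires m at low priority)
 *	...			(H calls mtx_lock(&m) and blocks; L now
 *				 runs at H's priority)
 *	mtx_unlock(&m);		(H is made runnable; L's priority is
 *				 recomputed in _mtx_unlock_sleep())
 */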

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_spin_lock(m);
}
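
/*
 * A minimal usage sketch with hypothetical names: consumers normally go
 * through the mtx_init()/mtx_lock()/mtx_unlock() macro interface, which
 * resolves to the functions above when not inlined:
 *
 *	static struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	... access data protected by foo_mtx ...
 *	mtx_unlock(&foo_mtx);
 */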

/*
 * The important part of mtx_trylock{,_flags}().
 * Tries to acquire lock `m.'  We do NOT handle recursion here; we assume
 * that if we're called, it's because we know we don't already own this
 * lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
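
/*
 * A hedged usage sketch: mtx_trylock() returns non-zero on success, and
 * the caller must not already own `m' since recursion is not handled
 * here:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... do work that needs foo_mtx ...
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		... take a fallback path ...
 *	}
 */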

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
	struct ksegrp *kg = td->td_ksegrp;

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * The mutex was marked contested on release.  This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_ksegrp->kg_pri.pri_level < kg->kg_pri.pri_level)
				SET_PRIO(td, td1->td_ksegrp->kg_pri.pri_level);
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_ksegrp->kg_pri.pri_level > kg->kg_pri.pri_level)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		td->td_proc->p_stat = SMTX;
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			    td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock.  Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000)
				continue;
			if (i++ < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_ksegrp->kg_pri.pri_level;
		if (cp < pri)
			pri = cp;
	}

	if (pri > kg->kg_pri.pri_native)
		pri = kg->kg_pri.pri_native;
	SET_PRIO(td, pri);

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	td1->td_proc->p_stat = SRUN;
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_ksegrp->kg_pri.pri_level < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(td);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */
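
/*
 * A short, hedged sketch of spin mutex usage (hypothetical names):
 * _get_spin_lock() enters a critical section around the acquisition,
 * which is what makes spin mutexes safe to take in interrupt context:
 *
 *	static struct mtx intr_mtx;
 *
 *	mtx_init(&intr_mtx, "intr table", MTX_SPIN);
 *	mtx_lock_spin(&intr_mtx);
 *	... touch state shared with interrupt handlers ...
 *	mtx_unlock_spin(&intr_mtx);
 */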

/*
 * The backing function for the INVARIANTS-enabled mtx_assert().
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
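
/*
 * A hedged example of how callers typically use mtx_assert() to
 * document and enforce a locking contract (hypothetical function):
 *
 *	static void
 *	foo_modify(struct foo *fp)
 *	{
 *
 *		mtx_assert(&fp->foo_mtx, MA_OWNED);
 *		fp->foo_count++;
 *	}
 */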

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate __P((struct mtx *));

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * Mutex initialization routine; initialize lock `m' with the type and
 * options contained in `opts' and with description `description.'
 */
void
mtx_init(struct mtx *m, const char *description, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", description, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = description;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}
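
/*
 * An illustrative sketch (hypothetical names) of initializing the two
 * mutex flavors with common option flags:
 *
 *	mtx_init(&map_mtx, "foo map", MTX_DEF | MTX_RECURSE);
 *	mtx_init(&hw_mtx, "foo hw", MTX_SPIN);
 */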

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a
 * flag here because if the corresponding mtx_init() was called with
 * MTX_QUIET set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL,
    "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");

int
mtx_lock_giant(int sysctlvar)
{

	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{

	if (s)
		mtx_unlock(&Giant);
}
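
/*
 * A hedged usage sketch: the token returned by mtx_lock_giant() must be
 * handed back to mtx_unlock_giant() so that Giant is only dropped if it
 * was actually acquired:
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_file);
 *	... operate on file/filedesc state ...
 *	mtx_unlock_giant(s);
 */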
73385564Sdillon
734