subr_turnstile.c revision 92723
/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 92723 2002-03-19 21:25:46Z alfred $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <sys/ktr.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
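
/*
 * The lock word encodes the owning thread pointer with the MTX_RECURSED
 * and MTX_CONTESTED flags packed into its low bits; masking with
 * MTX_FLAGMASK strips the flags to recover the pointer.  An illustrative
 * sketch, with a hypothetical owner:
 *
 *	m->mtx_lock == ((uintptr_t)owner | MTX_CONTESTED)
 *	mtx_owner(m) == owner
 */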

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right; we really ought
			 * to bump the priority of the thread that next
			 * acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(td->td_proc->p_stat != SSLEEP,
		    ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * Bump this thread's priority.
		 */
		td->td_priority = pri;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		/* XXXKSE this test is not sufficient */
		if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) {
			MPASS(td->td_proc->p_stat == SRUN
			    || td->td_proc->p_stat == SZOMB
			    || td->td_proc->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this shouldn't
		 * ever happen, however, as it would mean we are deadlocked).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If it's on a run queue, move it to the new run queue and
		 * quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (td->td_proc->p_stat == SRUN) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			setrunqueue(td);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(td->td_proc->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}
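
/*
 * An illustrative scenario for the loop above (hypothetical threads H
 * and L, not part of this file): when a high-priority thread H blocks
 * on a mutex owned by a low-priority thread L, H lends its priority to
 * L and then follows L's own td_blocked chain in case L is itself
 * waiting on another mutex, re-sorting each blocked queue on the way.
 * Recall that a lower numeric value means a higher priority:
 *
 *	H (priority 40) blocks on m1, owned by L (priority 120)
 *	  -> L->td_priority becomes 40
 *	  -> if L is blocked on m2, repeat with m2's owner
 */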

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_spin_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_spin_lock(m);
}

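/*
 * An illustrative consumer sketch (hypothetical "foo" names): callers
 * normally reach the functions above through the mtx_lock() and
 * mtx_unlock() macros in <sys/mutex.h> rather than calling them
 * directly.
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	... access the data protected by foo_mtx ...
 *	mtx_unlock(&foo_mtx);
 */
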
/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
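
/*
 * An illustrative trylock sketch (hypothetical "foo" names; normally
 * invoked via the mtx_trylock() macro).  The caller must be prepared
 * for failure and must not already hold the lock:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... we now hold the lock ...
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		... lock was busy; take a fallback path ...
 *	}
 */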

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		td->td_proc->p_stat = SMTX;
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000)
				continue;
			if (i++ < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
			panic("spin lock %s held by %p for > 5 seconds",
			    m->mtx_object.lo_name, (void *)m->mtx_lock);
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	td1->td_proc->p_stat = SRUN;
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(td);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

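/*
 * An illustrative spin mutex sketch (hypothetical "foo" names): spin
 * mutexes are taken and released with mtx_lock_spin()/mtx_unlock_spin(),
 * which also bracket the hold with a critical section, so the critical
 * section must stay short and must never sleep:
 *
 *	struct mtx foo_spin_mtx;
 *
 *	mtx_init(&foo_spin_mtx, "foo spin", MTX_SPIN);
 *	mtx_lock_spin(&foo_spin_mtx);
 *	... short, non-sleeping critical section ...
 *	mtx_unlock_spin(&foo_spin_mtx);
 */
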
/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
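
/*
 * An illustrative mtx_assert() sketch (hypothetical "foo" names; the
 * mtx_assert() macro only expands to the function above when the
 * kernel is built with the appropriate INVARIANTS support):
 *
 *	mtx_assert(&foo_mtx, MA_OWNED);		panic if we don't hold it
 *	mtx_assert(&foo_mtx, MA_OWNED | MA_NOTRECURSED);
 *	mtx_assert(&foo_mtx, MA_NOTOWNED);	panic if we do hold it
 */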

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(*m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * Mutex initialization routine; initialize lock `m' with the type and
 * options contained in `opts' and with description `description.'
 */
void
mtx_init(struct mtx *m, const char *description, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", description, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = description;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a flag
 * here because if the corresponding mtx_init() was called with MTX_QUIET
 * set, then it will already be set in the mutex's flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}
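
/*
 * An illustrative teardown sketch (hypothetical "foo" names): per the
 * MPASS checks above, a mutex may be destroyed while unowned, or while
 * held by the caller as long as it is neither recursed nor contested:
 *
 *	mtx_lock(&foo_mtx);
 *	... remove all outside references to the object ...
 *	mtx_destroy(&foo_mtx);
 */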

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn on and
 * off Giant around certain subsystems.  The default values for the sysctls
 * are set to what developers believe is stable and working in regards to
 * the Giant pushdown.  Developers should not turn off Giant via these
 * sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return (1);
	}
	return (0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
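
/*
 * An illustrative sketch of the Giant wrappers (hypothetical caller):
 * the value returned by mtx_lock_giant() must be handed back to the
 * matching mtx_unlock_giant() so that Giant is only dropped if it was
 * actually taken:
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_file);
 *	... work on file/filedesc state ...
 *	mtx_unlock_giant(s);
 */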
72885564Sdillon
729