/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/kern_mutex.c 97079 2002-05-21 20:34:28Z jhb $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void	propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * Bump this thread's priority.
		 */
		td->td_priority = pri;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		 /* XXXKSE this test is not sufficient */
		if (td->td_kse && (td->td_kse->ke_oncpu != NOCPU)) {
			MPASS(td->td_proc->p_stat == SRUN
			|| td->td_proc->p_stat == SZOMB
			|| td->td_proc->p_stat == SSTOP);
			return;
		}

#ifndef SMP
		/*
		 * For UP, check that td is not curthread (this should never
		 * happen, as it would mean we are deadlocked).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (td->td_proc->p_stat == SRUN) {
			MPASS(td->td_blocked == NULL);
			remrunqueue(td);
			setrunqueue(td);
			return;
		}

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(td->td_proc->p_stat == SMTX, (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char *name;
	const char *file;
	int line;
#define MPROF_MAX 0
#define MPROF_TOT 1
#define MPROF_CNT 2
#define MPROF_AVG 3
	u_int64_t counter[4];
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define NUM_MPROF_BUFFERS 1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define MPROF_HASH_SIZE 1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;

	if (first_free_mprof_buf == 0)
		return SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded"));

	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
	    "max", "total", "count", "average", "name");
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i)
		sbuf_printf(sb, "%12llu %12llu %12llu %12llu %s:%d (%s)\n",
		    mprof_buf[i].counter[MPROF_MAX] / 1000,
		    mprof_buf[i].counter[MPROF_TOT] / 1000,
		    mprof_buf[i].counter[MPROF_CNT],
		    mprof_buf[i].counter[MPROF_AVG] / 1000,
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->acqtime == 0) {
		m->file = file;
		m->line = line;
		m->acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->acqtime;
		m->acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = file; strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = line, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == line && strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = line;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before
		 */
		if ((now - acqtime) > mpp->counter[MPROF_MAX])
			mpp->counter[MPROF_MAX] = now - acqtime;
		mpp->counter[MPROF_TOT] += now - acqtime;
		mpp->counter[MPROF_CNT] += 1;
		mpp->counter[MPROF_AVG] =
		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
#if defined(SMP) || LOCK_DEBUG > 0
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	mtx_assert(m, MA_OWNED);
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
#if defined(SMP) || LOCK_DEBUG > 0
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
 * if we're called, it's because we know we don't already own this lock.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval) {
		/*
		 * We do not handle recursion in _mtx_trylock; see the
		 * note at the top of the routine.
		 */
		KASSERT(!mtx_recursed(m),
		    ("mtx_trylock() called on a recursed mutex"));
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	}

	return (rval);
}
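
/*
 * Illustrative caller sketch (editorial, using a hypothetical mutex
 * "foo_mtx"): since recursion is not handled here, a caller only tries
 * the lock when it knows it does not already own it, and must check the
 * return value:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		... critical section ...
 *		mtx_unlock(&foo_mtx);
 *	} else {
 *		... fall back without blocking ...
 *	}
 */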

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;

	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {
		uintptr_t v;
		struct thread *td1;

		mtx_lock_spin(&sched_lock);
		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if ((v = m->mtx_lock) == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
			(void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
		}

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_mtxname = m->mtx_object.lo_name;
		td->td_proc->p_stat = SMTX;
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			  td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000)
				continue;
			if (i++ < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
			panic("spin lock %s held by %p for > 5 seconds",
			    m->mtx_object.lo_name, (void *)m->mtx_lock);
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	td1->td_proc->p_stat = SRUN;
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
				    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		setrunqueue(td);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
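
/*
 * Illustrative sketch (editorial, hypothetical "foo_mtx"): subsystems
 * document and verify their locking assumptions with mtx_assert(), e.g.:
 *
 *	mtx_assert(&foo_mtx, MA_OWNED);
 *	mtx_assert(&foo_mtx, MA_OWNED | MA_NOTRECURSED);
 *	mtx_assert(&foo_mtx, MA_NOTOWNED);
 *
 * These panic with the messages above when an assertion fails, and
 * compile to nothing in kernels built without invariant support.
 */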

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}

/*
 * Mutex initialization routine; initialize lock `m' with name `name'
 * and the type and options contained in `opts.'  The optional lock
 * type `type' is used as a general lock category name for use with
 * witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex %s %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if (opts & MTX_SLEEPABLE)
		lock->lo_flags |= LO_SLEEPABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}
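
/*
 * Illustrative lifecycle sketch (editorial, hypothetical "foo"
 * subsystem): callers typically pair mtx_init() with mtx_destroy()
 * below, e.g.:
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo lock", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	...
 *	mtx_unlock(&foo_mtx);
 *	mtx_destroy(&foo_mtx);
 *
 * A statically declared mutex can instead use the MTX_SYSINIT() macro
 * (as mprof_mtx does above), which arranges for mtx_sysinit() to perform
 * the mtx_init() during boot.
 */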

/*
 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
 * passed in as a flag here because if the corresponding mtx_init() was
 * called with MTX_QUIET set, then it will already be set in the mutex's
 * flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}

/*
 * Encapsulated Giant mutex routines.  These routines provide encapsulation
 * control for the Giant mutex, allowing sysctls to be used to turn Giant
 * on and off around certain subsystems.  The default values for the
 * sysctls are set to what developers believe is stable and working with
 * regard to the Giant pushdown.  Developers should not turn off Giant via
 * these sysctls unless they know what they are doing.
 *
 * Callers of mtx_lock_giant() are expected to pass the return value to an
 * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
 * affected by a Giant wrap, all related sysctl variables must be zero for
 * the subsystem call to operate without Giant (as determined by the caller).
 */

SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");

static int kern_giant_all = 0;
SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");

int kern_giant_proc = 1;	/* Giant around PROC locks */
int kern_giant_file = 1;	/* Giant around struct file & filedesc */
int kern_giant_ucred = 1;	/* Giant around ucred */
SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");

int
mtx_lock_giant(int sysctlvar)
{
	if (sysctlvar || kern_giant_all) {
		mtx_lock(&Giant);
		return(1);
	}
	return(0);
}

void
mtx_unlock_giant(int s)
{
	if (s)
		mtx_unlock(&Giant);
}
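
/*
 * Illustrative sketch of the calling pattern described above (editorial):
 * the return value of mtx_lock_giant() is passed to the matching
 * mtx_unlock_giant(), e.g. around a proc-related subsystem call:
 *
 *	int s;
 *
 *	s = mtx_lock_giant(kern_giant_proc);
 *	... subsystem code that may still need Giant ...
 *	mtx_unlock_giant(s);
 */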