/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/sys/kern/kern_mutex.c 303953 2016-08-11 09:28:49Z mjg $");

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
#include <sys/lock_profile.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <fs/devfs/devfs_int.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
#define	ADAPTIVE_MUTEXES
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , lock, failed);
#endif
/*
 * Return the mutex address when the lock cookie address is provided.
 * This functionality assumes that struct mtx has a member named mtx_lock.
 */
#define	mtxlock2mtx(c)	(__containerof(c, struct mtx, mtx_lock))

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)

#define	mtx_owner(m)	((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))
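
/*
 * Note on the lock word: when a sleep mutex is held, mtx_lock contains the
 * owning thread pointer with the low MTX_FLAGMASK bits reused for the
 * MTX_RECURSED and MTX_CONTESTED state bits; the sentinel values
 * MTX_UNOWNED and MTX_DESTROYED mark free and destroyed mutexes, which is
 * what the macros above test against.
 */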

static void	assert_mtx(const struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_mtx(const struct lock_object *lock);
#endif
static void	lock_mtx(struct lock_object *lock, uintptr_t how);
static void	lock_spin(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_mtx(const struct lock_object *lock,
		    struct thread **owner);
#endif
static uintptr_t unlock_mtx(struct lock_object *lock);
static uintptr_t unlock_spin(struct lock_object *lock);

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	.lc_name = "sleep mutex",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
	.lc_assert = assert_mtx,
#ifdef DDB
	.lc_ddb_show = db_show_mtx,
#endif
	.lc_lock = lock_mtx,
	.lc_unlock = unlock_mtx,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_mtx,
#endif
};
struct lock_class lock_class_mtx_spin = {
	.lc_name = "spin mutex",
	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
	.lc_assert = assert_mtx,
#ifdef DDB
	.lc_ddb_show = db_show_mtx,
#endif
	.lc_lock = lock_spin,
	.lc_unlock = unlock_spin,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_mtx,
#endif
};

#ifdef ADAPTIVE_MUTEXES
static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");

static struct lock_delay_config mtx_delay = {
	.initial	= 1000,
	.step		= 500,
	.min		= 100,
	.max		= 5000,
};

SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial,
    0, "");
SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step,
    0, "");
SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min,
    0, "");
SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
    0, "");
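
/*
 * The adaptive spin parameters above are exported under debug.mtx and can
 * be tuned at run time; for example (illustrative only):
 *
 *	# sysctl debug.mtx.delay_initial=2000
 */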

static void
mtx_delay_sysinit(void *dummy)
{

	mtx_delay.initial = mp_ncpus * 25;
	mtx_delay.step = (mp_ncpus * 25) / 2;
	mtx_delay.min = mp_ncpus * 5;
	mtx_delay.max = mp_ncpus * 25 * 10;
}
LOCK_DELAY_SYSINIT(mtx_delay_sysinit);
#endif

/*
 * System-wide mutexes
 */
struct mtx blocked_lock;
struct mtx Giant;

void
assert_mtx(const struct lock_object *lock, int what)
{

	mtx_assert((const struct mtx *)lock, what);
}

void
lock_mtx(struct lock_object *lock, uintptr_t how)
{

	mtx_lock((struct mtx *)lock);
}

void
lock_spin(struct lock_object *lock, uintptr_t how)
{

	panic("spin locks can only use msleep_spin");
}

uintptr_t
unlock_mtx(struct lock_object *lock)
{
	struct mtx *m;

	m = (struct mtx *)lock;
	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
	mtx_unlock(m);
	return (0);
}

uintptr_t
unlock_spin(struct lock_object *lock)
{

	panic("spin locks can only use msleep_spin");
}

#ifdef KDTRACE_HOOKS
int
owner_mtx(const struct lock_object *lock, struct thread **owner)
{
	const struct mtx *m = (const struct mtx *)lock;

	*owner = mtx_owner(m);
	return (mtx_unowned(m) == 0);
}
#endif

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
__mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
{
	struct mtx *m;

	if (SCHEDULER_STOPPED())
		return;

	m = mtxlock2mtx(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
	    curthread, m->lock_object.lo_name, file, line));
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
	    file, line));
	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);

	__mtx_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
	    file, line);
	TD_LOCKS_INC(curthread);
}

void
__mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
{
	struct mtx *m;

	if (SCHEDULER_STOPPED())
		return;

	m = mtxlock2mtx(c);

	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);

	__mtx_unlock(m, curthread, opts, file, line);
	TD_LOCKS_DEC(curthread);
}

void
__mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
    int line)
{
	struct mtx *m;

	if (SCHEDULER_STOPPED())
		return;

	m = mtxlock2mtx(c);

	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->lock_object.lo_name, file, line));
	if (mtx_owned(m))
		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
		    (opts & MTX_RECURSE) != 0,
	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
		    m->lock_object.lo_name, file, line));
	opts &= ~MTX_RECURSE;
	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line, NULL);
	__mtx_lock_spin(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
}

int
__mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
    int line)
{
	struct mtx *m;

	if (SCHEDULER_STOPPED())
		return (1);

	m = mtxlock2mtx(c);

	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
	    ("mtx_trylock_spin() of sleep mutex %s @ %s:%d",
	    m->lock_object.lo_name, file, line));
	KASSERT((opts & MTX_RECURSE) == 0,
	    ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n",
	    m->lock_object.lo_name, file, line));
	if (__mtx_trylock_spin(m, curthread, opts, file, line)) {
		LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line);
		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
		return (1);
	}
	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line);
	return (0);
}

void
__mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
    int line)
{
	struct mtx *m;

	if (SCHEDULER_STOPPED())
		return;

	m = mtxlock2mtx(c);

	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->lock_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);

	__mtx_unlock_spin(m);
}

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  If this function is called on a mutex that
 * is already owned, it will recursively acquire the lock.
 */
int
_mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
{
	struct mtx *m;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	int rval;

	if (SCHEDULER_STOPPED())
		return (1);

	m = mtxlock2mtx(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
	    curthread, m->lock_object.lo_name, file, line));
	KASSERT(m->mtx_lock != MTX_DESTROYED,
	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
	    file, line));

	if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
	    (opts & MTX_RECURSE) != 0)) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		rval = 1;
	} else
		rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
	opts &= ~MTX_RECURSE;

	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		TD_LOCKS_INC(curthread);
		if (m->mtx_recurse == 0)
			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
			    m, contested, waittime, file, line);

	}

	return (rval);
}
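
/*
 * Illustrative mtx_trylock() caller pattern (sc and sc_mtx are hypothetical
 * driver names); the caller must be prepared to back off instead of
 * sleeping when the lock is busy:
 *
 *	if (mtx_trylock(&sc->sc_mtx)) {
 *		... do the work ...
 *		mtx_unlock(&sc->sc_mtx);
 *	} else {
 *		... defer the work or retry later ...
 *	}
 */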

/*
 * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
    const char *file, int line)
{
	struct mtx *m;
	struct turnstile *ts;
	uintptr_t v;
#ifdef ADAPTIVE_MUTEXES
	volatile struct thread *owner;
#endif
#ifdef KTR
	int cont_logged = 0;
#endif
#ifdef LOCK_PROFILING
	int contested = 0;
	uint64_t waittime = 0;
#endif
#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_MUTEXES)
	lock_delay_arg_init(&lda, &mtx_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif
	m = mtxlock2mtx(c);

	if (mtx_owned(m)) {
		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
		    (opts & MTX_RECURSE) != 0,
	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
		    m->lock_object.lo_name, file, line));
		opts &= ~MTX_RECURSE;
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->lock_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}
	opts &= ~MTX_RECURSE;

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&m->lock_object,
		    &contested, &waittime);
	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
#ifdef KDTRACE_HOOKS
	all_time -= lockstat_nsecs(&m->lock_object);
#endif

	for (;;) {
		if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
			break;
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
		/*
		 * If the owner is running on another CPU, spin until the
		 * owner stops running or the state of the lock changes.
		 */
		v = m->mtx_lock;
		if (v != MTX_UNOWNED) {
			owner = (struct thread *)(v & ~MTX_FLAGMASK);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&m->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, m, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname((struct thread *)tid),
				    "spinning", "lockname:\"%s\"",
				    m->lock_object.lo_name);
				while (mtx_owner(m) == owner &&
				    TD_IS_RUNNING(owner))
					lock_delay(&lda);
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname((struct thread *)tid),
				    "running");
				continue;
			}
		}
#endif

		ts = turnstile_trywait(&m->lock_object);
		v = m->mtx_lock;

		/*
		 * Check if the lock has been released while spinning for
		 * the turnstile chain lock.
		 */
		if (v == MTX_UNOWNED) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_MUTEXES
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		owner = (struct thread *)(v & ~MTX_FLAGMASK);
		if (TD_IS_RUNNING(owner)) {
			turnstile_cancel(ts);
			continue;
		}
#endif

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
			turnstile_cancel(ts);
			continue;
		}

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    (void *)tid, file, line, m->lock_object.lo_name,
			    WITNESS_FILE(&m->lock_object),
			    WITNESS_LINE(&m->lock_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Block on the turnstile.
		 */
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&m->lock_object);
#endif
		turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&m->lock_object);
		sleep_cnt++;
#endif
	}
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&m->lock_object);
#endif
#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->lock_object.lo_name, (void *)tid, file, line);
	}
#endif
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(adaptive__block, m, sleep_time);

	/*
	 * Only record the loops spinning and not sleeping.
	 */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
#endif
}

static void
_mtx_lock_spin_failed(struct mtx *m)
{
	struct thread *td;

	td = mtx_owner(m);

	/* If the mutex is unlocked, try again. */
	if (td == NULL)
		return;

	printf( "spin lock %p (%s) held by %p (tid %d) too long\n",
	    m, m->lock_object.lo_name, td, td->td_tid);
#ifdef WITNESS
	witness_display_spinlock(&m->lock_object, td, printf);
#endif
	panic("spin lock held too long");
}

#ifdef SMP
/*
 * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
    const char *file, int line)
{
	struct mtx *m;
	int i = 0;
#ifdef LOCK_PROFILING
	int contested = 0;
	uint64_t waittime = 0;
#endif
#ifdef KDTRACE_HOOKS
	int64_t spin_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	m = mtxlock2mtx(c);

	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
	KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
	    "spinning", "lockname:\"%s\"", m->lock_object.lo_name);

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
#ifdef KDTRACE_HOOKS
	spin_time -= lockstat_nsecs(&m->lock_object);
#endif
	for (;;) {
		if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
			break;
		/* Give interrupts a chance while we spin. */
		spinlock_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
				cpu_spinwait();
				continue;
			}
			if (i < 60000000 || kdb_active || panicstr != NULL)
				DELAY(1);
			else
				_mtx_lock_spin_failed(m);
			cpu_spinwait();
		}
		spinlock_enter();
	}
#ifdef KDTRACE_HOOKS
	spin_time += lockstat_nsecs(&m->lock_object);
#endif

	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
	KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
	    "running");

#ifdef KDTRACE_HOOKS
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
	    contested, waittime, file, line);
	if (spin_time != 0)
		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
#endif
}
#endif /* SMP */

void
thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
{
	struct mtx *m;
	uintptr_t tid;
	int i;
#ifdef LOCK_PROFILING
	int contested = 0;
	uint64_t waittime = 0;
#endif
#ifdef KDTRACE_HOOKS
	int64_t spin_time = 0;
#endif

	i = 0;
	tid = (uintptr_t)curthread;

	if (SCHEDULER_STOPPED()) {
		/*
		 * Ensure that spinlock sections are balanced even when the
		 * scheduler is stopped, since we may otherwise inadvertently
		 * re-enable interrupts while dumping core.
		 */
		spinlock_enter();
		return;
	}

#ifdef KDTRACE_HOOKS
	spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
#endif
	for (;;) {
retry:
		spinlock_enter();
		m = td->td_lock;
		KASSERT(m->mtx_lock != MTX_DESTROYED,
		    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
		KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
		    ("thread_lock() of sleep mutex %s @ %s:%d",
		    m->lock_object.lo_name, file, line));
		if (mtx_owned(m))
			KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n",
			    m->lock_object.lo_name, file, line));
		WITNESS_CHECKORDER(&m->lock_object,
		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
		for (;;) {
			if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
				break;
			if (m->mtx_lock == tid) {
				m->mtx_recurse++;
				break;
			}
#ifdef HWPMC_HOOKS
			PMC_SOFT_CALL( , , lock, failed);
#endif
			lock_profile_obtain_lock_failed(&m->lock_object,
			    &contested, &waittime);
			/* Give interrupts a chance while we spin. */
			spinlock_exit();
			while (m->mtx_lock != MTX_UNOWNED) {
				if (i++ < 10000000)
					cpu_spinwait();
				else if (i < 60000000 ||
				    kdb_active || panicstr != NULL)
					DELAY(1);
				else
					_mtx_lock_spin_failed(m);
				cpu_spinwait();
				if (m != td->td_lock)
					goto retry;
			}
			spinlock_enter();
		}
		if (m == td->td_lock)
			break;
		__mtx_unlock_spin(m);	/* does spinlock_exit() */
	}
#ifdef KDTRACE_HOOKS
	spin_time += lockstat_nsecs(&m->lock_object);
#endif
	if (m->mtx_recurse == 0)
		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
		    contested, waittime, file, line);
	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef KDTRACE_HOOKS
	if (spin_time != 0)
		LOCKSTAT_RECORD1(thread__spin, m, spin_time);
#endif
}

struct mtx *
thread_lock_block(struct thread *td)
{
	struct mtx *lock;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	lock = td->td_lock;
	td->td_lock = &blocked_lock;
	mtx_unlock_spin(lock);

	return (lock);
}

void
thread_lock_unblock(struct thread *td, struct mtx *new)
{
	mtx_assert(new, MA_OWNED);
	MPASS(td->td_lock == &blocked_lock);
	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
}

void
thread_lock_set(struct thread *td, struct mtx *new)
{
	struct mtx *lock;

	mtx_assert(new, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	lock = td->td_lock;
	td->td_lock = new;
	mtx_unlock_spin(lock);
}
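
/*
 * The three functions above implement the scheduler's td_lock hand-off:
 * thread_lock_block() parks a thread on the permanently "locked"
 * blocked_lock while it is moved between containers (run queue, turnstile,
 * sleep queue), thread_lock_unblock() publishes the new lock pointer with a
 * release store, and thread_lock_set() switches td_lock while the caller
 * holds both the old and the new lock, then drops the old one.
 */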

/*
 * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
{
	struct mtx *m;
	struct turnstile *ts;

	if (SCHEDULER_STOPPED())
		return;

	m = mtxlock2mtx(c);

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->lock_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	/*
	 * We have to lock the chain before the turnstile so this turnstile
	 * can be removed from the hash list if it is empty.
	 */
	turnstile_chain_lock(&m->lock_object);
	ts = turnstile_lookup(&m->lock_object);
	if (LOCK_LOG_TEST(&m->lock_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
	MPASS(ts != NULL);
	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
	_mtx_release_lock_quick(m);

	/*
	 * This turnstile is no longer associated with the mutex.  We can
	 * unlock the chain lock so a new turnstile may take its place.
	 */
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&m->lock_object);
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the __mtx_unlock_spin() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
__mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct mtx *m;

	if (panicstr != NULL || dumping)
		return;

	m = mtxlock2mtx(c);

	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->lock_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->lock_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->lock_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->lock_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
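
/*
 * Callers normally use the mtx_assert() wrapper rather than calling the
 * function above directly, for example (illustrative):
 *
 *	mtx_assert(&sc->sc_mtx, MA_OWNED);
 *
 * In kernels built without INVARIANTS the wrapper compiles away.
 */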

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
	    margs->ma_opts);
}
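
/*
 * A hypothetical user of the MTX_SYSINIT() macro that feeds the routine
 * above would look something like:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo_mtx, &foo_mtx, "foo mutex", MTX_DEF);
 */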

/*
 * Mutex initialization routine; initialize lock `m' with the options
 * contained in `opts' and name `name'.  Whether the lock is a spin or a
 * sleep mutex is also derived from `opts'.  The optional lock type `type'
 * is used as a general lock category name for use with witness.
 */
void
_mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
{
	struct mtx *m;
	struct lock_class *class;
	int flags;

	m = mtxlock2mtx(c);

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
	    ("%s: mtx_lock not aligned for %s: %p", __func__, name,
	    &m->mtx_lock));

	/* Determine lock class and lock flags. */
	if (opts & MTX_SPIN)
		class = &lock_class_mtx_spin;
	else
		class = &lock_class_mtx_sleep;
	flags = 0;
	if (opts & MTX_QUIET)
		flags |= LO_QUIET;
	if (opts & MTX_RECURSE)
		flags |= LO_RECURSABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		flags |= LO_DUPOK;
	if (opts & MTX_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (opts & MTX_NEW)
		flags |= LO_NEW;

	/* Initialize mutex. */
	lock_init(&m->lock_object, class, name, type, flags);

	m->mtx_lock = MTX_UNOWNED;
	m->mtx_recurse = 0;
}
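
/*
 * For illustration, consumers normally reach _mtx_init() through the
 * mtx_init() macro (names below are hypothetical):
 *
 *	mtx_init(&sc->sc_mtx, "foo softc lock", NULL, MTX_DEF);
 *	mtx_init(&sc->sc_intr_mtx, "foo intr lock", NULL, MTX_SPIN);
 */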

/*
 * Destroy lock `m'.  We don't allow MTX_QUIET to be passed in as a flag
 * here because if the corresponding mtx_init() was called with MTX_QUIET
 * set, then it will already be set in the mutex's flags.
 */
void
_mtx_destroy(volatile uintptr_t *c)
{
	struct mtx *m;

	m = mtxlock2mtx(c);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Perform the non-mtx related part of mtx_unlock_spin(). */
		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin)
			spinlock_exit();
		else
			TD_LOCKS_DEC(curthread);

		lock_profile_release_lock(&m->lock_object);
		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	m->mtx_lock = MTX_DESTROYED;
	lock_destroy(&m->lock_object);
}

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup turnstiles so that sleep mutexes work. */
	init_turnstiles();

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN);
	mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN);
	mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN);
	mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN);
	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
	mtx_lock(&Giant);
}

#ifdef DDB
void
db_show_mtx(const struct lock_object *lock)
{
	struct thread *td;
	const struct mtx *m;

	m = (const struct mtx *)lock;

	db_printf(" flags: {");
	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
		db_printf("SPIN");
	else
		db_printf("DEF");
	if (m->lock_object.lo_flags & LO_RECURSABLE)
		db_printf(", RECURSE");
	if (m->lock_object.lo_flags & LO_DUPOK)
		db_printf(", DUPOK");
	db_printf("}\n");
	db_printf(" state: {");
	if (mtx_unowned(m))
		db_printf("UNOWNED");
	else if (mtx_destroyed(m))
		db_printf("DESTROYED");
	else {
		db_printf("OWNED");
		if (m->mtx_lock & MTX_CONTESTED)
			db_printf(", CONTESTED");
		if (m->mtx_lock & MTX_RECURSED)
			db_printf(", RECURSED");
	}
	db_printf("}\n");
	if (!mtx_unowned(m) && !mtx_destroyed(m)) {
		td = mtx_owner(m);
		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (mtx_recursed(m))
			db_printf(" recursed: %d\n", m->mtx_recurse);
	}
}
#endif
