/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 * $FreeBSD: head/sys/kern/subr_turnstile.c 115568 2003-05-31 21:11:01Z phk $
 */

/*
 * Machine independent bits of mutex implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/subr_turnstile.c 116182 2003-06-11 00:56:59Z obrien $");

#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/clock.h>
#include <machine/cpu.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/*
 * Internal utility macros.
 */
#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)

#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
			    : (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))

/*
 * Lock classes for sleep and spin mutexes.
 */
struct lock_class lock_class_mtx_sleep = {
	"sleep mutex",
	LC_SLEEPLOCK | LC_RECURSABLE
};
struct lock_class lock_class_mtx_spin = {
	"spin mutex",
	LC_SPINLOCK | LC_RECURSABLE
};

/*
 * System-wide mutexes
 */
struct mtx sched_lock;
struct mtx Giant;

/*
 * Prototypes for non-exported routines.
 */
static void propagate_priority(struct thread *);

static void
propagate_priority(struct thread *td)
{
	int pri = td->td_priority;
	struct mtx *m = td->td_blocked;

	mtx_assert(&sched_lock, MA_OWNED);
	for (;;) {
		struct thread *td1;

		td = mtx_owner(m);

		if (td == NULL) {
			/*
			 * This really isn't quite right. Really
			 * ought to bump priority of thread that
			 * next acquires the mutex.
			 */
			MPASS(m->mtx_lock == MTX_CONTESTED);
			return;
		}

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);
		KASSERT(!TD_IS_SLEEPING(td), ("sleeping thread owns a mutex"));
		if (td->td_priority <= pri) /* lower is higher priority */
			return;

		/*
		 * If lock holder is actually running, just bump priority.
		 */
		if (TD_IS_RUNNING(td)) {
			td->td_priority = pri;
			return;
		}

#ifndef SMP
		/*
		 * For UP, we check to see if td is curthread (this should
		 * never happen, however, as it would mean we are deadlocked).
		 */
		KASSERT(td != curthread, ("Deadlock detected"));
#endif

		/*
		 * If on run queue move to new run queue, and quit.
		 * XXXKSE this gets a lot more complicated under threads
		 * but try anyhow.
		 */
		if (TD_ON_RUNQ(td)) {
			MPASS(td->td_blocked == NULL);
			sched_prio(td, pri);
			return;
		}
		/*
		 * Adjust for any other cases.
		 */
		td->td_priority = pri;

		/*
		 * If we aren't blocked on a mutex, we should be.
		 */
		KASSERT(TD_ON_LOCK(td), (
		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
		    m->mtx_object.lo_name));

		/*
		 * Pick up the mutex that td is blocked on.
		 */
		m = td->td_blocked;
		MPASS(m != NULL);

		/*
		 * Check if the thread needs to be moved up on
		 * the blocked chain.
		 */
		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
			continue;
		}

		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
		if (td1->td_priority <= pri) {
			continue;
		}

		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved up to.  Since we know that td1 has
		 * a lower priority than td, we know that at least one
		 * thread in the chain has a lower priority and that
		 * td1 will thus not be NULL after the loop.
		 */
		TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq);
		TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) {
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (td1->td_priority > pri)
				break;
		}

		MPASS(td1 != NULL);
		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
		CTR4(KTR_LOCK,
		    "propagate_priority: p %p moved before %p on [%p] %s",
		    td, td1, m, m->mtx_object.lo_name);
	}
}

#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
static int mutex_prof_enable = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
    &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");

struct mutex_prof {
	const char *name;
	const char *file;
	int line;
	uintmax_t cnt_max;
	uintmax_t cnt_tot;
	uintmax_t cnt_cur;
	struct mutex_prof *next;
};

/*
 * mprof_buf is a static pool of profiling records to avoid possible
 * reentrance of the memory allocation functions.
 *
 * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
 */
#define NUM_MPROF_BUFFERS	1000
static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
static int first_free_mprof_buf;
#define MPROF_HASH_SIZE		1009
static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
/* SWAG: sbuf size = avg stat. line size * number of locks */
#define MPROF_SBUF_SIZE		256 * 400

static int mutex_prof_acquisitions;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
    &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
static int mutex_prof_records;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
    &mutex_prof_records, 0, "Number of profiling records");
static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
    &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
static int mutex_prof_rejected;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &mutex_prof_rejected, 0, "Number of rejected profiling records");
static int mutex_prof_hashsize = MPROF_HASH_SIZE;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
    &mutex_prof_hashsize, 0, "Hash size");
static int mutex_prof_collisions = 0;
SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
    &mutex_prof_collisions, 0, "Number of hash collisions");

/*
 * mprof_mtx protects the profiling buffers and the hash.
 */
static struct mtx mprof_mtx;
MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);

static u_int64_t
nanoseconds(void)
{
	struct timespec tv;

	nanotime(&tv);
	return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
}

static int
dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, i;
	static int multiplier = 1;

	if (first_free_mprof_buf == 0)
		return (SYSCTL_OUT(req, "No locking recorded",
		    sizeof("No locking recorded")));

retry_sbufops:
	sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	sbuf_printf(sb, "%6s %12s %11s %5s %s\n",
	    "max", "total", "count", "avg", "name");
	/*
	 * XXX this spinlock seems to be by far the largest perpetrator
	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
	 * even before I pessimized it further by moving the average
	 * computation here).
	 */
	mtx_lock_spin(&mprof_mtx);
	for (i = 0; i < first_free_mprof_buf; ++i) {
		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %s:%d (%s)\n",
		    mprof_buf[i].cnt_max / 1000,
		    mprof_buf[i].cnt_tot / 1000,
		    mprof_buf[i].cnt_cur,
		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
		    mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
		if (sbuf_overflowed(sb)) {
			mtx_unlock_spin(&mprof_mtx);
			sbuf_delete(sb);
			multiplier++;
			goto retry_sbufops;
		}
	}
	mtx_unlock_spin(&mprof_mtx);
	sbuf_finish(sb);
	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (error);
}
SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
#endif
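
/*
 * Illustrative note (not part of the original file): on a kernel built with
 * `options MUTEX_PROFILING', the counters above are exposed under the
 * debug.mutex.prof sysctl tree, so hold-time data can be collected and read
 * from userland, e.g.:
 *
 *	sysctl debug.mutex.prof.enable=1	(start recording hold times)
 *	sysctl debug.mutex.prof.stats		(dump per-lock statistics)
 */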

/*
 * Function versions of the inlined __mtx_* macros.  These are used by
 * modules and can also be called from assembly language if needed.
 */
void
_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	_get_sleep_lock(m, curthread, opts, file, line);
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
#ifdef MUTEX_PROFILING
	/* don't reset the timer when/if recursing */
	if (m->mtx_acqtime == 0) {
		m->mtx_filename = file;
		m->mtx_lineno = line;
		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
		++mutex_prof_acquisitions;
	}
#endif
}

void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
	    file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#ifdef MUTEX_PROFILING
	if (m->mtx_acqtime != 0) {
		static const char *unknown = "(unknown)";
		struct mutex_prof *mpp;
		u_int64_t acqtime, now;
		const char *p, *q;
		volatile u_int hash;

		now = nanoseconds();
		acqtime = m->mtx_acqtime;
		m->mtx_acqtime = 0;
		if (now <= acqtime)
			goto out;
		for (p = m->mtx_filename;
		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
			/* nothing */ ;
		if (p == NULL || *p == '\0')
			p = unknown;
		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
		mtx_lock_spin(&mprof_mtx);
		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
			if (mpp->line == m->mtx_lineno &&
			    strcmp(mpp->file, p) == 0)
				break;
		if (mpp == NULL) {
			/* Just exit if we cannot get a trace buffer */
			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
				++mutex_prof_rejected;
				goto unlock;
			}
			mpp = &mprof_buf[first_free_mprof_buf++];
			mpp->name = mtx_name(m);
			mpp->file = p;
			mpp->line = m->mtx_lineno;
			mpp->next = mprof_hash[hash];
			if (mprof_hash[hash] != NULL)
				++mutex_prof_collisions;
			mprof_hash[hash] = mpp;
			++mutex_prof_records;
		}
		/*
		 * Record if the mutex has been held longer now than ever
		 * before.
		 */
		if (now - acqtime > mpp->cnt_max)
			mpp->cnt_max = now - acqtime;
		mpp->cnt_tot += now - acqtime;
		mpp->cnt_cur++;
unlock:
		mtx_unlock_spin(&mprof_mtx);
	}
out:
#endif
	_rel_sleep_lock(m, curthread, opts, file, line);
}

void
_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_get_spin_lock(m, curthread, opts, file, line);
#else
	critical_enter();
#endif
	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
}

void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
	    m->mtx_object.lo_name, file, line));
	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
	    line);
	mtx_assert(m, MA_OWNED);
#if defined(SMP) || LOCK_DEBUG > 0 || 1
	_rel_spin_lock(m);
#else
	critical_exit();
#endif
}
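
/*
 * Illustrative sketch (not part of the original file): typical use of the
 * mtx_lock()/mtx_unlock() and mtx_lock_spin()/mtx_unlock_spin() wrappers
 * that resolve to the functions above, assuming a hypothetical sleep mutex
 * `foo_mtx' initialized elsewhere with mtx_init(&foo_mtx, "foo", NULL,
 * MTX_DEF):
 *
 *	mtx_lock(&foo_mtx);
 *	foo_count++;			(data protected by foo_mtx)
 *	mtx_unlock(&foo_mtx);
 */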

/*
 * The important part of mtx_trylock{,_flags}()
 * Tries to acquire lock `m.'  We do NOT handle recursion here.  If this
 * function is called on a recursed mutex, it will return failure and
 * will not recursively acquire the lock.  You are expected to know what
 * you are doing.
 */
int
_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
{
	int rval;

	MPASS(curthread != NULL);

	rval = _obtain_lock(m, curthread);

	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
	if (rval)
		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);

	return (rval);
}
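
/*
 * Illustrative sketch (not part of the original file): because recursion is
 * not handled here, callers normally probe with the mtx_trylock() wrapper
 * and simply skip or defer the work when the lock is busy:
 *
 *	if (mtx_trylock(&foo_mtx)) {
 *		(touch data protected by foo_mtx)
 *		mtx_unlock(&foo_mtx);
 *	}
 */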

/*
 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
 *
 * We call this if the lock is either contested (i.e. we need to go to
 * sleep waiting for it), or if we need to recurse on it.
 */
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td = curthread;
	struct thread *td1;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	struct thread *owner;
#endif
	uintptr_t v;
#ifdef KTR
	int cont_logged = 0;
#endif

	if (mtx_owned(m)) {
		m->mtx_recurse++;
		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
		return;
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR4(KTR_LOCK,
		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);

	while (!_obtain_lock(m, td)) {

		mtx_lock_spin(&sched_lock);
		v = m->mtx_lock;

		/*
		 * Check if the lock has been released while spinning for
		 * the sched_lock.
		 */
		if (v == MTX_UNOWNED) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

		/*
		 * The mutex was marked contested on release. This means that
		 * there are threads blocked on it.
		 */
		if (v == MTX_CONTESTED) {
			td1 = TAILQ_FIRST(&m->mtx_blocked);
			MPASS(td1 != NULL);
			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;

			if (td1->td_priority < td->td_priority)
				td->td_priority = td1->td_priority;
			mtx_unlock_spin(&sched_lock);
			return;
		}

		/*
		 * If the mutex isn't already contested and a failure occurs
		 * setting the contested bit, the mutex was either released
		 * or the state of the MTX_RECURSED bit changed.
		 */
		if ((v & MTX_CONTESTED) == 0 &&
		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
		    (void *)(v | MTX_CONTESTED))) {
			mtx_unlock_spin(&sched_lock);
#ifdef __i386__
			ia32_pause();
#endif
			continue;
		}

#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
		/*
		 * If the current owner of the lock is executing on another
		 * CPU, spin instead of blocking.
		 */
		owner = (struct thread *)(v & MTX_FLAGMASK);
		if (m != &Giant && TD_IS_RUNNING(owner)) {
			mtx_unlock_spin(&sched_lock);
			while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) {
#ifdef __i386__
				ia32_pause();
#endif
			}
			continue;
		}
#endif	/* SMP && ADAPTIVE_MUTEXES */

		/*
		 * We definitely must sleep for this lock.
		 */
		mtx_assert(m, MA_NOTOWNED);

#ifdef notyet
		/*
		 * If we're borrowing an interrupted thread's VM context, we
		 * must clean up before going to sleep.
		 */
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_lock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif

		/*
		 * Put us on the list of threads blocked on this mutex.
		 */
		if (TAILQ_EMPTY(&m->mtx_blocked)) {
			td1 = mtx_owner(m);
			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		} else {
			TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq)
				if (td1->td_priority > td->td_priority)
					break;
			if (td1)
				TAILQ_INSERT_BEFORE(td1, td, td_lockq);
			else
				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
		}
#ifdef KTR
		if (!cont_logged) {
			CTR6(KTR_CONTENTION,
			    "contention: %p at %s:%d wants %s, taken by %s:%d",
			    td, file, line, m->mtx_object.lo_name,
			    WITNESS_FILE(&m->mtx_object),
			    WITNESS_LINE(&m->mtx_object));
			cont_logged = 1;
		}
#endif

		/*
		 * Save who we're blocked on.
		 */
		td->td_blocked = m;
		td->td_lockname = m->mtx_object.lo_name;
		TD_SET_LOCK(td);
		propagate_priority(td);

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
			    m->mtx_object.lo_name);

		td->td_proc->p_stats->p_ru.ru_nvcsw++;
		mi_switch();

		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR3(KTR_LOCK,
			    "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
			    td, m, m->mtx_object.lo_name);

		mtx_unlock_spin(&sched_lock);
	}

#ifdef KTR
	if (cont_logged) {
		CTR4(KTR_CONTENTION,
		    "contention end: %s acquired by %p at %s:%d",
		    m->mtx_object.lo_name, td, file, line);
	}
#endif
	return;
}

/*
 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
 *
 * This is only called if we need to actually spin for the lock. Recursion
 * is handled inline.
 */
void
_mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
{
	int i = 0;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);

	for (;;) {
		if (_obtain_lock(m, curthread))
			break;

		/* Give interrupts a chance while we spin. */
		critical_exit();
		while (m->mtx_lock != MTX_UNOWNED) {
			if (i++ < 10000000) {
#ifdef __i386__
				ia32_pause();
#endif
				continue;
			}
			if (i < 60000000)
				DELAY(1);
#ifdef DDB
			else if (!db_active)
#else
			else
#endif
				panic("spin lock %s held by %p for > 5 seconds",
				    m->mtx_object.lo_name, (void *)m->mtx_lock);
#ifdef __i386__
			ia32_pause();
#endif
		}
		critical_enter();
	}

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);

	return;
}

/*
 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
 *
 * We are only called here if the lock is recursed or contested (i.e. we
 * need to wake up a blocked thread).
 */
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
	struct thread *td, *td1;
	struct mtx *m1;
	int pri;

	td = curthread;

	if (mtx_recursed(m)) {
		if (--(m->mtx_recurse) == 0)
			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
		return;
	}

	mtx_lock_spin(&sched_lock);
	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);

	td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
	if (td1 == NULL) {
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
		mtx_unlock_spin(&sched_lock);
		return;
	}
#endif
	MPASS(td->td_proc->p_magic == P_MAGIC);
	MPASS(td1->td_proc->p_magic == P_MAGIC);

	TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq);

	if (TAILQ_EMPTY(&m->mtx_blocked)) {
		LIST_REMOVE(m, mtx_contested);
		_release_lock_quick(m);
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
	} else
		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);

	pri = PRI_MAX;
	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
		if (cp < pri)
			pri = cp;
	}

	if (pri > td->td_base_pri)
		pri = td->td_base_pri;
	td->td_priority = pri;

	if (LOCK_LOG_TEST(&m->mtx_object, opts))
		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
		    m, td1);

	td1->td_blocked = NULL;
	TD_CLR_LOCK(td1);
	if (!TD_CAN_RUN(td1)) {
		mtx_unlock_spin(&sched_lock);
		return;
	}
	setrunqueue(td1);

	if (td->td_critnest == 1 && td1->td_priority < pri) {
#ifdef notyet
		if (td->td_ithd != NULL) {
			struct ithd *it = td->td_ithd;

			if (it->it_interrupted) {
				if (LOCK_LOG_TEST(&m->mtx_object, opts))
					CTR2(KTR_LOCK,
					    "_mtx_unlock_sleep: %p interrupted %p",
					    it, it->it_interrupted);
				intr_thd_fixup(it);
			}
		}
#endif
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK,
			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
			    (void *)m->mtx_lock);

		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		if (LOCK_LOG_TEST(&m->mtx_object, opts))
			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
			    m, (void *)m->mtx_lock);
	}

	mtx_unlock_spin(&sched_lock);

	return;
}

/*
 * All the unlocking of MTX_SPIN locks is done inline.
 * See the _rel_spin_lock() macro for the details.
 */

/*
 * The backing function for the INVARIANTS-enabled mtx_assert()
 */
#ifdef INVARIANT_SUPPORT
void
_mtx_assert(struct mtx *m, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case MA_OWNED:
	case MA_OWNED | MA_RECURSED:
	case MA_OWNED | MA_NOTRECURSED:
		if (!mtx_owned(m))
			panic("mutex %s not owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		if (mtx_recursed(m)) {
			if ((what & MA_NOTRECURSED) != 0)
				panic("mutex %s recursed at %s:%d",
				    m->mtx_object.lo_name, file, line);
		} else if ((what & MA_RECURSED) != 0) {
			panic("mutex %s unrecursed at %s:%d",
			    m->mtx_object.lo_name, file, line);
		}
		break;
	case MA_NOTOWNED:
		if (mtx_owned(m))
			panic("mutex %s owned at %s:%d",
			    m->mtx_object.lo_name, file, line);
		break;
	default:
		panic("unknown mtx_assert at %s:%d", file, line);
	}
}
#endif
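
/*
 * Illustrative sketch (not part of the original file): callers document and
 * check their locking assumptions with mtx_assert(), which is backed by
 * _mtx_assert() above when INVARIANT_SUPPORT is configured, e.g. for a
 * hypothetical foo_mtx protecting struct foo:
 *
 *	static void
 *	foo_modify(struct foo *fp)
 *	{
 *
 *		mtx_assert(&foo_mtx, MA_OWNED);
 *		fp->f_count++;
 *	}
 */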

/*
 * The MUTEX_DEBUG-enabled mtx_validate()
 *
 * Most of these checks have been moved off into the LO_INITIALIZED flag
 * maintained by the witness code.
 */
#ifdef MUTEX_DEBUG

void	mtx_validate(struct mtx *);

void
mtx_validate(struct mtx *m)
{

/*
 * XXX: When kernacc() does not require Giant we can reenable this check
 */
#ifdef notyet
/*
 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
 * we can re-enable the kernacc() checks.
 */
#ifndef __alpha__
	/*
	 * Can't call kernacc() from early init386(), especially when
	 * initializing Giant mutex, because some stuff in kernacc()
	 * requires Giant itself.
	 */
	if (!cold)
		if (!kernacc((caddr_t)m, sizeof(m),
		    VM_PROT_READ | VM_PROT_WRITE))
			panic("Can't read and write to mutex %p", m);
#endif
#endif
}
#endif

/*
 * General init routine used by the MTX_SYSINIT() macro.
 */
void
mtx_sysinit(void *arg)
{
	struct mtx_args *margs = arg;

	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
}
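
/*
 * Illustrative sketch (not part of the original file): MTX_SYSINIT() hooks
 * mtx_sysinit() into the boot sequence so a mutex is initialized before its
 * first use, as done for mprof_mtx above, e.g.:
 *
 *	static struct mtx foo_mtx;
 *	MTX_SYSINIT(foo_mtx_init, &foo_mtx, "foo lock", MTX_DEF);
 */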

/*
 * Mutex initialization routine; initialize lock `m' with options contained
 * in `opts' and name `name.'  The optional lock type `type' is used as a
 * general lock category name for use with witness.
 */
void
mtx_init(struct mtx *m, const char *name, const char *type, int opts)
{
	struct lock_object *lock;

	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
	    MTX_NOWITNESS | MTX_DUPOK)) == 0);

#ifdef MUTEX_DEBUG
	/* Diagnostic and error correction */
	mtx_validate(m);
#endif

	lock = &m->mtx_object;
	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
	    ("mutex \"%s\" %p already initialized", name, m));
	bzero(m, sizeof(*m));
	if (opts & MTX_SPIN)
		lock->lo_class = &lock_class_mtx_spin;
	else
		lock->lo_class = &lock_class_mtx_sleep;
	lock->lo_name = name;
	lock->lo_type = type != NULL ? type : name;
	if (opts & MTX_QUIET)
		lock->lo_flags = LO_QUIET;
	if (opts & MTX_RECURSE)
		lock->lo_flags |= LO_RECURSABLE;
	if ((opts & MTX_NOWITNESS) == 0)
		lock->lo_flags |= LO_WITNESS;
	if (opts & MTX_DUPOK)
		lock->lo_flags |= LO_DUPOK;

	m->mtx_lock = MTX_UNOWNED;
	TAILQ_INIT(&m->mtx_blocked);

	LOCK_LOG_INIT(lock, opts);

	WITNESS_INIT(lock);
}

/*
 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
 * passed in as a flag here because if the corresponding mtx_init() was
 * called with MTX_QUIET set, then it will already be set in the mutex's
 * flags.
 */
void
mtx_destroy(struct mtx *m)
{

	LOCK_LOG_DESTROY(&m->mtx_object, 0);

	if (!mtx_owned(m))
		MPASS(mtx_unowned(m));
	else {
		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);

		/* Tell witness this isn't locked to make it happy. */
		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
		    __LINE__);
	}

	WITNESS_DESTROY(&m->mtx_object);
}
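
/*
 * Illustrative sketch (not part of the original file): the life cycle of a
 * dynamically managed mutex using the routines above; the lock must not be
 * recursed or contested at the time it is destroyed:
 *
 *	struct mtx foo_mtx;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	mtx_lock(&foo_mtx);
 *	(use the protected data)
 *	mtx_unlock(&foo_mtx);
 *	mtx_destroy(&foo_mtx);
 */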

/*
 * Initialize the mutex code and system mutexes.  This is called from the MD
 * startup code prior to mi_startup().  The per-CPU data space needs to be
 * set up before this is called.
 */
void
mutex_init(void)
{

	/* Setup thread0 so that mutexes work. */
	LIST_INIT(&thread0.td_contested);

	/*
	 * Initialize mutexes.
	 */
	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	mtx_lock(&Giant);
}