/*
 * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
 * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 *
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/lib/libthr/thread/thr_mutex.c 331722 2018-03-29 02:50:57Z eadler $");

#include "namespace.h"
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <pthread.h>
#include <pthread_np.h>
#include "un-namespace.h"

#include "thr_private.h"

_Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, the number of times to spin on trylock2
 * before entering the kernel to block.
 */
#define MUTEX_ADAPTIVE_SPINS	2000
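
/*
 * Illustrative sketch (not part of libthr): an application selects the
 * adaptive type through the standard attribute interface and may
 * override the default spin count with the FreeBSD-specific
 * setspinloops call declared in <pthread_np.h>.  Error handling is
 * omitted.
 *
 *	pthread_mutex_t m;
 *	pthread_mutexattr_t attr;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
 *	pthread_mutex_init(&m, &attr);
 *	pthread_mutexattr_destroy(&attr);
 *	pthread_mutex_setspinloops_np(&m, 4000);
 */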

/*
 * Prototypes
 */
int	__pthread_mutex_consistent(pthread_mutex_t *mutex);
int	__pthread_mutex_init(pthread_mutex_t *mutex,
		const pthread_mutexattr_t *mutex_attr);
int	__pthread_mutex_trylock(pthread_mutex_t *mutex);
int	__pthread_mutex_lock(pthread_mutex_t *mutex);
int	__pthread_mutex_timedlock(pthread_mutex_t *mutex,
		const struct timespec *abstime);
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
static void	mutex_init_robust(struct pthread *curthread);
static int	mutex_qidx(struct pthread_mutex *m);
static bool	is_robust_mutex(struct pthread_mutex *m);
static bool	is_pshared_mutex(struct pthread_mutex *m);

__weak_reference(__pthread_mutex_init, pthread_mutex_init);
__strong_reference(__pthread_mutex_init, _pthread_mutex_init);
__weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
__strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
__strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
__strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);

__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);

static void
mutex_init_link(struct pthread_mutex *m)
{

#if defined(_PTHREADS_INVARIANTS)
	m->m_qe.tqe_prev = NULL;
	m->m_qe.tqe_next = NULL;
	m->m_pqe.tqe_prev = NULL;
	m->m_pqe.tqe_next = NULL;
#endif
}

static void
mutex_assert_is_owned(struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(m->m_qe.tqe_prev == NULL))
		PANIC("mutex %p own %#x is not on list %p %p",
		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
#endif
}

static void
mutex_assert_not_owned(struct pthread *curthread __unused,
    struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(m->m_qe.tqe_prev != NULL ||
	    m->m_qe.tqe_next != NULL))
		PANIC("mutex %p own %#x is on list %p %p",
		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
	if (__predict_false(is_robust_mutex(m) &&
	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
	    (is_pshared_mutex(m) && curthread->robust_list ==
	    (uintptr_t)&m->m_lock) ||
	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
	    (uintptr_t)&m->m_lock))))
		PANIC(
    "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
		    m->m_rb_prev, (void *)curthread->robust_list,
		    (void *)curthread->priv_robust_list);
#endif
}

static bool
is_pshared_mutex(struct pthread_mutex *m)
{

	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
}

static bool
is_robust_mutex(struct pthread_mutex *m)
{

	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
}

int
_mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(curthread->inact_mtx != 0))
		PANIC("inact_mtx enter");
#endif
	if (!is_robust_mutex(m))
		return (0);

	mutex_init_robust(curthread);
	curthread->inact_mtx = (uintptr_t)&m->m_lock;
	return (1);
}

void
_mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
		PANIC("inact_mtx leave");
#endif
	curthread->inact_mtx = 0;
}

static int
mutex_check_attr(const struct pthread_mutex_attr *attr)
{

	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
		return (EINVAL);
	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
		return (EINVAL);
	return (0);
}

static void
mutex_init_robust(struct pthread *curthread)
{
	struct umtx_robust_lists_params rb;

	if (curthread == NULL)
		curthread = _get_curthread();
	if (curthread->robust_inited)
		return;
	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
	curthread->robust_inited = 1;
}

static void
mutex_init_body(struct pthread_mutex *pmutex,
    const struct pthread_mutex_attr *attr)
{

	pmutex->m_flags = attr->m_type;
	pmutex->m_count = 0;
	pmutex->m_spinloops = 0;
	pmutex->m_yieldloops = 0;
	mutex_init_link(pmutex);
	switch (attr->m_protocol) {
	case PTHREAD_PRIO_NONE:
		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
		pmutex->m_lock.m_flags = 0;
		break;
	case PTHREAD_PRIO_INHERIT:
		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
		break;
	case PTHREAD_PRIO_PROTECT:
		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
		break;
	}
	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
		mutex_init_robust(NULL);
		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
	}
	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
		pmutex->m_spinloops =
		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
		pmutex->m_yieldloops = _thr_yieldloops;
	}
}

static int
mutex_init(pthread_mutex_t *mutex,
    const struct pthread_mutex_attr *mutex_attr,
    void *(calloc_cb)(size_t, size_t))
{
	const struct pthread_mutex_attr *attr;
	struct pthread_mutex *pmutex;
	int error;

	if (mutex_attr == NULL) {
		attr = &_pthread_mutexattr_default;
	} else {
		attr = mutex_attr;
		error = mutex_check_attr(attr);
		if (error != 0)
			return (error);
	}
	if ((pmutex = (pthread_mutex_t)
		calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
		return (ENOMEM);
	mutex_init_body(pmutex, attr);
	*mutex = pmutex;
	return (0);
}

static int
init_static(struct pthread *thread, pthread_mutex_t *mutex)
{
	int ret;

	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);

	if (*mutex == THR_MUTEX_INITIALIZER)
		ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
		    calloc);
	else
		ret = 0;
	THR_LOCK_RELEASE(thread, &_mutex_static_lock);

	return (ret);
}
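
/*
 * Illustrative sketch (not part of libthr): statically initialized
 * mutexes take the init_static() path above on their first lock.  The
 * caller only names the initializer; PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
 * is the FreeBSD extension that selects the adaptive default attributes.
 *
 *	static pthread_mutex_t plain_lock = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_mutex_t spin_lock =
 *	    PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
 *
 *	pthread_mutex_lock(&plain_lock);
 *	pthread_mutex_unlock(&plain_lock);
 */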

static void
set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *m2;

	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
	if (m2 != NULL)
		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
	else
		m->m_lock.m_ceilings[1] = -1;
}

static void
shared_mutex_init(struct pthread_mutex *pmtx, const struct
    pthread_mutex_attr *mutex_attr)
{
	static const struct pthread_mutex_attr foobar_mutex_attr = {
		.m_type = PTHREAD_MUTEX_DEFAULT,
		.m_protocol = PTHREAD_PRIO_NONE,
		.m_ceiling = 0,
		.m_pshared = PTHREAD_PROCESS_SHARED,
		.m_robust = PTHREAD_MUTEX_STALLED,
	};
	bool done;

	/*
	 * Hack to allow multiple pthread_mutex_init() calls on the
	 * same process-shared mutex.  We rely on the kernel allocating
	 * a zeroed offpage for the mutex, i.e. the
	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
	 */
	for (done = false; !done;) {
		switch (pmtx->m_ps) {
		case PMUTEX_INITSTAGE_DONE:
			atomic_thread_fence_acq();
			done = true;
			break;
		case PMUTEX_INITSTAGE_ALLOC:
			if (atomic_cmpset_int(&pmtx->m_ps,
			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
				if (mutex_attr == NULL)
					mutex_attr = &foobar_mutex_attr;
				mutex_init_body(pmtx, mutex_attr);
				atomic_store_rel_int(&pmtx->m_ps,
				    PMUTEX_INITSTAGE_DONE);
				done = true;
			}
			break;
		case PMUTEX_INITSTAGE_BUSY:
			_pthread_yield();
			break;
		default:
			PANIC("corrupted offpage");
			break;
		}
	}
}

int
__pthread_mutex_init(pthread_mutex_t *mutex,
    const pthread_mutexattr_t *mutex_attr)
{
	struct pthread_mutex *pmtx;
	int ret;

	if (mutex_attr != NULL) {
		ret = mutex_check_attr(*mutex_attr);
		if (ret != 0)
			return (ret);
	}
	if (mutex_attr == NULL ||
	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
		    calloc));
	}
	pmtx = __thr_pshared_offpage(mutex, 1);
	if (pmtx == NULL)
		return (EFAULT);
	*mutex = THR_PSHARED_PTR;
	shared_mutex_init(pmtx, *mutex_attr);
	return (0);
}
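
/*
 * Illustrative sketch (not part of libthr): a process-shared mutex is
 * placed in memory mapped into both processes and initialized once
 * with the PROCESS_SHARED attribute; the user-visible word is then set
 * to THR_PSHARED_PTR and the real lock lives in the shared offpage.
 * "shm" below is a hypothetical MAP_SHARED mapping and error handling
 * is omitted.
 *
 *	pthread_mutex_t *mp = &shm->lock;
 *	pthread_mutexattr_t attr;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
 *	pthread_mutex_init(mp, &attr);
 *	pthread_mutexattr_destroy(&attr);
 */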

/* This function is used internally by malloc. */
int
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{
	static const struct pthread_mutex_attr attr = {
		.m_type = PTHREAD_MUTEX_NORMAL,
		.m_protocol = PTHREAD_PRIO_NONE,
		.m_ceiling = 0,
		.m_pshared = PTHREAD_PROCESS_PRIVATE,
		.m_robust = PTHREAD_MUTEX_STALLED,
	};
	int ret;

	ret = mutex_init(mutex, &attr, calloc_cb);
	if (ret == 0)
		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
	return (ret);
}

/*
 * Fix mutex ownership for child process.
 *
 * Process private mutex ownership is transmitted from the forking
 * thread to the child process.
 *
 * Process shared mutexes should not be inherited because their owner
 * is the forking thread, which lives in the parent process; they are
 * removed from the owned mutex list.
 */
static void
queue_fork(struct pthread *curthread, struct mutex_queue *q,
    struct mutex_queue *qp, uint bit)
{
	struct pthread_mutex *m;

	TAILQ_INIT(q);
	TAILQ_FOREACH(m, qp, m_pqe) {
		TAILQ_INSERT_TAIL(q, m, m_qe);
		m->m_lock.m_owner = TID(curthread) | bit;
	}
}

void
_mutex_fork(struct pthread *curthread)
{

	queue_fork(curthread, &curthread->mq[TMQ_NORM],
	    &curthread->mq[TMQ_NORM_PRIV], 0);
	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
	curthread->robust_list = 0;
}
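
/*
 * Illustrative sketch (not part of libthr): since only mutexes owned
 * by the forking thread remain meaningful in the child, applications
 * typically serialize fork() against their own locks with
 * pthread_atfork(), so every private mutex crossing the fork is held
 * (and then released) by the forking thread itself.  Hypothetical
 * handlers:
 *
 *	static pthread_mutex_t app_lock = PTHREAD_MUTEX_INITIALIZER;
 *
 *	static void prepare(void) { pthread_mutex_lock(&app_lock); }
 *	static void parent(void) { pthread_mutex_unlock(&app_lock); }
 *	static void child(void) { pthread_mutex_unlock(&app_lock); }
 *
 *	pthread_atfork(prepare, parent, child);
 */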

int
_pthread_mutex_destroy(pthread_mutex_t *mutex)
{
	pthread_mutex_t m, m1;
	int ret;

	m = *mutex;
	if (m < THR_MUTEX_DESTROYED) {
		ret = 0;
	} else if (m == THR_MUTEX_DESTROYED) {
		ret = EINVAL;
	} else {
		if (m == THR_PSHARED_PTR) {
			m1 = __thr_pshared_offpage(mutex, 0);
			if (m1 != NULL) {
				mutex_assert_not_owned(_get_curthread(), m1);
				__thr_pshared_destroy(mutex);
			}
			*mutex = THR_MUTEX_DESTROYED;
			return (0);
		}
		if (PMUTEX_OWNER_ID(m) != 0 &&
		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
			ret = EBUSY;
		} else {
			*mutex = THR_MUTEX_DESTROYED;
			mutex_assert_not_owned(_get_curthread(), m);
			free(m);
			ret = 0;
		}
	}

	return (ret);
}

static int
mutex_qidx(struct pthread_mutex *m)
{

	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (TMQ_NORM);
	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
}

/*
 * Both enqueue_mutex() and dequeue_mutex() operate on the
 * thread-private linkage of the locked mutexes and on the robust
 * linkage.
 *
 * The robust list, as seen by the kernel, must be consistent even if a
 * thread terminates at an arbitrary moment.  Since either enqueue or
 * dequeue on the list walked by the kernel consists of rewriting a
 * single forward pointer, this is safe.  On the other hand, the rewrite
 * of the back pointer is not atomic WRT the forward one, but the
 * kernel does not care.
 */
static void
enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
    int error)
{
	struct pthread_mutex *m1;
	uintptr_t *rl;
	int qidx;

	/* Add to the list of owned mutexes: */
	if (error != EOWNERDEAD)
		mutex_assert_not_owned(curthread, m);
	qidx = mutex_qidx(m);
	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
	if (!is_pshared_mutex(m))
		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
	if (is_robust_mutex(m)) {
		rl = is_pshared_mutex(m) ? &curthread->robust_list :
		    &curthread->priv_robust_list;
		m->m_rb_prev = NULL;
		if (*rl != 0) {
			m1 = __containerof((void *)*rl,
			    struct pthread_mutex, m_lock);
			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
			m1->m_rb_prev = m;
		} else {
			m1 = NULL;
			m->m_lock.m_rb_lnk = 0;
		}
		*rl = (uintptr_t)&m->m_lock;
	}
}
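
/*
 * Illustrative sketch (not part of libthr) of the robust linkage built
 * by enqueue_mutex() above, for a thread owning three private robust
 * mutexes A, B and C acquired in that order (the newest acquisition is
 * always at the head).  The kernel walks only the m_rb_lnk pointers:
 *
 *	curthread->priv_robust_list -> &C->m_lock
 *	C->m_lock.m_rb_lnk -> &B->m_lock,  C->m_rb_prev == NULL
 *	B->m_lock.m_rb_lnk -> &A->m_lock,  B->m_rb_prev == C
 *	A->m_lock.m_rb_lnk == 0,           A->m_rb_prev == B
 *
 * The m_rb_prev back pointers exist only for the userspace dequeue and
 * are never examined by the kernel.
 */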

static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *mp, *mn;
	int qidx;

	mutex_assert_is_owned(m);
	qidx = mutex_qidx(m);
	if (is_robust_mutex(m)) {
		mp = m->m_rb_prev;
		if (mp == NULL) {
			if (is_pshared_mutex(m)) {
				curthread->robust_list = m->m_lock.m_rb_lnk;
			} else {
				curthread->priv_robust_list =
				    m->m_lock.m_rb_lnk;
			}
		} else {
			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
		}
		if (m->m_lock.m_rb_lnk != 0) {
			mn = __containerof((void *)m->m_lock.m_rb_lnk,
			    struct pthread_mutex, m_lock);
			mn->m_rb_prev = m->m_rb_prev;
		}
		m->m_lock.m_rb_lnk = 0;
		m->m_rb_prev = NULL;
	}
	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
	if (!is_pshared_mutex(m))
		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
		set_inherited_priority(curthread, m);
	mutex_init_link(m);
}

static int
check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
{
	int ret;

	*m = *mutex;
	ret = 0;
	if (*m == THR_PSHARED_PTR) {
		*m = __thr_pshared_offpage(mutex, 0);
		if (*m == NULL)
			ret = EINVAL;
		else
			shared_mutex_init(*m, NULL);
	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
		if (*m == THR_MUTEX_DESTROYED) {
			ret = EINVAL;
		} else {
			ret = init_static(_get_curthread(), mutex);
			if (ret == 0)
				*m = *mutex;
		}
	}
	return (ret);
}

int
__pthread_mutex_trylock(pthread_mutex_t *mutex)
{
	struct pthread *curthread;
	struct pthread_mutex *m;
	uint32_t id;
	int ret, robust;

	ret = check_and_init_mutex(mutex, &m);
	if (ret != 0)
		return (ret);
	curthread = _get_curthread();
	id = TID(curthread);
	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	robust = _mutex_enter_robust(curthread, m);
	ret = _thr_umutex_trylock(&m->m_lock, id);
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else if (PMUTEX_OWNER_ID(m) == id) {
		ret = mutex_self_trylock(m);
	} /* else {} */
	if (robust)
		_mutex_leave_robust(curthread, m);
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}

static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
	uint32_t id, owner;
	int count, ret;

	id = TID(curthread);
	if (PMUTEX_OWNER_ID(m) == id)
		return (mutex_self_lock(m, abstime));

	/*
	 * For adaptive mutexes, spin for a bit in the expectation
	 * that if the application requested this mutex type then the
	 * lock is likely to be released quickly, which makes spinning
	 * faster than entering the kernel.
	 */
	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
		goto sleep_in_kernel;

	if (!_thr_is_smp)
		goto yield_loop;

	count = m->m_spinloops;
	while (count--) {
		owner = m->m_lock.m_owner;
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
		CPU_SPINWAIT;
	}

yield_loop:
	count = m->m_yieldloops;
	while (count--) {
		_sched_yield();
		owner = m->m_lock.m_owner;
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
	}

sleep_in_kernel:
	if (abstime == NULL)
		ret = __thr_umutex_lock(&m->m_lock, id);
	else if (__predict_false(abstime->tv_nsec < 0 ||
	    abstime->tv_nsec >= 1000000000))
		ret = EINVAL;
	else
		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
	if (ret == 0 || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	}
	return (ret);
}

static inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
    bool cvattach, bool rb_onlist)
{
	struct pthread *curthread;
	int ret, robust;

	robust = 0;  /* pacify gcc */
	curthread = _get_curthread();
	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	if (!rb_onlist)
		robust = _mutex_enter_robust(curthread, m);
	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
	if (ret == 0 || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else {
		ret = mutex_lock_sleep(curthread, m, abstime);
	}
	if (!rb_onlist && robust)
		_mutex_leave_robust(curthread, m);
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}

int
__pthread_mutex_lock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	int ret;

	_thr_check_init();
	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		ret = mutex_lock_common(m, NULL, false, false);
	return (ret);
}

int
__pthread_mutex_timedlock(pthread_mutex_t *mutex,
    const struct timespec *abstime)
{
	struct pthread_mutex *m;
	int ret;

	_thr_check_init();
	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		ret = mutex_lock_common(m, abstime, false, false);
	return (ret);
}

int
_pthread_mutex_unlock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *mp;

	if (*mutex == THR_PSHARED_PTR) {
		mp = __thr_pshared_offpage(mutex, 0);
		if (mp == NULL)
			return (EINVAL);
		shared_mutex_init(mp, NULL);
	} else {
		mp = *mutex;
	}
	return (mutex_unlock_common(mp, false, NULL));
}

int
_mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
{
	int error;

	error = mutex_lock_common(m, NULL, true, rb_onlist);
	if (error == 0 || error == EOWNERDEAD)
		m->m_count = count;
	return (error);
}

int
_mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
{

	/*
	 * Clear the count in case this is a recursive mutex.
	 */
	*count = m->m_count;
	m->m_count = 0;
	(void)mutex_unlock_common(m, true, defer);
	return (0);
}

int
_mutex_cv_attach(struct pthread_mutex *m, int count)
{
	struct pthread *curthread;

	curthread = _get_curthread();
	enqueue_mutex(curthread, m, 0);
	m->m_count = count;
	return (0);
}

int
_mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
{
	struct pthread *curthread;
	int deferred, error;

	curthread = _get_curthread();
	if ((error = _mutex_owned(curthread, mp)) != 0)
		return (error);

	/*
	 * Clear the count in case this is a recursive mutex.
	 */
	*recurse = mp->m_count;
	mp->m_count = 0;
	dequeue_mutex(curthread, mp);

	/* Will this happen in the real world? */
	if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
		deferred = 1;
		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
	} else
		deferred = 0;

	if (deferred) {
		_thr_wake_all(curthread->defer_waiters,
		    curthread->nwaiter_defer);
		curthread->nwaiter_defer = 0;
	}
	return (0);
}

static int
mutex_self_trylock(struct pthread_mutex *m)
{
	int ret;

	switch (PMUTEX_TYPE(m->m_flags)) {
	case PTHREAD_MUTEX_ERRORCHECK:
	case PTHREAD_MUTEX_NORMAL:
	case PTHREAD_MUTEX_ADAPTIVE_NP:
		ret = EBUSY;
		break;

	case PTHREAD_MUTEX_RECURSIVE:
		/* Increment the lock count: */
		if (m->m_count + 1 > 0) {
			m->m_count++;
			ret = 0;
		} else
			ret = EAGAIN;
		break;

	default:
		/* Trap invalid mutex types. */
		ret = EINVAL;
	}

	return (ret);
}

static int
mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
{
	struct timespec	ts1, ts2;
	int ret;

	switch (PMUTEX_TYPE(m->m_flags)) {
	case PTHREAD_MUTEX_ERRORCHECK:
	case PTHREAD_MUTEX_ADAPTIVE_NP:
		if (abstime) {
			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
			    abstime->tv_nsec >= 1000000000) {
				ret = EINVAL;
			} else {
				clock_gettime(CLOCK_REALTIME, &ts1);
				TIMESPEC_SUB(&ts2, abstime, &ts1);
				__sys_nanosleep(&ts2, NULL);
				ret = ETIMEDOUT;
			}
		} else {
			/*
			 * POSIX specifies that mutexes should return
			 * EDEADLK if a recursive lock is detected.
			 */
			ret = EDEADLK;
		}
		break;

	case PTHREAD_MUTEX_NORMAL:
		/*
		 * What SS2 defines as a 'normal' mutex.  Intentionally
		 * deadlock on attempts to get a lock you already own.
		 */
		ret = 0;
		if (abstime) {
			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
			    abstime->tv_nsec >= 1000000000) {
				ret = EINVAL;
			} else {
				clock_gettime(CLOCK_REALTIME, &ts1);
				TIMESPEC_SUB(&ts2, abstime, &ts1);
				__sys_nanosleep(&ts2, NULL);
				ret = ETIMEDOUT;
			}
		} else {
			ts1.tv_sec = 30;
			ts1.tv_nsec = 0;
			for (;;)
				__sys_nanosleep(&ts1, NULL);
		}
		break;

	case PTHREAD_MUTEX_RECURSIVE:
		/* Increment the lock count: */
		if (m->m_count + 1 > 0) {
			m->m_count++;
			ret = 0;
		} else
			ret = EAGAIN;
		break;

	default:
		/* Trap invalid mutex types. */
		ret = EINVAL;
	}

	return (ret);
}
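
/*
 * Illustrative sketch (not part of libthr): the self-lock cases above
 * are what an owner sees when it locks the same mutex again.  Error
 * handling is omitted:
 *
 *	pthread_mutex_t m;
 *	pthread_mutexattr_t attr;
 *	int ret;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
 *	pthread_mutex_init(&m, &attr);
 *	pthread_mutex_lock(&m);
 *	ret = pthread_mutex_lock(&m);
 *
 * Here ret is EDEADLK.  With PTHREAD_MUTEX_RECURSIVE the second lock
 * succeeds and the mutex must be unlocked twice; with
 * PTHREAD_MUTEX_NORMAL the second lock deadlocks by design.
 */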

static int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
	struct pthread *curthread;
	uint32_t id;
	int deferred, error, robust;

	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
		if (m == THR_MUTEX_DESTROYED)
			return (EINVAL);
		return (EPERM);
	}

	curthread = _get_curthread();
	id = TID(curthread);

	/*
	 * Check if the running thread is not the owner of the mutex.
	 */
	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
		return (EPERM);

	error = 0;
	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
		m->m_count--;
	} else {
		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
			deferred = 1;
			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
		} else
			deferred = 0;

		robust = _mutex_enter_robust(curthread, m);
		dequeue_mutex(curthread, m);
		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
		if (deferred) {
			if (mtx_defer == NULL) {
				_thr_wake_all(curthread->defer_waiters,
				    curthread->nwaiter_defer);
				curthread->nwaiter_defer = 0;
			} else
				*mtx_defer = 1;
		}
		if (robust)
			_mutex_leave_robust(curthread, m);
	}
	if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_LEAVE(curthread);
	return (error);
}

int
_pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
    int *prioceiling)
{
	struct pthread_mutex *m;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (EINVAL);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (EINVAL);
	}
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	*prioceiling = m->m_lock.m_ceilings[0];
	return (0);
}

int
_pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
    int ceiling, int *old_ceiling)
{
	struct pthread *curthread;
	struct pthread_mutex *m, *m1, *m2;
	struct mutex_queue *q, *qp;
	int qidx, ret;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (EINVAL);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (EINVAL);
	}
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);

	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
	if (ret != 0)
		return (ret);

	curthread = _get_curthread();
	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
		mutex_assert_is_owned(m);
		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
		m2 = TAILQ_NEXT(m, m_qe);
		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
			qidx = mutex_qidx(m);
			q = &curthread->mq[qidx];
			qp = &curthread->mq[qidx + 1];
			TAILQ_REMOVE(q, m, m_qe);
			if (!is_pshared_mutex(m))
				TAILQ_REMOVE(qp, m, m_pqe);
			TAILQ_FOREACH(m2, q, m_qe) {
				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
					TAILQ_INSERT_BEFORE(m2, m, m_qe);
					if (!is_pshared_mutex(m)) {
						while (m2 != NULL &&
						    is_pshared_mutex(m2)) {
							m2 = TAILQ_PREV(m2,
							    mutex_queue, m_qe);
						}
						if (m2 == NULL) {
							TAILQ_INSERT_HEAD(qp,
							    m, m_pqe);
						} else {
							TAILQ_INSERT_BEFORE(m2,
							    m, m_pqe);
						}
					}
					return (0);
				}
			}
			TAILQ_INSERT_TAIL(q, m, m_qe);
			if (!is_pshared_mutex(m))
				TAILQ_INSERT_TAIL(qp, m, m_pqe);
		}
	}
	return (0);
}
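
/*
 * Illustrative sketch (not part of libthr): ceiling changes go through
 * the standard priority-protect attribute interface; the old ceiling
 * is returned through the last argument.  Error handling is omitted
 * and the ceiling values are arbitrary.
 *
 *	pthread_mutex_t m;
 *	pthread_mutexattr_t attr;
 *	int old;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
 *	pthread_mutexattr_setprioceiling(&attr, 20);
 *	pthread_mutex_init(&m, &attr);
 *	pthread_mutex_setprioceiling(&m, 25, &old);
 */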

int
_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		*count = m->m_spinloops;
	return (ret);
}

int
__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		m->m_spinloops = count;
	return (ret);
}

int
_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		*count = m->m_yieldloops;
	return (ret);
}

int
__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		m->m_yieldloops = count;
	return (ret);
}

int
_pthread_mutex_isowned_np(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (0);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (0);
	}
	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
}

int
_mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
{

	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
		if (mp == THR_MUTEX_DESTROYED)
			return (EINVAL);
		return (EPERM);
	}
	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
		return (EPERM);
	return (0);
}

int
_pthread_mutex_consistent(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	struct pthread *curthread;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (EINVAL);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (EINVAL);
	}
	curthread = _get_curthread();
	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
		return (EINVAL);
	if (PMUTEX_OWNER_ID(m) != TID(curthread))
		return (EPERM);
	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
	return (0);
}
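
/*
 * Illustrative sketch (not part of libthr): pthread_mutex_consistent()
 * is the recovery half of the robust-mutex protocol.  If the previous
 * owner died while holding the lock, the next locker gets EOWNERDEAD,
 * repairs the protected state, and marks the mutex consistent before
 * unlocking.  Error handling is abbreviated.
 *
 *	ret = pthread_mutex_lock(&m);
 *	if (ret == EOWNERDEAD) {
 *		repair_shared_state();
 *		pthread_mutex_consistent(&m);
 *	}
 *	pthread_mutex_unlock(&m);
 *
 * Here "repair_shared_state" is a hypothetical application routine; the
 * mutex itself must have been created with
 * pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST).
 */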