kern_umtx.c revision 162550
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 162550 2006-09-22 14:59:10Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>	/* for uma_zcreate()/uma_zalloc()/uma_zfree() below */

#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5
/* Key to represent a unique userland synchronization object */
struct umtx_key {
	int	hash;
	int	type;
	int	shared;
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A waiter on a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may hold
	 * either the chain lock or sched_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that others contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes in this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, and priority propagation would boost
 * A's priority as well.  A's priority would then never be lowered,
 * even if A is using 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)
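
/*
 * GOLDEN_RATIO_PRIME is 0x9e370001, a prime close to 2^32 divided by
 * the golden ratio; multiplying by it spreads nearby addresses across
 * the hash space, and umtxq_hash() below keeps the top bits of the
 * 32-bit product (via UMTX_SHIFTS) reduced modulo UMTX_CHAINS.
 */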

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(void);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
			 MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}

/*
 * Mark a chain busy when the following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Check if there are multiple waiters
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq, *next;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			umtxq_remove(uq);
			wakeup(uq);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread to sleep.  Before sleeping, check whether the
 * thread has already been removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else if (share == PROCESS_SHARE || share == AUTO_SHARE) {
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
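
/*
 * A shared key (PROCESS_SHARE, or AUTO_SHARE on a VM_INHERIT_SHARE
 * mapping) identifies the lock by its backing VM object plus offset,
 * so processes mapping the same page at different addresses still
 * reach the same wait queue; a private key is simply the vmspace plus
 * the untranslated virtual address.
 */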

/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, uintptr_t id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal while sleeping, we have already
		 * retried once; exit immediately now.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object, with an optional timeout.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, uintptr_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
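
/*
 * The timed path above converts the relative timeout into an absolute
 * deadline (ts) once, then re-arms tvtohz() with the remaining time
 * (ts3) after every ETIMEDOUT wakeup, so early returns do not shorten
 * the total wait.  The same pattern recurs in the other timed lock
 * routines below.
 */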

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, uintptr_t id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr; this is not correct on archs where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	owner = fuword(&umtx->u_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
			UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
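
/*
 * For reference, a minimal sketch of the userland fast path the
 * comments above assume ("this should be done in userland").  It is
 * illustrative only and not part of this file: the wrapper names are
 * hypothetical, it assumes u_owner is word-sized (u_long), and it
 * uses the _umtx_lock()/_umtx_unlock() syscall stubs for the
 * contested cases.
 */
#if 0
static void
example_umtx_lock(struct umtx *mtx, u_long id)
{
	/* Uncontested acquire: one CAS, no kernel entry. */
	if (atomic_cmpset_acq_long((volatile u_long *)&mtx->u_owner,
	    UMTX_UNOWNED, id))
		return;
	/* Contested: let the kernel queue us and sleep. */
	_umtx_lock(mtx);
}

static void
example_umtx_unlock(struct umtx *mtx, u_long id)
{
	/* Uncontested release: clear ownership with one CAS. */
	if (atomic_cmpset_rel_long((volatile u_long *)&mtx->u_owner,
	    id, UMTX_UNOWNED))
		return;
	/* UMTX_CONTESTED was set: the kernel must wake a waiter. */
	_umtx_unlock(mtx);
}
#endif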

#ifdef COMPAT_IA32

/*
 * Lock a 32-bit umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal while sleeping, we have already
		 * retried once; exit immediately now.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a 32-bit umtx object, with an optional timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a 32-bit umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare a value; sleep on the address if the value
 * is unchanged.
 */
static int
do_wait(struct thread *td, void *addr, uintptr_t id,
	struct timespec *timeout, int compat32)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uintptr_t tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "ucond", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
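
/*
 * Together, do_wait() and kern_umtx_wake() give userland a futex-style
 * primitive.  A minimal sketch of the usage, from userland via the
 * _umtx_op(2) entry point (the flag variable and values below are
 * illustrative only):
 */
#if 0
volatile long flag = 0;

void
waiter(void)
{
	/* Sleeps only while flag still holds the expected value 0. */
	while (flag == 0)
		_umtx_op((void *)&flag, UMTX_OP_WAIT, 0, NULL, NULL);
}

void
waker(void)
{
	flag = 1;
	/* Wake at most one thread sleeping on &flag. */
	_umtx_op((void *)&flag, UMTX_OP_WAKE, 1, NULL, NULL);
}
#endif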

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal while sleeping, we have already
		 * retried once; exit immediately now.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(void)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | M_WAITOK);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
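
/*
 * Example: if T1 (priority 80) blocks on mutex M1 owned by T2, and T2
 * is itself blocked on M2 owned by T3, the loop above first lends
 * priority 80 to T2, then follows T2's uq_pi_blocked pointer to M2 and
 * lends 80 to T3 as well, stopping once an owner already runs at that
 * priority or better (or is not blocked on a PI mutex).
 */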

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		pi = uq_owner->uq_pi_blocked;
	}
}

/*
 * Insert a PI mutex into the owner's owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&sched_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&sched_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&sched_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock_spin(&sched_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;

	mtx_assert(&sched_lock, MA_OWNED);
	MPASS(TD_ON_UPILOCK(td));

	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently, we only support process-private PI mutexes;
		 * non-contended PI mutexes are locked in userland.
		 * Process-shared PI mutexes should always be initialized
		 * and registered in the kernel, and locking should always
		 * be done by the kernel, to avoid security problems.
		 * For a process-private PI mutex, we can find the owner
		 * thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&sched_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&sched_lock);
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	td->td_flags |= TDF_UPIBLOCKED;
	mtx_unlock_spin(&sched_lock);
	umtxq_unlock(&uq->uq_key);

	mtx_lock_spin(&sched_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	mtx_lock_spin(&sched_lock);
	uq->uq_pi_blocked = NULL;
	td->td_flags &= ~TDF_UPIBLOCKED;
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrement the reference count of a PI mutex; if the count
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&sched_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&sched_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	if (free)
		umtx_pi_free(pi);
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		pi = NULL;
		umtxq_lock(&uq->uq_key);
		pi = umtx_pi_lookup(&uq->uq_key);
		if (pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc();
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL)
				umtx_pi_free(new_pi);
			else {
				umtx_pi_insert(new_pi);
				pi = new_pi;
			}
		}

		umtx_pi_ref(pi);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Care must be exercised when dealing with the umtx
		 * structure.  It can fault on any access.
		 */

		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			umtxq_lock(&uq->uq_key);
			umtx_pi_unref(pi);
			umtxq_unlock(&uq->uq_key);
			pi = NULL;
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal while sleeping, we have already
		 * retried once; exit immediately now.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
		pi = NULL;
	}

	if (pi != NULL) {
		umtxq_lock(&uq->uq_key);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&sched_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	su = (suser(td) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&sched_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&sched_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
		}
		mtx_unlock_spin(&sched_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal while sleeping, we have already
		 * retried once; exit immediately now.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (suser(td) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&sched_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal while sleeping, we have already
		 * retried once; exit immediately now.
		 */
		if (error != 0)
			break;

		/*
		 * The mutex is held by another thread; sleep until it
		 * is released, then retry.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
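
/*
 * do_set_ceiling() backs the UMTX_OP_SET_CEILING operation; a userland
 * pthread_mutex_setprioceiling(3) implementation is expected to use it,
 * since the ceiling can only be changed safely while the kernel holds
 * the mutex in the contested state, as done above.  The old ceiling is
 * written back through old_ceiling when requested.
 */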
2089
2090static int
2091_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2092	int try)
2093{
2094	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2095	case 0:
2096		return (_do_lock_normal(td, m, flags, timo, try));
2097	case UMUTEX_PRIO_INHERIT:
2098		return (_do_lock_pi(td, m, flags, timo, try));
2099	case UMUTEX_PRIO_PROTECT:
2100		return (_do_lock_pp(td, m, flags, timo, try));
2101	}
2102	return (EINVAL);
2103}
2104
2105/*
2106 * Lock a userland POSIX mutex.
2107 */
2108static int
2109do_lock_umutex(struct thread *td, struct umutex *m,
2110	struct timespec *timeout, int try)
2111{
2112	struct timespec ts, ts2, ts3;
2113	struct timeval tv;
2114	uint32_t flags;
2115	int error;
2116
2117	flags = fuword32(&m->m_flags);
2118	if (flags == -1)
2119		return (EFAULT);
2120
2121	if (timeout == NULL) {
2122		error = _do_lock_umutex(td, m, flags, 0, try);
2123		/* Mutex locking is restarted if it is interrupted. */
2124		if (error == EINTR)
2125			error = ERESTART;
2126	} else {
2127		getnanouptime(&ts);
2128		timespecadd(&ts, timeout);
2129		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2130		for (;;) {
2131			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
2132			if (error != ETIMEDOUT)
2133				break;
2134			getnanouptime(&ts2);
2135			if (timespeccmp(&ts2, &ts, >=)) {
2136				error = ETIMEDOUT;
2137				break;
2138			}
2139			ts3 = ts;
2140			timespecsub(&ts3, &ts2);
2141			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2142		}
2143		/* Timed-locking is not restarted. */
2144		if (error == ERESTART)
2145			error = EINTR;
2146	}
2147	return (error);
2148}
2149
2150/*
2151 * Unlock a userland POSIX mutex.
2152 */
2153static int
2154do_unlock_umutex(struct thread *td, struct umutex *m)
2155{
2156	uint32_t flags;
2157
2158	flags = fuword32(&m->m_flags);
2159	if (flags == -1)
2160		return (EFAULT);
2161
2162	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2163	case 0:
2164		return (do_unlock_normal(td, m, flags));
2165	case UMUTEX_PRIO_INHERIT:
2166		return (do_unlock_pi(td, m, flags));
2167	case UMUTEX_PRIO_PROTECT:
2168		return (do_unlock_pp(td, m, flags));
2169	}
2170
2171	return (EINVAL);
2172}
2173
2174int
2175_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2176    /* struct umtx *umtx */
2177{
2178	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2179}
2180
2181int
2182_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2183    /* struct umtx *umtx */
2184{
2185	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2186}
2187
2188static int
2189__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2190{
2191	struct timespec *ts, timeout;
2192	int error;
2193
2194	/* Allow a null timespec (wait forever). */
2195	if (uap->uaddr2 == NULL)
2196		ts = NULL;
2197	else {
2198		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2199		if (error != 0)
2200			return (error);
2201		if (timeout.tv_nsec >= 1000000000 ||
2202		    timeout.tv_nsec < 0) {
2203			return (EINVAL);
2204		}
2205		ts = &timeout;
2206	}
2207	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2208}
2209
2210static int
2211__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2212{
2213	return (do_unlock_umtx(td, uap->obj, uap->val));
2214}
2215
2216static int
2217__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2218{
2219	struct timespec *ts, timeout;
2220	int error;
2221
2222	if (uap->uaddr2 == NULL)
2223		ts = NULL;
2224	else {
2225		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2226		if (error != 0)
2227			return (error);
2228		if (timeout.tv_nsec >= 1000000000 ||
2229		    timeout.tv_nsec < 0)
2230			return (EINVAL);
2231		ts = &timeout;
2232	}
2233	return (do_wait(td, uap->obj, uap->val, ts, 0));
2234}
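/*
 * Illustrative sketch (not part of the original file): UMTX_OP_WAIT and
 * UMTX_OP_WAKE used as a futex-style event flag.  do_wait() only sleeps
 * while the word still equals the value passed in uap->val, so a wake
 * racing with the store cannot be lost.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <limits.h>

static volatile u_long example_flag;

static void
example_wait_for_flag(void)
{
	while (example_flag == 0)
		_umtx_op(__DEVOLATILE(u_long *, &example_flag),
		    UMTX_OP_WAIT, 0, NULL, NULL);
}

static void
example_set_flag(void)
{
	example_flag = 1;
	_umtx_op(__DEVOLATILE(u_long *, &example_flag),
	    UMTX_OP_WAKE, INT_MAX, NULL, NULL);
}
#endif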
2235
2236static int
2237__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2238{
2239	return (kern_umtx_wake(td, uap->obj, uap->val));
2240}
2241
2242static int
2243__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2244{
2245	struct timespec *ts, timeout;
2246	int error;
2247
2248	/* Allow a null timespec (wait forever). */
2249	if (uap->uaddr2 == NULL)
2250		ts = NULL;
2251	else {
2252		error = copyin(uap->uaddr2, &timeout,
2253		    sizeof(timeout));
2254		if (error != 0)
2255			return (error);
2256		if (timeout.tv_nsec >= 1000000000 ||
2257		    timeout.tv_nsec < 0) {
2258			return (EINVAL);
2259		}
2260		ts = &timeout;
2261	}
2262	return (do_lock_umutex(td, uap->obj, ts, 0));
2263}
2264
2265static int
2266__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2267{
2268	return (do_lock_umutex(td, uap->obj, NULL, 1));
2269}
2270
2271static int
2272__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2273{
2274	return (do_unlock_umutex(td, uap->obj));
2275}
2276
2277static int
2278__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2279{
2280	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
2281}
2282
2283typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
2284
2285static _umtx_op_func op_table[] = {
2286	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
2287	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
2288	__umtx_op_wait,			/* UMTX_OP_WAIT */
2289	__umtx_op_wake,			/* UMTX_OP_WAKE */
2290	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
2291	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
2292	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
2293	__umtx_op_set_ceiling		/* UMTX_OP_SET_CEILING */
2294};
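/*
 * Note (added): op_table is indexed directly by the UMTX_OP_* value, so
 * its order must track the constants in <sys/umtx.h> and its size must
 * stay equal to UMTX_OP_MAX.  A compile-time check would catch any
 * mismatch, e.g.:
 *
 *	CTASSERT(sizeof(op_table) / sizeof(op_table[0]) == UMTX_OP_MAX);
 */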
2295
2296int
2297_umtx_op(struct thread *td, struct _umtx_op_args *uap)
2298{
2299	if (uap->op >= 0 && uap->op < UMTX_OP_MAX)
2300		return (*op_table[uap->op])(td, uap);
2301	return (EINVAL);
2302}
2303
2304#ifdef COMPAT_IA32
2305
2306struct timespec32 {
2307	u_int32_t tv_sec;
2308	u_int32_t tv_nsec;
2309};
2310
2311static inline int
2312copyin_timeout32(void *addr, struct timespec *tsp)
2313{
2314	struct timespec32 ts32;
2315	int error;
2316
2317	error = copyin(addr, &ts32, sizeof(struct timespec32));
2318	if (error == 0) {
2319		tsp->tv_sec = ts32.tv_sec;
2320		tsp->tv_nsec = ts32.tv_nsec;
2321	}
2322	return (error);
2323}
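/*
 * Note (added): both timespec32 fields are declared unsigned, so a
 * negative tv_nsec from a 32-bit process arrives here as a large
 * positive value; it is the callers' "tv_nsec >= 1000000000" range
 * check, not the "tv_nsec < 0" test, that ends up rejecting it.
 */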
2324
2325static int
2326__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2327{
2328	struct timespec *ts, timeout;
2329	int error;
2330
2331	/* Allow a null timespec (wait forever). */
2332	if (uap->uaddr2 == NULL)
2333		ts = NULL;
2334	else {
2335		error = copyin_timeout32(uap->uaddr2, &timeout);
2336		if (error != 0)
2337			return (error);
2338		if (timeout.tv_nsec >= 1000000000 ||
2339		    timeout.tv_nsec < 0) {
2340			return (EINVAL);
2341		}
2342		ts = &timeout;
2343	}
2344	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2345}
2346
2347static int
2348__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2349{
2350	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
2351}
2352
2353static int
2354__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2355{
2356	struct timespec *ts, timeout;
2357	int error;
2358
2359	if (uap->uaddr2 == NULL)
2360		ts = NULL;
2361	else {
2362		error = copyin_timeout32(uap->uaddr2, &timeout);
2363		if (error != 0)
2364			return (error);
2365		if (timeout.tv_nsec >= 1000000000 ||
2366		    timeout.tv_nsec < 0)
2367			return (EINVAL);
2368		ts = &timeout;
2369	}
2370	return (do_wait(td, uap->obj, uap->val, ts, 1));
2371}
2372
2373static int
2374__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2375{
2376	struct timespec *ts, timeout;
2377	int error;
2378
2379	/* Allow a null timespec (wait forever). */
2380	if (uap->uaddr2 == NULL)
2381		ts = NULL;
2382	else {
2383		error = copyin_timeout32(uap->uaddr2, &timeout);
2384		if (error != 0)
2385			return (error);
2386		if (timeout.tv_nsec >= 1000000000 ||
2387		    timeout.tv_nsec < 0)
2388			return (EINVAL);
2389		ts = &timeout;
2390	}
2391	return (do_lock_umutex(td, uap->obj, ts, 0));
2392}
2393
2394static _umtx_op_func op_table_compat32[] = {
2395	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
2396	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
2397	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
2398	__umtx_op_wake,			/* UMTX_OP_WAKE */
2399	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
2400	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
2401	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
2402	__umtx_op_set_ceiling		/* UMTX_OP_SET_CEILING */
2403};
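/*
 * Note (added): only the operations that copy in a struct timespec need
 * dedicated 32-bit wrappers here.  struct umutex is built from
 * fixed-width 32-bit fields, so the wake, trylock, unlock and
 * set-ceiling handlers are shared with the native table above.
 */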
2404
2405int
2406freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
2407{
2408	if (uap->op >= 0 && uap->op < UMTX_OP_MAX)
2409		return (*op_table_compat32[uap->op])(td,
2410			(struct _umtx_op_args *)uap);
2411	return (EINVAL);
2412}
2413#endif
2414
2415void
2416umtx_thread_init(struct thread *td)
2417{
2418	td->td_umtxq = umtxq_alloc();
2419	td->td_umtxq->uq_thread = td;
2420}
2421
2422void
2423umtx_thread_fini(struct thread *td)
2424{
2425	umtxq_free(td->td_umtxq);
2426}
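/*
 * Note (added, an assumption about the surrounding kernel): the
 * init/fini pair above is expected to run with the creation and
 * destruction of the struct thread itself, so every thread carries a
 * umtx queue element (td_umtxq) for its whole life; umtx_thread_alloc()
 * below merely resets that element when a thread object is reused.
 */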
2427
2428/*
2429 * Called when a new thread is created, e.g. by fork().
2430 */
2431void
2432umtx_thread_alloc(struct thread *td)
2433{
2434	struct umtx_q *uq;
2435
2436	uq = td->td_umtxq;
2437	uq->uq_inherited_pri = PRI_MAX;
2438
2439	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2440	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2441	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2442	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
2443}
2444
2445/*
2446 * exec() hook.
2447 */
2448static void
2449umtx_exec_hook(void *arg __unused, struct proc *p __unused,
2450	struct image_params *imgp __unused)
2451{
2452	umtx_thread_cleanup(curthread);
2453}
2454
2455/*
2456 * thread_exit() hook.
2457 */
2458void
2459umtx_thread_exit(struct thread *td)
2460{
2461	umtx_thread_cleanup(td);
2462}
2463
2464/*
2465 * Clean up umtx state for an exiting or exec'ing thread.
2466 */
2467static void
2468umtx_thread_cleanup(struct thread *td)
2469{
2470	struct umtx_q *uq;
2471	struct umtx_pi *pi;
2472
2473	if ((uq = td->td_umtxq) == NULL)
2474		return;
2475
2476	mtx_lock_spin(&sched_lock);
2477	uq->uq_inherited_pri = PRI_MAX;
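	/*
	 * Disown any PI mutexes still recorded against this thread so
	 * that their waiters do not chase a dead owner, and shed any
	 * priority borrowed through them (TDF_UBORROWING).
	 */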
2478	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
2479		pi->pi_owner = NULL;
2480		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
2481	}
2482	td->td_flags &= ~TDF_UBORROWING;
2483	mtx_unlock_spin(&sched_lock);
2484}
2485