kern_umtx.c revision 161742
1/*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice unmodified, this list of conditions, and the following
11 *    disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 161742 2006-08-30 23:59:45Z davidxu $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/limits.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mutex.h>
37#include <sys/proc.h>
38#include <sys/sched.h>
39#include <sys/sysctl.h>
40#include <sys/sysent.h>
41#include <sys/systm.h>
42#include <sys/sysproto.h>
43#include <sys/eventhandler.h>
44#include <sys/umtx.h>
45
46#include <vm/vm.h>
47#include <vm/vm_param.h>
48#include <vm/pmap.h>
49#include <vm/vm_map.h>
50#include <vm/vm_object.h>
51
/*
 * Umtx object types, recorded in umtx_key.type; umtx_key_match()
 * compares the type first, so different kinds of objects hashed to
 * the same chain never match each other.
 */
#define TYPE_SIMPLE_LOCK	0	/* struct umtx lock (_do_lock) */
#define TYPE_SIMPLE_WAIT	1	/* wait/wake address (do_wait) */
#define TYPE_NORMAL_UMUTEX	2	/* PTHREAD_PRIO_NONE struct umutex */
#define TYPE_PI_UMUTEX		3	/* PTHREAD_PRIO_INHERIT struct umutex */
#define TYPE_PP_UMUTEX		4	/* PTHREAD_PRIO_PROTECT struct umutex */
#define TYPE_CV			5	/* condition variable */
58
/* Key to represent a unique userland synchronous object */
struct umtx_key {
	int	hash;		/* Chain index, filled by umtxq_hash(). */
	int	type;		/* One of the TYPE_* object types. */
	int	shared;		/* Non-zero: object-backed (shared) form. */
	union {
		/* Process-shared form: VM object plus offset within it. */
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		/* Process-private form: vmspace plus virtual address. */
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		/* Type-punned view used for hashing and comparison. */
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};
79
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread; NULL while no owner has been registered. */
	struct thread		*pi_owner;

	/* Reference count. */
	int			pi_refcount;

	/* Entry on the owning thread's uq_pi_contested list. */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* Entry on a chain's uc_pi_list (hash) list. */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* Threads blocked on this mutex, kept sorted by UPRI(). */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identifies the userland lock object. */
	struct umtx_key		pi_key;
};
100
/* A userland synchronous object user (one per thread). */
struct umtx_q {
	/* Linked list entry for the chain's uc_queue. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Key of the object currently being waited on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Entry is on a chain's uc_queue. */

	/* The thread this structure belongs to. */
	struct thread		*uq_thread;

	/*
	 * PI mutex the thread is blocked on, or NULL.  Reads may be
	 * done under either the chain lock or sched_lock; writes must
	 * hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on a PI mutex's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that still have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};
132
133TAILQ_HEAD(umtxq_head, umtx_q);
134
135/* Userland lock object's wait-queue chain */
136struct umtxq_chain {
137	/* Lock for this chain. */
138	struct mtx		uc_lock;
139
140	/* List of sleep queues. */
141	struct umtxq_head	uc_queue;
142
143	/* Busy flag */
144	char			uc_busy;
145
146	/* Chain lock waiters */
147	int			uc_waiters;
148
149	/* All PI in the list */
150	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
151};
152
/* Assert that the chain's mutex is held by the current thread. */
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
154
155/*
156 * Don't propagate time-sharing priority, there is a security reason,
157 * a user can simply introduce PI-mutex, let thread A lock the mutex,
158 * and let another thread B block on the mutex, because B is
159 * sleeping, its priority will be boosted, this causes A's priority to
160 * be boosted via priority propagating too and will never be lowered even
161 * if it is using 100%CPU, this is unfair to other processes.
162 */
163
/*
 * User priority of a thread for propagation purposes: any priority in
 * the time-share range is mapped to PRI_MAX_TIMESHARE (see the note
 * above); other priorities are used as-is.
 */
#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)
167
/* Multiplicative hash parameters used by umtxq_hash(). */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes accepted by umtx_key_get(). */
#define THREAD_SHARE		0	/* Always process-private key. */
#define PROCESS_SHARE		1	/* Always object-backed key. */
#define AUTO_SHARE		2	/* Decide from the mapping's inheritance. */

/* Map umutex m_flags to a sharing mode. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
178
179static uma_zone_t		umtx_pi_zone;
180static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
181static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
182static int			umtx_pi_allocated;
183
184SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
185SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
186    &umtx_pi_allocated, 0, "Allocated umtx_pi");
187
188static void umtxq_sysinit(void *);
189static void umtxq_hash(struct umtx_key *key);
190static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
191static void umtxq_lock(struct umtx_key *key);
192static void umtxq_unlock(struct umtx_key *key);
193static void umtxq_busy(struct umtx_key *key);
194static void umtxq_unbusy(struct umtx_key *key);
195static void umtxq_insert(struct umtx_q *uq);
196static void umtxq_remove(struct umtx_q *uq);
197static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
198static int umtxq_count(struct umtx_key *key);
199static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
200static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
201static int umtx_key_get(void *addr, int type, int share,
202	struct umtx_key *key);
203static void umtx_key_release(struct umtx_key *key);
204static struct umtx_pi *umtx_pi_alloc(void);
205static void umtx_pi_free(struct umtx_pi *pi);
206static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
207static void umtx_thread_cleanup(struct thread *td);
208static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
209	struct image_params *imgp __unused);
210SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
211
212static void
213umtxq_sysinit(void *arg __unused)
214{
215	int i;
216
217	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
218		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
219	for (i = 0; i < UMTX_CHAINS; ++i) {
220		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
221			 MTX_DEF | MTX_DUPOK);
222		TAILQ_INIT(&umtxq_chains[i].uc_queue);
223		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
224		umtxq_chains[i].uc_busy = 0;
225		umtxq_chains[i].uc_waiters = 0;
226	}
227	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
228	    EVENTHANDLER_PRI_ANY);
229}
230
231struct umtx_q *
232umtxq_alloc(void)
233{
234	struct umtx_q *uq;
235
236	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
237	TAILQ_INIT(&uq->uq_pi_contested);
238	uq->uq_inherited_pri = PRI_MAX;
239	return (uq);
240}
241
242void
243umtxq_free(struct umtx_q *uq)
244{
245	free(uq, M_UMTX);
246}
247
248static inline void
249umtxq_hash(struct umtx_key *key)
250{
251	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
252	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
253}
254
255static inline int
256umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
257{
258	return (k1->type == k2->type &&
259		k1->info.both.a == k2->info.both.a &&
260	        k1->info.both.b == k2->info.both.b);
261}
262
263static inline struct umtxq_chain *
264umtxq_getchain(struct umtx_key *key)
265{
266	return (&umtxq_chains[key->hash]);
267}
268
269/*
270 * Set chain to busy state when following operation
271 * may be blocked (kernel mutex can not be used).
272 */
273static inline void
274umtxq_busy(struct umtx_key *key)
275{
276	struct umtxq_chain *uc;
277
278	uc = umtxq_getchain(key);
279	mtx_assert(&uc->uc_lock, MA_OWNED);
280	while (uc->uc_busy != 0) {
281		uc->uc_waiters++;
282		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
283		uc->uc_waiters--;
284	}
285	uc->uc_busy = 1;
286}
287
288/*
289 * Unbusy a chain.
290 */
291static inline void
292umtxq_unbusy(struct umtx_key *key)
293{
294	struct umtxq_chain *uc;
295
296	uc = umtxq_getchain(key);
297	mtx_assert(&uc->uc_lock, MA_OWNED);
298	KASSERT(uc->uc_busy != 0, ("not busy"));
299	uc->uc_busy = 0;
300	if (uc->uc_waiters)
301		wakeup_one(uc);
302}
303
304/*
305 * Lock a chain.
306 */
307static inline void
308umtxq_lock(struct umtx_key *key)
309{
310	struct umtxq_chain *uc;
311
312	uc = umtxq_getchain(key);
313	mtx_lock(&uc->uc_lock);
314}
315
316/*
317 * Unlock a chain.
318 */
319static inline void
320umtxq_unlock(struct umtx_key *key)
321{
322	struct umtxq_chain *uc;
323
324	uc = umtxq_getchain(key);
325	mtx_unlock(&uc->uc_lock);
326}
327
328/*
329 * Insert a thread onto the umtx queue.
330 */
331static inline void
332umtxq_insert(struct umtx_q *uq)
333{
334	struct umtxq_chain *uc;
335
336	uc = umtxq_getchain(&uq->uq_key);
337	UMTXQ_LOCKED_ASSERT(uc);
338	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
339	uq->uq_flags |= UQF_UMTXQ;
340}
341
342/*
343 * Remove thread from the umtx queue.
344 */
345static inline void
346umtxq_remove(struct umtx_q *uq)
347{
348	struct umtxq_chain *uc;
349
350	uc = umtxq_getchain(&uq->uq_key);
351	UMTXQ_LOCKED_ASSERT(uc);
352	if (uq->uq_flags & UQF_UMTXQ) {
353		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
354		uq->uq_flags &= ~UQF_UMTXQ;
355	}
356}
357
358/*
359 * Check if there are multiple waiters
360 */
361static int
362umtxq_count(struct umtx_key *key)
363{
364	struct umtxq_chain *uc;
365	struct umtx_q *uq;
366	int count = 0;
367
368	uc = umtxq_getchain(key);
369	UMTXQ_LOCKED_ASSERT(uc);
370	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
371		if (umtx_key_match(&uq->uq_key, key)) {
372			if (++count > 1)
373				break;
374		}
375	}
376	return (count);
377}
378
379/*
380 * Check if there are multiple PI waiters and returns first
381 * waiter.
382 */
383static int
384umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
385{
386	struct umtxq_chain *uc;
387	struct umtx_q *uq;
388	int count = 0;
389
390	*first = NULL;
391	uc = umtxq_getchain(key);
392	UMTXQ_LOCKED_ASSERT(uc);
393	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
394		if (umtx_key_match(&uq->uq_key, key)) {
395			if (++count > 1)
396				break;
397			*first = uq;
398		}
399	}
400	return (count);
401}
402
403/*
404 * Wake up threads waiting on an userland object.
405 */
406static int
407umtxq_signal(struct umtx_key *key, int n_wake)
408{
409	struct umtxq_chain *uc;
410	struct umtx_q *uq, *next;
411	int ret;
412
413	ret = 0;
414	uc = umtxq_getchain(key);
415	UMTXQ_LOCKED_ASSERT(uc);
416	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
417		if (umtx_key_match(&uq->uq_key, key)) {
418			umtxq_remove(uq);
419			wakeup(uq);
420			if (++ret >= n_wake)
421				break;
422		}
423	}
424	return (ret);
425}
426
427/*
428 * Wake up specified thread.
429 */
430static inline void
431umtxq_signal_thread(struct umtx_q *uq)
432{
433	struct umtxq_chain *uc;
434
435	uc = umtxq_getchain(&uq->uq_key);
436	UMTXQ_LOCKED_ASSERT(uc);
437	umtxq_remove(uq);
438	wakeup(uq);
439}
440
441/*
442 * Put thread into sleep state, before sleeping, check if
443 * thread was removed from umtx queue.
444 */
445static inline int
446umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
447{
448	struct umtxq_chain *uc;
449	int error;
450
451	uc = umtxq_getchain(&uq->uq_key);
452	UMTXQ_LOCKED_ASSERT(uc);
453	if (!(uq->uq_flags & UQF_UMTXQ))
454		return (0);
455	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
456	if (error == EWOULDBLOCK)
457		error = ETIMEDOUT;
458	return (error);
459}
460
461/*
462 * Convert userspace address into unique logical address.
463 */
464static int
465umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
466{
467	struct thread *td = curthread;
468	vm_map_t map;
469	vm_map_entry_t entry;
470	vm_pindex_t pindex;
471	vm_prot_t prot;
472	boolean_t wired;
473
474	key->type = type;
475	if (share == THREAD_SHARE) {
476		key->shared = 0;
477		key->info.private.vs = td->td_proc->p_vmspace;
478		key->info.private.addr = (uintptr_t)addr;
479	} else if (share == PROCESS_SHARE || share == AUTO_SHARE) {
480		map = &td->td_proc->p_vmspace->vm_map;
481		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
482		    &entry, &key->info.shared.object, &pindex, &prot,
483		    &wired) != KERN_SUCCESS) {
484			return EFAULT;
485		}
486
487		if ((share == PROCESS_SHARE) ||
488		    (share == AUTO_SHARE &&
489		     VM_INHERIT_SHARE == entry->inheritance)) {
490			key->shared = 1;
491			key->info.shared.offset = entry->offset + entry->start -
492				(vm_offset_t)addr;
493			vm_object_reference(key->info.shared.object);
494		} else {
495			key->shared = 0;
496			key->info.private.vs = td->td_proc->p_vmspace;
497			key->info.private.addr = (uintptr_t)addr;
498		}
499		vm_map_lookup_done(map, entry);
500	}
501
502	umtxq_hash(key);
503	return (0);
504}
505
506/*
507 * Release key.
508 */
509static inline void
510umtx_key_release(struct umtx_key *key)
511{
512	if (key->shared)
513		vm_object_deallocate(key->info.shared.object);
514}
515
516/*
517 * Lock a umtx object.
518 */
519static int
520_do_lock(struct thread *td, struct umtx *umtx, uintptr_t id, int timo)
521{
522	struct umtx_q *uq;
523	intptr_t owner;
524	intptr_t old;
525	int error = 0;
526
527	uq = td->td_umtxq;
528
529	/*
530	 * Care must be exercised when dealing with umtx structure. It
531	 * can fault on any access.
532	 */
533	for (;;) {
534		/*
535		 * Try the uncontested case.  This should be done in userland.
536		 */
537		owner = casuptr((intptr_t *)&umtx->u_owner, UMTX_UNOWNED, id);
538
539		/* The acquire succeeded. */
540		if (owner == UMTX_UNOWNED)
541			return (0);
542
543		/* The address was invalid. */
544		if (owner == -1)
545			return (EFAULT);
546
547		/* If no one owns it but it is contested try to acquire it. */
548		if (owner == UMTX_CONTESTED) {
549			owner = casuptr((intptr_t *)&umtx->u_owner,
550			    UMTX_CONTESTED, id | UMTX_CONTESTED);
551
552			if (owner == UMTX_CONTESTED)
553				return (0);
554
555			/* The address was invalid. */
556			if (owner == -1)
557				return (EFAULT);
558
559			/* If this failed the lock has changed, restart. */
560			continue;
561		}
562
563		/*
564		 * If we caught a signal, we have retried and now
565		 * exit immediately.
566		 */
567		if (error != 0)
568			return (error);
569
570		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
571			AUTO_SHARE, &uq->uq_key)) != 0)
572			return (error);
573
574		umtxq_lock(&uq->uq_key);
575		umtxq_busy(&uq->uq_key);
576		umtxq_insert(uq);
577		umtxq_unbusy(&uq->uq_key);
578		umtxq_unlock(&uq->uq_key);
579
580		/*
581		 * Set the contested bit so that a release in user space
582		 * knows to use the system call for unlock.  If this fails
583		 * either some one else has acquired the lock or it has been
584		 * released.
585		 */
586		old = casuptr((intptr_t *)&umtx->u_owner, owner,
587		    owner | UMTX_CONTESTED);
588
589		/* The address was invalid. */
590		if (old == -1) {
591			umtxq_lock(&uq->uq_key);
592			umtxq_remove(uq);
593			umtxq_unlock(&uq->uq_key);
594			umtx_key_release(&uq->uq_key);
595			return (EFAULT);
596		}
597
598		/*
599		 * We set the contested bit, sleep. Otherwise the lock changed
600		 * and we need to retry or we lost a race to the thread
601		 * unlocking the umtx.
602		 */
603		umtxq_lock(&uq->uq_key);
604		if (old == owner)
605			error = umtxq_sleep(uq, "umtx", timo);
606		umtxq_remove(uq);
607		umtxq_unlock(&uq->uq_key);
608		umtx_key_release(&uq->uq_key);
609	}
610
611	return (0);
612}
613
614/*
615 * Lock a umtx object.
616 */
617static int
618do_lock(struct thread *td, struct umtx *umtx, uintptr_t id,
619	struct timespec *timeout)
620{
621	struct timespec ts, ts2, ts3;
622	struct timeval tv;
623	int error;
624
625	if (timeout == NULL) {
626		error = _do_lock(td, umtx, id, 0);
627	} else {
628		getnanouptime(&ts);
629		timespecadd(&ts, timeout);
630		TIMESPEC_TO_TIMEVAL(&tv, timeout);
631		for (;;) {
632			error = _do_lock(td, umtx, id, tvtohz(&tv));
633			if (error != ETIMEDOUT)
634				break;
635			getnanouptime(&ts2);
636			if (timespeccmp(&ts2, &ts, >=)) {
637				error = ETIMEDOUT;
638				break;
639			}
640			ts3 = ts;
641			timespecsub(&ts3, &ts2);
642			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
643		}
644	}
645	/* Mutex locking is be restarted if it is interrupted. */
646	if (error == EINTR)
647		error = ERESTART;
648	return (error);
649}
650
651/*
652 * Unlock a umtx object.
653 */
654static int
655do_unlock(struct thread *td, struct umtx *umtx, uintptr_t id)
656{
657	struct umtx_key key;
658	intptr_t owner;
659	intptr_t old;
660	int error;
661	int count;
662
663	/*
664	 * Make sure we own this mtx.
665	 *
666	 * XXX Need a {fu,su}ptr this is not correct on arch where
667	 * sizeof(intptr_t) != sizeof(long).
668	 */
669	owner = fuword(&umtx->u_owner);
670	if (owner == -1)
671		return (EFAULT);
672
673	if ((owner & ~UMTX_CONTESTED) != id)
674		return (EPERM);
675
676	/* This should be done in userland */
677	if ((owner & UMTX_CONTESTED) == 0) {
678		old = casuptr((intptr_t *)&umtx->u_owner, owner,
679			UMTX_UNOWNED);
680		if (old == -1)
681			return (EFAULT);
682		if (old == owner)
683			return (0);
684	}
685
686	/* We should only ever be in here for contested locks */
687	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
688		&key)) != 0)
689		return (error);
690
691	umtxq_lock(&key);
692	umtxq_busy(&key);
693	count = umtxq_count(&key);
694	umtxq_unlock(&key);
695
696	/*
697	 * When unlocking the umtx, it must be marked as unowned if
698	 * there is zero or one thread only waiting for it.
699	 * Otherwise, it must be marked as contested.
700	 */
701	old = casuptr((intptr_t *)&umtx->u_owner, owner,
702			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
703	umtxq_lock(&key);
704	umtxq_signal(&key,1);
705	umtxq_unbusy(&key);
706	umtxq_unlock(&key);
707	umtx_key_release(&key);
708	if (old == -1)
709		return (EFAULT);
710	if (old != owner)
711		return (EINVAL);
712	return (0);
713}
714
715/*
716 * Fetch and compare value, sleep on the address if value is not changed.
717 */
718static int
719do_wait(struct thread *td, struct umtx *umtx, uintptr_t id, struct timespec *timeout)
720{
721	struct umtx_q *uq;
722	struct timespec ts, ts2, ts3;
723	struct timeval tv;
724	uintptr_t tmp;
725	int error = 0;
726
727	uq = td->td_umtxq;
728	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_WAIT, AUTO_SHARE,
729	    &uq->uq_key)) != 0)
730		return (error);
731
732	umtxq_lock(&uq->uq_key);
733	umtxq_insert(uq);
734	umtxq_unlock(&uq->uq_key);
735	tmp = fuword(&umtx->u_owner);
736	if (tmp != id) {
737		umtxq_lock(&uq->uq_key);
738		umtxq_remove(uq);
739		umtxq_unlock(&uq->uq_key);
740	} else if (timeout == NULL) {
741		umtxq_lock(&uq->uq_key);
742		error = umtxq_sleep(uq, "ucond", 0);
743		umtxq_remove(uq);
744		umtxq_unlock(&uq->uq_key);
745	} else {
746		getnanouptime(&ts);
747		timespecadd(&ts, timeout);
748		TIMESPEC_TO_TIMEVAL(&tv, timeout);
749		umtxq_lock(&uq->uq_key);
750		for (;;) {
751			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
752			if (!(uq->uq_flags & UQF_UMTXQ))
753				break;
754			if (error != ETIMEDOUT)
755				break;
756			umtxq_unlock(&uq->uq_key);
757			getnanouptime(&ts2);
758			if (timespeccmp(&ts2, &ts, >=)) {
759				error = ETIMEDOUT;
760				umtxq_lock(&uq->uq_key);
761				break;
762			}
763			ts3 = ts;
764			timespecsub(&ts3, &ts2);
765			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
766			umtxq_lock(&uq->uq_key);
767		}
768		umtxq_remove(uq);
769		umtxq_unlock(&uq->uq_key);
770	}
771	umtx_key_release(&uq->uq_key);
772	/* Mutex locking is be restarted if it is interrupted. */
773	if (error == ERESTART)
774		error = EINTR;
775	return (error);
776}
777
778/*
779 * Wake up threads sleeping on the specified address.
780 */
781int
782kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
783{
784	struct umtx_key key;
785	int ret;
786
787	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
788	   &key)) != 0)
789		return (ret);
790	umtxq_lock(&key);
791	ret = umtxq_signal(&key, n_wake);
792	umtxq_unlock(&key);
793	umtx_key_release(&key);
794	return (0);
795}
796
797/*
798 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
799 */
800static int
801_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
802	int try)
803{
804	struct umtx_q *uq;
805	uint32_t owner, old, id;
806	int error = 0;
807
808	id = td->td_tid;
809	uq = td->td_umtxq;
810
811	/*
812	 * Care must be exercised when dealing with umtx structure. It
813	 * can fault on any access.
814	 */
815	for (;;) {
816		/*
817		 * Try the uncontested case.  This should be done in userland.
818		 */
819		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
820
821		/* The acquire succeeded. */
822		if (owner == UMUTEX_UNOWNED)
823			return (0);
824
825		/* The address was invalid. */
826		if (owner == -1)
827			return (EFAULT);
828
829		/* If no one owns it but it is contested try to acquire it. */
830		if (owner == UMUTEX_CONTESTED) {
831			owner = casuword32(&m->m_owner,
832			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
833
834			if (owner == UMUTEX_CONTESTED)
835				return (0);
836
837			/* The address was invalid. */
838			if (owner == -1)
839				return (EFAULT);
840
841			/* If this failed the lock has changed, restart. */
842			continue;
843		}
844
845		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
846		    (owner & ~UMUTEX_CONTESTED) == id)
847			return (EDEADLK);
848
849		if (try != 0)
850			return (EBUSY);
851
852		/*
853		 * If we caught a signal, we have retried and now
854		 * exit immediately.
855		 */
856		if (error != 0)
857			return (error);
858
859		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
860		    GET_SHARE(flags), &uq->uq_key)) != 0)
861			return (error);
862
863		umtxq_lock(&uq->uq_key);
864		umtxq_busy(&uq->uq_key);
865		umtxq_insert(uq);
866		umtxq_unbusy(&uq->uq_key);
867		umtxq_unlock(&uq->uq_key);
868
869		/*
870		 * Set the contested bit so that a release in user space
871		 * knows to use the system call for unlock.  If this fails
872		 * either some one else has acquired the lock or it has been
873		 * released.
874		 */
875		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
876
877		/* The address was invalid. */
878		if (old == -1) {
879			umtxq_lock(&uq->uq_key);
880			umtxq_remove(uq);
881			umtxq_unlock(&uq->uq_key);
882			umtx_key_release(&uq->uq_key);
883			return (EFAULT);
884		}
885
886		/*
887		 * We set the contested bit, sleep. Otherwise the lock changed
888		 * and we need to retry or we lost a race to the thread
889		 * unlocking the umtx.
890		 */
891		umtxq_lock(&uq->uq_key);
892		if (old == owner)
893			error = umtxq_sleep(uq, "umtxn", timo);
894		umtxq_remove(uq);
895		umtxq_unlock(&uq->uq_key);
896		umtx_key_release(&uq->uq_key);
897	}
898
899	return (0);
900}
901
902/*
903 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
904 */
905static int
906do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
907	struct timespec *timeout, int try)
908{
909	struct timespec ts, ts2, ts3;
910	struct timeval tv;
911	int error;
912
913	if (timeout == NULL) {
914		error = _do_lock_normal(td, m, flags, 0, try);
915	} else {
916		getnanouptime(&ts);
917		timespecadd(&ts, timeout);
918		TIMESPEC_TO_TIMEVAL(&tv, timeout);
919		for (;;) {
920			error = _do_lock_normal(td, m, flags, tvtohz(&tv), try);
921			if (error != ETIMEDOUT)
922				break;
923			getnanouptime(&ts2);
924			if (timespeccmp(&ts2, &ts, >=)) {
925				error = ETIMEDOUT;
926				break;
927			}
928			ts3 = ts;
929			timespecsub(&ts3, &ts2);
930			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
931		}
932	}
933	/* Mutex locking is be restarted if it is interrupted. */
934	if (error == EINTR)
935		error = ERESTART;
936	return (error);
937}
938
939/*
940 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
941 */
942static int
943do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
944{
945	struct umtx_key key;
946	uint32_t owner, old, id;
947	int error;
948	int count;
949
950	id = td->td_tid;
951	/*
952	 * Make sure we own this mtx.
953	 */
954	owner = fuword32(&m->m_owner);
955	if (owner == -1)
956		return (EFAULT);
957
958	if ((owner & ~UMUTEX_CONTESTED) != id)
959		return (EPERM);
960
961	/* This should be done in userland */
962	if ((owner & UMUTEX_CONTESTED) == 0) {
963		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
964		if (old == -1)
965			return (EFAULT);
966		if (old == owner)
967			return (0);
968	}
969
970	/* We should only ever be in here for contested locks */
971	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
972	    &key)) != 0)
973		return (error);
974
975	umtxq_lock(&key);
976	umtxq_busy(&key);
977	count = umtxq_count(&key);
978	umtxq_unlock(&key);
979
980	/*
981	 * When unlocking the umtx, it must be marked as unowned if
982	 * there is zero or one thread only waiting for it.
983	 * Otherwise, it must be marked as contested.
984	 */
985	old = casuword32(&m->m_owner, owner,
986		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
987	umtxq_lock(&key);
988	umtxq_signal(&key,1);
989	umtxq_unbusy(&key);
990	umtxq_unlock(&key);
991	umtx_key_release(&key);
992	if (old == -1)
993		return (EFAULT);
994	if (old != owner)
995		return (EINVAL);
996	return (0);
997}
998
999static inline struct umtx_pi *
1000umtx_pi_alloc(void)
1001{
1002	struct umtx_pi *pi;
1003
1004	pi = uma_zalloc(umtx_pi_zone, M_ZERO | M_WAITOK);
1005	TAILQ_INIT(&pi->pi_blocked);
1006	atomic_add_int(&umtx_pi_allocated, 1);
1007	return (pi);
1008}
1009
1010static inline void
1011umtx_pi_free(struct umtx_pi *pi)
1012{
1013	uma_zfree(umtx_pi_zone, pi);
1014	atomic_add_int(&umtx_pi_allocated, -1);
1015}
1016
1017/*
1018 * Adjust the thread's position on a pi_state after its priority has been
1019 * changed.
1020 */
1021static int
1022umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1023{
1024	struct umtx_q *uq, *uq1, *uq2;
1025	struct thread *td1;
1026
1027	mtx_assert(&sched_lock, MA_OWNED);
1028	if (pi == NULL)
1029		return (0);
1030
1031	uq = td->td_umtxq;
1032
1033	/*
1034	 * Check if the thread needs to be moved on the blocked chain.
1035	 * It needs to be moved if either its priority is lower than
1036	 * the previous thread or higher than the next thread.
1037	 */
1038	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1039	uq2 = TAILQ_NEXT(uq, uq_lockq);
1040	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1041	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1042		/*
1043		 * Remove thread from blocked chain and determine where
1044		 * it should be moved to.
1045		 */
1046		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1047		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1048			td1 = uq1->uq_thread;
1049			MPASS(td1->td_proc->p_magic == P_MAGIC);
1050			if (UPRI(td1) > UPRI(td))
1051				break;
1052		}
1053
1054		if (uq1 == NULL)
1055			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1056		else
1057			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1058	}
1059	return (1);
1060}
1061
1062/*
1063 * Propagate priority when a thread is blocked on POSIX
1064 * PI mutex.
1065 */
1066static void
1067umtx_propagate_priority(struct thread *td)
1068{
1069	struct umtx_q *uq;
1070	struct umtx_pi *pi;
1071	int pri;
1072
1073	mtx_assert(&sched_lock, MA_OWNED);
1074	pri = UPRI(td);
1075	uq = td->td_umtxq;
1076	pi = uq->uq_pi_blocked;
1077	if (pi == NULL)
1078		return;
1079
1080	for (;;) {
1081		td = pi->pi_owner;
1082		if (td == NULL)
1083			return;
1084
1085		MPASS(td->td_proc != NULL);
1086		MPASS(td->td_proc->p_magic == P_MAGIC);
1087
1088		if (UPRI(td) <= pri)
1089			return;
1090
1091		sched_lend_user_prio(td, pri);
1092
1093		/*
1094		 * Pick up the lock that td is blocked on.
1095		 */
1096		uq = td->td_umtxq;
1097		pi = uq->uq_pi_blocked;
1098		/* Resort td on the list if needed. */
1099		if (!umtx_pi_adjust_thread(pi, td))
1100			break;
1101	}
1102}
1103
1104/*
1105 * Unpropagate priority for a PI mutex when a thread blocked on
1106 * it is interrupted by signal or resumed by others.
1107 */
1108static void
1109umtx_unpropagate_priority(struct umtx_pi *pi)
1110{
1111	struct umtx_q *uq, *uq_owner;
1112	struct umtx_pi *pi2;
1113	int pri;
1114
1115	mtx_assert(&sched_lock, MA_OWNED);
1116
1117	while (pi != NULL && pi->pi_owner != NULL) {
1118		pri = PRI_MAX;
1119		uq_owner = pi->pi_owner->td_umtxq;
1120
1121		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1122			uq = TAILQ_FIRST(&pi2->pi_blocked);
1123			if (uq != NULL) {
1124				if (pri > UPRI(uq->uq_thread))
1125					pri = UPRI(uq->uq_thread);
1126			}
1127		}
1128
1129		if (pri > uq_owner->uq_inherited_pri)
1130			pri = uq_owner->uq_inherited_pri;
1131		sched_unlend_user_prio(pi->pi_owner, pri);
1132		pi = uq_owner->uq_pi_blocked;
1133	}
1134}
1135
1136/*
1137 * Insert a PI mutex into owned list.
1138 */
1139static void
1140umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1141{
1142	struct umtx_q *uq_owner;
1143
1144	uq_owner = owner->td_umtxq;
1145	mtx_assert(&sched_lock, MA_OWNED);
1146	if (pi->pi_owner != NULL)
1147		panic("pi_ower != NULL");
1148	pi->pi_owner = owner;
1149	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1150}
1151
1152/*
1153 * Claim ownership of a PI mutex.
1154 */
1155static int
1156umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1157{
1158	struct umtx_q *uq, *uq_owner;
1159
1160	uq_owner = owner->td_umtxq;
1161	mtx_lock_spin(&sched_lock);
1162	if (pi->pi_owner == owner) {
1163		mtx_unlock_spin(&sched_lock);
1164		return (0);
1165	}
1166
1167	if (pi->pi_owner != NULL) {
1168		/*
1169		 * userland may have already messed the mutex, sigh.
1170		 */
1171		mtx_unlock_spin(&sched_lock);
1172		return (EPERM);
1173	}
1174	umtx_pi_setowner(pi, owner);
1175	uq = TAILQ_FIRST(&pi->pi_blocked);
1176	if (uq != NULL) {
1177		int pri;
1178
1179		pri = UPRI(uq->uq_thread);
1180		if (pri < UPRI(owner))
1181			sched_lend_user_prio(owner, pri);
1182	}
1183	mtx_unlock_spin(&sched_lock);
1184	return (0);
1185}
1186
1187/*
1188 * Adjust a thread's order position in its blocked PI mutex,
1189 * this may result new priority propagating process.
1190 */
1191void
1192umtx_pi_adjust(struct thread *td, u_char oldpri)
1193{
1194	struct umtx_q *uq;
1195	struct umtx_pi *pi;
1196
1197	uq = td->td_umtxq;
1198
1199	mtx_assert(&sched_lock, MA_OWNED);
1200	MPASS(TD_ON_UPILOCK(td));
1201
1202	/*
1203	 * Pick up the lock that td is blocked on.
1204	 */
1205	pi = uq->uq_pi_blocked;
1206	MPASS(pi != NULL);
1207
1208	/* Resort the turnstile on the list. */
1209	if (!umtx_pi_adjust_thread(pi, td))
1210		return;
1211
1212	/*
1213	 * If our priority was lowered and we are at the head of the
1214	 * turnstile, then propagate our new priority up the chain.
1215	 */
1216	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1217		umtx_propagate_priority(td);
1218}
1219
/*
 * Sleep on a PI mutex.
 *
 * Called with the umtxq chain lock for uq->uq_key held.  The thread is
 * put on the umtxq sleep queue and on the PI mutex's priority-sorted
 * blocked list, its priority is propagated down the ownership chain,
 * and it then sleeps until woken, signalled (PCATCH) or timed out.
 * Returns 0, ETIMEDOUT, or the msleep() error; returns with the chain
 * lock re-held.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Current, We only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&sched_lock);
		/* Re-check pi_owner: another thread may have set it. */
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		/*
		 * NOTE(review): the proc lock (a sleep mutex) is released
		 * here while sched_lock (a spin mutex) is held; this
		 * assumes mtx_unlock() of a sleep mutex is safe under a
		 * spin lock -- confirm against mutex(9)/WITNESS.
		 */
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&sched_lock);
	}

	/*
	 * Insert in priority order: pi_blocked is kept sorted by UPRI(),
	 * best (numerically lowest) priority first.
	 */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	td->td_flags |= TDF_UPIBLOCKED;
	mtx_unlock_spin(&sched_lock);
	umtxq_unlock(&uq->uq_key);

	/* Lend our priority down the ownership chain before sleeping. */
	mtx_lock_spin(&sched_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			/* Still queued: signal/timeout wakeup, dequeue ourselves. */
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	/* Leave the PI blocked list and take back any lent priority. */
	mtx_lock_spin(&sched_lock);
	uq->uq_pi_blocked = NULL;
	td->td_flags &= ~TDF_UPIBLOCKED;
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}
1304
1305/*
1306 * Add reference count for a PI mutex.
1307 */
1308static void
1309umtx_pi_ref(struct umtx_pi *pi)
1310{
1311	struct umtxq_chain *uc;
1312
1313	uc = umtxq_getchain(&pi->pi_key);
1314	UMTXQ_LOCKED_ASSERT(uc);
1315	pi->pi_refcount++;
1316}
1317
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 * Caller must hold the umtxq chain lock for pi's key.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&sched_lock);
		/* Detach from the owning thread's contested list, if any. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		/* No thread may still be sleeping on a dying PI mutex. */
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&sched_lock);
		/* Unhash it, then free outside the spin lock section. */
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	if (free)
		umtx_pi_free(pi);
}
1347
1348/*
1349 * Find a PI mutex in hash table.
1350 */
1351static struct umtx_pi *
1352umtx_pi_lookup(struct umtx_key *key)
1353{
1354	struct umtxq_chain *uc;
1355	struct umtx_pi *pi;
1356
1357	uc = umtxq_getchain(key);
1358	UMTXQ_LOCKED_ASSERT(uc);
1359
1360	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1361		if (umtx_key_match(&pi->pi_key, key)) {
1362			return (pi);
1363		}
1364	}
1365	return (NULL);
1366}
1367
1368/*
1369 * Insert a PI mutex into hash table.
1370 */
1371static inline void
1372umtx_pi_insert(struct umtx_pi *pi)
1373{
1374	struct umtxq_chain *uc;
1375
1376	uc = umtxq_getchain(&pi->pi_key);
1377	UMTXQ_LOCKED_ASSERT(uc);
1378	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1379}
1380
1381/*
1382 * Lock a PI mutex.
1383 */
1384static int
1385_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1386	int try)
1387{
1388	struct umtx_q *uq;
1389	struct umtx_pi *pi, *new_pi;
1390	uint32_t id, owner, old;
1391	int error;
1392
1393	id = td->td_tid;
1394	uq = td->td_umtxq;
1395
1396	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1397	    &uq->uq_key)) != 0)
1398		return (error);
1399	for (;;) {
1400		pi = NULL;
1401		umtxq_lock(&uq->uq_key);
1402		pi = umtx_pi_lookup(&uq->uq_key);
1403		if (pi == NULL) {
1404			umtxq_unlock(&uq->uq_key);
1405			new_pi = umtx_pi_alloc();
1406			new_pi->pi_key = uq->uq_key;
1407			umtxq_lock(&uq->uq_key);
1408			pi = umtx_pi_lookup(&uq->uq_key);
1409			if (pi != NULL)
1410				umtx_pi_free(new_pi);
1411			else {
1412				umtx_pi_insert(new_pi);
1413				pi = new_pi;
1414			}
1415		}
1416
1417		umtx_pi_ref(pi);
1418		umtxq_unlock(&uq->uq_key);
1419
1420		/*
1421		 * Care must be exercised when dealing with umtx structure.  It
1422		 * can fault on any access.
1423		 */
1424
1425		/*
1426		 * Try the uncontested case.  This should be done in userland.
1427		 */
1428		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1429
1430		/* The acquire succeeded. */
1431		if (owner == UMUTEX_UNOWNED) {
1432			error = 0;
1433			break;
1434		}
1435
1436		/* The address was invalid. */
1437		if (owner == -1) {
1438			error = EFAULT;
1439			break;
1440		}
1441
1442		/* If no one owns it but it is contested try to acquire it. */
1443		if (owner == UMUTEX_CONTESTED) {
1444			owner = casuword32(&m->m_owner,
1445			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1446
1447			if (owner == UMUTEX_CONTESTED) {
1448				umtxq_lock(&uq->uq_key);
1449				error = umtx_pi_claim(pi, td);
1450				umtxq_unlock(&uq->uq_key);
1451				break;
1452			}
1453
1454			/* The address was invalid. */
1455			if (owner == -1) {
1456				error = EFAULT;
1457				break;
1458			}
1459
1460			/* If this failed the lock has changed, restart. */
1461			umtxq_lock(&uq->uq_key);
1462			umtx_pi_unref(pi);
1463			umtxq_unlock(&uq->uq_key);
1464			pi = NULL;
1465			continue;
1466		}
1467
1468		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1469		    (owner & ~UMUTEX_CONTESTED) == id) {
1470			error = EDEADLK;
1471			break;
1472		}
1473
1474		if (try != 0) {
1475			error = EBUSY;
1476			break;
1477		}
1478
1479		/*
1480		 * If we caught a signal, we have retried and now
1481		 * exit immediately.
1482		 */
1483		if (error != 0)
1484			break;
1485
1486		umtxq_lock(&uq->uq_key);
1487		umtxq_busy(&uq->uq_key);
1488		umtxq_unlock(&uq->uq_key);
1489
1490		/*
1491		 * Set the contested bit so that a release in user space
1492		 * knows to use the system call for unlock.  If this fails
1493		 * either some one else has acquired the lock or it has been
1494		 * released.
1495		 */
1496		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1497
1498		/* The address was invalid. */
1499		if (old == -1) {
1500			umtxq_lock(&uq->uq_key);
1501			umtxq_unbusy(&uq->uq_key);
1502			umtxq_unlock(&uq->uq_key);
1503			error = EFAULT;
1504			break;
1505		}
1506
1507		umtxq_lock(&uq->uq_key);
1508		umtxq_unbusy(&uq->uq_key);
1509		/*
1510		 * We set the contested bit, sleep. Otherwise the lock changed
1511		 * and we need to retry or we lost a race to the thread
1512		 * unlocking the umtx.
1513		 */
1514		if (old == owner)
1515			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1516				 "umtxpi", timo);
1517		umtx_pi_unref(pi);
1518		umtxq_unlock(&uq->uq_key);
1519		pi = NULL;
1520	}
1521
1522	if (pi != NULL) {
1523		umtxq_lock(&uq->uq_key);
1524		umtx_pi_unref(pi);
1525		umtxq_unlock(&uq->uq_key);
1526	}
1527
1528	umtx_key_release(&uq->uq_key);
1529	return (error);
1530}
1531
/*
 * Lock a PI mutex with an optional relative timeout.
 *
 * The relative timeout is converted to an uptime deadline so that the
 * sleep can be resumed with the remaining time after each ETIMEDOUT
 * round trip through _do_lock_pi().
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_pi(td, m, flags, 0, try);
	} else {
		/* Deadline = now + timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_pi(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time remaining until the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/* Mutex locking is restarted if it is interrupted. */
	if (error == EINTR)
		error = ERESTART;
	return (error);
}
1565
/*
 * Unlock a PI mutex.
 *
 * Verifies the caller owns the userland mutex word, hands the kernel PI
 * state over (by disowning it) so the top waiter can re-claim it,
 * recomputes this thread's remaining lent priority, updates the
 * userland owner word and wakes one waiter.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Fast path: uncontested, just clear the owner word. */
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS lost a race; fall through to the contested path. */
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&sched_lock);
		/* Disown the PI state; the woken waiter will claim it. */
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		/*
		 * Recompute the best priority still lent to us by the
		 * other PI mutexes we continue to own, and unlend down
		 * to that level.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	/* Drop the chain lock around the userland access below. */
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1654
1655/*
1656 * Lock a PP mutex.
1657 */
1658static int
1659_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1660	int try)
1661{
1662	struct umtx_q *uq, *uq2;
1663	struct umtx_pi *pi;
1664	uint32_t ceiling;
1665	uint32_t owner, id;
1666	int error, pri, old_inherited_pri, su;
1667
1668	id = td->td_tid;
1669	uq = td->td_umtxq;
1670	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1671	    &uq->uq_key)) != 0)
1672		return (error);
1673	su = (suser(td) == 0);
1674	for (;;) {
1675		old_inherited_pri = uq->uq_inherited_pri;
1676		umtxq_lock(&uq->uq_key);
1677		umtxq_busy(&uq->uq_key);
1678		umtxq_unlock(&uq->uq_key);
1679
1680		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1681		if (ceiling > RTP_PRIO_MAX) {
1682			error = EINVAL;
1683			goto out;
1684		}
1685
1686		mtx_lock_spin(&sched_lock);
1687		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1688			mtx_unlock_spin(&sched_lock);
1689			error = EINVAL;
1690			goto out;
1691		}
1692		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1693			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1694			if (uq->uq_inherited_pri < UPRI(td))
1695				sched_lend_user_prio(td, uq->uq_inherited_pri);
1696		}
1697		mtx_unlock_spin(&sched_lock);
1698
1699		owner = casuword32(&m->m_owner,
1700		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1701
1702		if (owner == UMUTEX_CONTESTED) {
1703			error = 0;
1704			break;
1705		}
1706
1707		/* The address was invalid. */
1708		if (owner == -1) {
1709			error = EFAULT;
1710			break;
1711		}
1712
1713		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1714		    (owner & ~UMUTEX_CONTESTED) == id) {
1715			error = EDEADLK;
1716			break;
1717		}
1718
1719		if (try != 0) {
1720			error = EBUSY;
1721			break;
1722		}
1723
1724		/*
1725		 * If we caught a signal, we have retried and now
1726		 * exit immediately.
1727		 */
1728		if (error != 0)
1729			break;
1730
1731		umtxq_lock(&uq->uq_key);
1732		umtxq_insert(uq);
1733		umtxq_unbusy(&uq->uq_key);
1734		error = umtxq_sleep(uq, "umtxpp", timo);
1735		umtxq_remove(uq);
1736		umtxq_unlock(&uq->uq_key);
1737
1738		mtx_lock_spin(&sched_lock);
1739		uq->uq_inherited_pri = old_inherited_pri;
1740		pri = PRI_MAX;
1741		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1742			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1743			if (uq2 != NULL) {
1744				if (pri > UPRI(uq2->uq_thread))
1745					pri = UPRI(uq2->uq_thread);
1746			}
1747		}
1748		if (pri > uq->uq_inherited_pri)
1749			pri = uq->uq_inherited_pri;
1750		sched_unlend_user_prio(td, pri);
1751		mtx_unlock_spin(&sched_lock);
1752	}
1753
1754	if (error != 0) {
1755		mtx_lock_spin(&sched_lock);
1756		uq->uq_inherited_pri = old_inherited_pri;
1757		pri = PRI_MAX;
1758		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1759			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1760			if (uq2 != NULL) {
1761				if (pri > UPRI(uq2->uq_thread))
1762					pri = UPRI(uq2->uq_thread);
1763			}
1764		}
1765		if (pri > uq->uq_inherited_pri)
1766			pri = uq->uq_inherited_pri;
1767		sched_unlend_user_prio(td, pri);
1768		mtx_unlock_spin(&sched_lock);
1769	}
1770
1771out:
1772	umtxq_lock(&uq->uq_key);
1773	umtxq_unbusy(&uq->uq_key);
1774	umtxq_unlock(&uq->uq_key);
1775	umtx_key_release(&uq->uq_key);
1776	return (error);
1777}
1778
/*
 * Lock a PP mutex with an optional relative timeout.
 *
 * Same deadline-retry structure as do_lock_pi(): the relative timeout
 * is turned into an uptime deadline so the sleep can resume with the
 * remaining time after each ETIMEDOUT round trip.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_pp(td, m, flags, 0, try);
	} else {
		/* Deadline = now + timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_pp(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time remaining until the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/* Mutex locking is restarted if it is interrupted. */
	if (error == EINTR)
		error = ERESTART;
	return (error);
}
1815
/*
 * Unlock a PP mutex.
 *
 * Verifies ownership, stores UMUTEX_CONTESTED back into the owner word
 * (PP mutexes are always locked through the kernel so priorities can be
 * adjusted), wakes one waiter, and recomputes the caller's inherited
 * priority from m_ceilings[1] and its remaining contested PI mutexes.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to restore on unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	/* suword32() reports a fault as -1. */
	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Drop to new_inherited_pri, but keep any higher boost
		 * still required by contested PI mutexes we own.
		 */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}
1896
/*
 * Change the priority ceiling (m_ceilings[0]) of a PP mutex.
 *
 * The ceiling may only be changed while holding the mutex, so this
 * loops acquiring the owner word (or sleeping until it can), stores the
 * new ceiling, and optionally copies the previous ceiling out to
 * old_ceiling.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	/* Note: error == 0 here, which the retry loop below relies on. */
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the (kernel-held) unlocked state. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Acquired: store the ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* We already hold the mutex; just set the ceiling. */
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
1975
1976/*
1977 * Lock a userland POSIX mutex.
1978 */
1979static int
1980do_lock_umutex(struct thread *td, struct umutex *m, struct timespec *ts,
1981	int try)
1982{
1983	uint32_t flags;
1984
1985	flags = fuword32(&m->m_flags);
1986	if (flags == -1)
1987		return (EFAULT);
1988
1989	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1990	case 0:
1991		return (do_lock_normal(td, m, flags, ts, try));
1992	case UMUTEX_PRIO_INHERIT:
1993		return (do_lock_pi(td, m, flags, ts, try));
1994	case UMUTEX_PRIO_PROTECT:
1995		return (do_lock_pp(td, m, flags, ts, try));
1996	}
1997
1998	return (EINVAL);
1999}
2000
2001/*
2002 * Unlock a userland POSIX mutex.
2003 */
2004static int
2005do_unlock_umutex(struct thread *td, struct umutex *m)
2006{
2007	uint32_t flags;
2008	int ret;
2009
2010	flags = fuword32(&m->m_flags);
2011	if (flags == -1)
2012		return (EFAULT);
2013
2014	if ((flags & UMUTEX_PRIO_INHERIT) != 0)
2015		ret = do_unlock_pi(td, m, flags);
2016	else if ((flags & UMUTEX_PRIO_PROTECT) != 0)
2017		ret = do_unlock_pp(td, m, flags);
2018	else
2019		ret = do_unlock_normal(td, m, flags);
2020
2021	return (ret);
2022}
2023
2024int
2025_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2026    /* struct umtx *umtx */
2027{
2028	return _do_lock(td, uap->umtx, td->td_tid, 0);
2029}
2030
2031int
2032_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2033    /* struct umtx *umtx */
2034{
2035	return do_unlock(td, uap->umtx, td->td_tid);
2036}
2037
2038int
2039_umtx_op(struct thread *td, struct _umtx_op_args *uap)
2040{
2041	struct timespec timeout;
2042	struct timespec *ts;
2043	int error;
2044
2045	switch(uap->op) {
2046	case UMTX_OP_MUTEX_LOCK:
2047		/* Allow a null timespec (wait forever). */
2048		if (uap->uaddr2 == NULL)
2049			ts = NULL;
2050		else {
2051			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2052			if (error != 0)
2053				break;
2054			if (timeout.tv_nsec >= 1000000000 ||
2055			    timeout.tv_nsec < 0) {
2056				error = EINVAL;
2057				break;
2058			}
2059			ts = &timeout;
2060		}
2061		error = do_lock_umutex(td, uap->obj, ts, 0);
2062		break;
2063	case UMTX_OP_MUTEX_UNLOCK:
2064		error = do_unlock_umutex(td, uap->obj);
2065		break;
2066	case UMTX_OP_LOCK:
2067		/* Allow a null timespec (wait forever). */
2068		if (uap->uaddr2 == NULL)
2069			ts = NULL;
2070		else {
2071			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2072			if (error != 0)
2073				break;
2074			if (timeout.tv_nsec >= 1000000000 ||
2075			    timeout.tv_nsec < 0) {
2076				error = EINVAL;
2077				break;
2078			}
2079			ts = &timeout;
2080		}
2081		error = do_lock(td, uap->obj, uap->val, ts);
2082		break;
2083	case UMTX_OP_UNLOCK:
2084		error = do_unlock(td, uap->obj, uap->val);
2085		break;
2086	case UMTX_OP_WAIT:
2087		/* Allow a null timespec (wait forever). */
2088		if (uap->uaddr2 == NULL)
2089			ts = NULL;
2090		else {
2091			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2092			if (error != 0)
2093				break;
2094			if (timeout.tv_nsec >= 1000000000 ||
2095			    timeout.tv_nsec < 0) {
2096				error = EINVAL;
2097				break;
2098			}
2099			ts = &timeout;
2100		}
2101		error = do_wait(td, uap->obj, uap->val, ts);
2102		break;
2103	case UMTX_OP_WAKE:
2104		error = kern_umtx_wake(td, uap->obj, uap->val);
2105		break;
2106	case UMTX_OP_MUTEX_TRYLOCK:
2107		error = do_lock_umutex(td, uap->obj, NULL, 1);
2108		break;
2109	case UMTX_OP_SET_CEILING:
2110		error = do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2111		break;
2112	default:
2113		error = EINVAL;
2114		break;
2115	}
2116	return (error);
2117}
2118
2119void
2120umtx_thread_init(struct thread *td)
2121{
2122	td->td_umtxq = umtxq_alloc();
2123	td->td_umtxq->uq_thread = td;
2124}
2125
/*
 * Release the per-thread umtx queue structure; counterpart of
 * umtx_thread_init(), called when the struct thread is destroyed.
 */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
2131
/*
 * It will be called when new thread is created, e.g fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* A fresh thread starts with no priority-ceiling boost. */
	uq->uq_inherited_pri = PRI_MAX;

	/* The (possibly recycled) structure must carry no stale state. */
	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
2148
/*
 * exec() hook.
 *
 * Registered via EVENTHANDLER; drops the executing thread's umtx state
 * (inherited priority, owned PI mutexes) before the new image runs.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
2158
/*
 * thread_exit() hook.
 *
 * Releases the exiting thread's umtx state; see umtx_thread_cleanup().
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
2167
/*
 * clean up umtx data.
 *
 * Resets the thread's inherited (ceiling) priority, disowns every PI
 * mutex still listed as contested by it, and clears the borrowed-
 * priority flag.  Used from the exec hook and thread exit.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&sched_lock);
	/* Drop any priority-ceiling boost. */
	uq->uq_inherited_pri = PRI_MAX;
	/* Detach every PI mutex this thread still owns. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}
2189