kern_umtx.c revision 231995
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 231995 2012-02-22 07:34:23Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A waiter on a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads may be done while holding either
	 * the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes we own; their waiters contend with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All contested PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user could simply create a PI-mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, which would boost A's priority via
 * priority propagation as well, and A's priority would never be
 * lowered even if it were using 100% CPU.  This would be unfair to
 * other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
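
/*
 * Example (illustrative): UPRI() is what gets fed to
 * sched_lend_user_prio() throughout this file.  A real-time thread's
 * user priority passes through unchanged and therefore propagates to
 * a PI-mutex owner, while any priority in the time-sharing range is
 * clamped to PRI_MAX_TIMESHARE, so a time-sharing waiter can never
 * boost the owner, per the comment above.
 */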

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}
262
263static inline void
264umtxq_hash(struct umtx_key *key)
265{
266	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
267	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
268}
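
/*
 * Example (illustrative): this is classic multiplicative (Fibonacci)
 * hashing.  With __WORD_BIT == 32, UMTX_SHIFTS is 23, so the hash
 * keeps the top 9 bits of the 32-bit product, which index exactly
 * the UMTX_CHAINS == 512 chains:
 *
 *	hash = ((n * 2654404609U) >> 23) % 512;
 *
 * Multiplying first diffuses low-bit differences (e.g. two umtx
 * words 8 bytes apart on the same page) into the surviving high
 * bits, so neighbouring addresses rarely collide on one chain.
 */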

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
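
/*
 * Example (illustrative): the canonical calling pattern for
 * operations that must touch pageable userland memory while keeping
 * other kernel threads away from the same chain:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	-- may sleep until the chain is free
 *	umtxq_unlock(&key);	-- drop the mutex before faulting
 *	... casuword32()/suword32() on the userland word ...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);	-- wakes a thread blocked in umtxq_busy()
 *	umtxq_unlock(&key);
 */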

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters on the shared queue for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, through *first, the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique logical key.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
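
/*
 * Example (illustrative): two processes can mmap() the same shared
 * object at different virtual addresses and still must find each
 * other's waiters.  The shared form of the key above identifies the
 * word by its backing VM object plus an object-relative offset
 * instead of by the raw address, so
 *
 *	umtx_key_match(&key_a, &key_b)
 *
 * holds whenever both keys resolve to the same location in the same
 * object, while a private (THREAD_SHARE) key simply records the
 * (vmspace, virtual address) pair.
 */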

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have already retried once
		 * and now exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object, with an optional timeout.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it; otherwise it
	 * must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a 32-bit umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have already retried once
		 * and now exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a 32-bit umtx object, with an optional timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a 32-bit umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it; otherwise it
	 * must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}
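
/*
 * Example (illustrative): with hz = 1000 (a 1 ms tick), a remaining
 * timeout of { .tv_sec = 0, .tv_nsec = 250000000 } converts to
 * roughly 251 ticks, since tvtohz() rounds up and allows for the
 * current partial tick.  The timed-wait loops in this file recompute
 * the remaining time and call tstohz() again after every premature
 * wakeup, so a sequence of short sleeps still honours the caller's
 * deadline.
 */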

/*
 * Fetch and compare a value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private, uint32_t flags)
{
	struct umtx_q *uq;
	struct timespec ets, cts, tts;
	u_long tmp;
	int clockid = (flags >> 16) & 0xFFFF;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		kern_clock_gettime(td, clockid, &cts);
		if ((flags & UMTX_WAIT_ABSTIME) == 0) {
			ets = cts;
			timespecadd(&ets, timeout);
		} else {
			ets = *timeout;
		}
		umtxq_lock(&uq->uq_key);
		for (;;) {
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			tts = ets;
			timespecsub(&tts, &cts);
			error = umtxq_sleep(uq, "uwait", tstohz(&tts));
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				error = 0;
				break;
			}
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			kern_clock_gettime(td, clockid, &cts);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
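
/*
 * Example (illustrative): this wait/wake pair is what backs the
 * UMTX_OP_WAIT and UMTX_OP_WAKE operations of the _umtx_op(2)
 * system call.  A minimal userland sketch, where BUSY and FREE are
 * hypothetical application values:
 *
 *	while (atomic_load_acq_int(&flag) == BUSY)	-- waiter
 *		_umtx_op(&flag, UMTX_OP_WAIT_UINT, BUSY, NULL, NULL);
 *
 *	atomic_store_rel_int(&flag, FREE);		-- waker
 *	_umtx_op(&flag, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
 *
 * do_wait() only sleeps if the word still holds the expected value,
 * which closes the race between the waiter's check and its sleep.
 */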

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested, try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have already retried once
		 * and now exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
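
/*
 * Example (illustrative): the m_owner word encodes the entire fast
 * path, so an uncontested lock or unlock never enters the kernel.
 * Userland (e.g. libthr) can do, in essence:
 *
 *	lock:	atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid)
 *	unlock:	atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED)
 *
 * Only when UMUTEX_CONTESTED is set in the word must it fall back to
 * the _umtx_op() system call, which is why the loop above is careful
 * to set that bit before going to sleep.
 */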

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it; otherwise it
	 * must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this is done only for a simple (PRIO_NONE) mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if its priority value is lower than the
	 * previous thread's or higher than the next thread's.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
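
/*
 * Example (illustrative): if thread A owns PI mutex M1 and is itself
 * blocked on M2, which is owned by thread B, then a high-priority
 * waiter C blocking on M1 walks the chain C -> M1 -> A -> M2 -> B in
 * the loop above, lending C's priority to both A and B.  The walk
 * stops early at any owner already running at an equal or better
 * priority, or at an owner that is not blocked on a PI mutex.
 */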

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count for a PI mutex; if the counter drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have already retried once
		 * and now exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get the highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it; otherwise it
	 * must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have already retried once
		 * and now exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2120
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have already retried the
		 * lock, so exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * If we successfully set the contested bit, sleep.
		 * Otherwise the lock state changed while we were
		 * setting it and we must retry, or we lost a race with
		 * the thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

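/*
 * Dispatch a umutex lock request according to the protocol bits in the
 * mutex flags: plain, priority-inheritance or priority-protection.
 */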
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
	int mode)
{
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, mode));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, mode));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, mode));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

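/*
 * Wait on a userland condition variable.  The thread is queued and
 * c_has_waiters is set before the associated umutex is unlocked, so a
 * signal arriving between the unlock and the sleep cannot be lost.
 */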
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	uint32_t clockid;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		clockid = fuword32(&cv->c_clockid);
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* Only the predefined clock ids are supported. */
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, but
	 * avoid dirtying the cache line when it is already set.
	 */
	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			if ((wflags & CVWAIT_ABSTIME) == 0) {
				kern_clock_gettime(td, clockid, &ets);
				timespecadd(&ets, timeout);
				tts = *timeout;
			} else { /* absolute time */
				ets = *timeout;
				tts = *timeout;
				kern_clock_gettime(td, clockid, &cts);
				timespecsub(&tts, &cts);
			}
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				kern_clock_gettime(td, clockid, &cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal
		 * or a spurious wakeup; clear the c_has_waiters flag
		 * when there are no waiters left.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(
				    __DEVOLATILE(uint32_t *,
					 &cv->c_has_waiters), 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

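/*
 * Broadcast a userland condition variable: wake all waiters and clear
 * the c_has_waiters flag.
 */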
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}

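/*
 * Lock a userland rwlock for reading.  The fast path is a single CAS
 * incrementing the reader count; on contention the thread sets
 * URWLOCK_READ_WAITERS, bumps rw_blocked_readers and sleeps on the
 * shared queue.  Unless reader preference is in effect, pending
 * writers (URWLOCK_WRITE_WAITERS) also block new readers.
 */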
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/*
		 * The state changed while we were setting the waiters
		 * flag; restart the loop.
		 */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/*
		 * The contention bit is set; increase the read-waiter
		 * count before sleeping.
		 */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/*
		 * Decrease the read-waiter count; the last blocked
		 * reader also clears the read-contention bit.
		 */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}

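/*
 * Timed variant of do_rw_rdlock(): retry with a shrinking timeout
 * until the lock is acquired or the deadline derived from the
 * relative timeout passes.
 */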
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

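/*
 * Lock a userland rwlock for writing.  The fast path CASes
 * URWLOCK_WRITE_OWNER into an unowned state; on contention the thread
 * sets URWLOCK_WRITE_WAITERS and sleeps on the exclusive queue.  A
 * writer that gives up wakes any blocked readers if no other writer
 * remains, so they can compete for the lock again.
 */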
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error) {
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}

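/*
 * Timed variant of do_rw_wrlock(); the retry scheme matches
 * do_rw_rdlock2().
 */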
static int
do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_wrlock(td, obj, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

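/*
 * Unlock a userland rwlock held for either reading or writing and
 * wake up the appropriate queue; by default a single waiting writer
 * is preferred over the readers unless URWLOCK_PREFER_READER is set.
 */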
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

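/*
 * Wait on a userland semaphore.  The thread is queued and _has_waiters
 * is set before _count is re-checked, so a concurrent wakeup cannot be
 * missed; if the count is already non-zero the wait returns at once.
 */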
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags, count;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&sem->_flags);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
	rmb();
	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
	if (count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (0);
	}

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	umtxq_lock(&uq->uq_key);
	if (timeout == NULL) {
		error = umtxq_sleep(uq, "usem", 0);
	} else {
		getnanouptime(&ets);
		timespecadd(&ets, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&cts);
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			tts = ets;
			timespecsub(&tts, &cts);
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Wake up one waiter on a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&sem->_flags);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

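/*
 * Top-level entry points for the _umtx_lock() and _umtx_unlock()
 * system calls.
 */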
int
sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (_do_lock_umtx(td, uap->umtx, td->td_tid, 0));
}

int
sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
}

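/*
 * Copy in a timespec from userland and reject non-canonical values.
 */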
inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
	int error;

	error = copyin(addr, tsp, sizeof(struct timespec));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

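/*
 * The __umtx_op_*() functions below are thin wrappers that decode an
 * _umtx_op(2) argument block and call the corresponding do_*() routine.
 */
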
static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;
	uint32_t flags;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	flags = (uint32_t)(uintptr_t)uap->uaddr1;
	return (do_wait(td, uap->obj, uap->val, ts, 0, 0, flags));
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;
	uint32_t flags;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	flags = (uint32_t)(uintptr_t)uap->uaddr1;
	return (do_wait(td, uap->obj, uap->val, ts, 1, 0, flags));
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;
	uint32_t flags;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	flags = (uint32_t)(uintptr_t)uap->uaddr1;
	return (do_wait(td, uap->obj, uap->val, ts, 1, 1, flags));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

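/*
 * Wake up waiters on a batch of private words; the pointer array is
 * copied in at most BATCH_SIZE entries at a time so the on-stack
 * buffer stays small.
 */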
#define BATCH_SIZE	128
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	void *uaddrs[BATCH_SIZE];
	char **upp = (char **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}

static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
}

static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_wake_umutex(td, uap->obj));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umutex(td, uap->obj));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_broadcast(td, uap->obj));
}

static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock2(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_rw_unlock(td, uap->obj));
}

static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_sem_wait(td, uap->obj, ts));
}

static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_sem_wake(td, uap->obj));
}

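/*
 * Operation dispatch for _umtx_op(2): sys__umtx_op() indexes op_table
 * by the UMTX_OP_* opcode.
 */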
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
};

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table[uap->op])(td, uap));
	return (EINVAL);
}

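/*
 * Illustrative userland use of this interface (a sketch, not part of
 * this file): waking up to one waiter blocked on a process-private
 * word looks roughly like
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	static unsigned int word;
 *	...
 *	_umtx_op(&word, UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL);
 *
 * where val (here 1) is the maximum number of waiters to wake and the
 * two trailing pointer arguments are unused by this opcode.
 */
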
#ifdef COMPAT_FREEBSD32
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

struct timespec32 {
	int32_t tv_sec;		/* signed, so the < 0 checks below work */
	int32_t tv_nsec;
};

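/*
 * 32-bit compat version of umtx_copyin_timeout(): validate a
 * timespec32 and widen it into the native timespec.
 */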
static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			tsp->tv_sec = ts32.tv_sec;
			tsp->tv_nsec = ts32.tv_nsec;
		}
	}
	return (error);
}

static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;
	uint32_t flags;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	flags = (uint32_t)(uintptr_t)uap->uaddr1;
	return (do_wait(td, uap->obj, uap->val, ts, 1, 0, flags));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, 0));
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock2(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;
	uint32_t flags;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	flags = (uint32_t)(uintptr_t)uap->uaddr1;
	return (do_wait(td, uap->obj, uap->val, ts, 1, 1, flags));
}

static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_sem_wait(td, uap->obj, ts));
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	uint32_t uaddrs[BATCH_SIZE];
	uint32_t *upp = (uint32_t *)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		/*
		 * The userland array holds 32-bit pointers, so index it
		 * as uint32_t; stepping a uint32_t ** by pos would
		 * advance by the native pointer size and read the
		 * wrong slots.
		 */
		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
				INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap));
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * Clean up the per-thread umtx state: disown any contested PI mutexes
 * and drop any lent user priority.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
