kern_umtx.c revision 232209
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 232209 2012-02-27 13:38:52Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object waiter. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may hold
	 * either the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by us that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user could create a PI mutex, let thread A lock it, and let another
 * thread B block on it.  Because B is sleeping, its priority would be
 * boosted, which would in turn boost A's priority via propagation, and
 * A's priority would never be lowered even if A were consuming 100%
 * CPU.  This would be unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;

	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}
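
/*
 * The hashing above is multiplicative (Fibonacci) hashing: multiplying
 * by a large constant close to 2^32/phi mixes the key bits into the
 * high bits of the product, and UMTX_SHIFTS brings those bits down to
 * select one of the UMTX_CHAINS buckets.  A self-contained userland
 * sketch of the same bucketing scheme follows; the EX_* names and the
 * 32-bit word size are illustrative assumptions, not part of this file.
 */
#if 0	/* illustrative sketch, not compiled */
#include <stdint.h>

#define	EX_GOLDEN_RATIO_PRIME	2654404609U
#define	EX_CHAINS		512
#define	EX_SHIFTS		(32 - 9)	/* assumes 32-bit words */

static unsigned
ex_hash(uintptr_t a, uintptr_t b)
{
	unsigned n = (unsigned)(a + b);

	/* The high bits of n * prime are the best mixed; use them. */
	return (((n * EX_GOLDEN_RATIO_PRIME) >> EX_SHIFTS) % EX_CHAINS);
}
#endif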

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
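
/*
 * The uc_busy flag implements a "busy marker" that lets its holder drop
 * the chain mutex while still excluding other chain operations:
 * contenders first spin briefly (on SMP) in the hope that the flag
 * clears, then block via msleep()/wakeup_one().  A minimal userland
 * analogue of the same spin-then-sleep pattern is sketched below using
 * POSIX primitives; the names and spin budget are illustrative
 * assumptions.
 */
#if 0	/* illustrative userland sketch, not compiled */
#include <pthread.h>
#include <stdatomic.h>

struct ex_chain {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	atomic_int	busy;
	int		waiters;
};

static void
ex_busy(struct ex_chain *c)
{
	int spins = 200;	/* cf. BUSY_SPINS */

	pthread_mutex_lock(&c->lock);
	/* Spin briefly without the lock; the holder may finish soon. */
	if (atomic_load(&c->busy)) {
		pthread_mutex_unlock(&c->lock);
		while (atomic_load(&c->busy) && --spins > 0)
			;	/* cpu_spinwait() equivalent */
		pthread_mutex_lock(&c->lock);
	}
	while (atomic_load(&c->busy)) {	/* still busy: sleep instead */
		c->waiters++;
		pthread_cond_wait(&c->cv, &c->lock);
		c->waiters--;
	}
	atomic_store(&c->busy, 1);
	pthread_mutex_unlock(&c->lock);
}
/* ex_unbusy(): lock, busy = 0, pthread_cond_signal() if waiters, unlock. */
#endif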

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
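
/*
 * The queue management above relies on a hand-off trick: every umtx_q
 * owns exactly one spare umtxq_queue, so per-key queue heads never have
 * to be allocated or freed while the chain lock is held.  The first
 * waiter's spare becomes the key's queue; later waiters park theirs on
 * uc_spare_queue.  On removal a waiter reclaims either the now-empty
 * key queue or any parked spare; which one does not matter, only that
 * the number of queue heads equals the number of waiters.  A
 * self-contained userland model of the invariant (illustrative names):
 */
#if 0	/* illustrative sketch, not compiled */
#include <assert.h>
#include <stddef.h>

struct bucket { struct bucket *next; };

static struct bucket *spare_list;	/* models uc_spare_queue */

/* A waiter arrives owning `sp'; returns the bucket it queued on. */
static struct bucket *
ex_enqueue(struct bucket **keyq, struct bucket *sp)
{
	if (*keyq != NULL) {		/* key already has a queue */
		sp->next = spare_list;	/* park our spare */
		spare_list = sp;
		return (*keyq);
	}
	*keyq = sp;			/* our spare becomes the queue */
	return (sp);
}

/* A waiter leaves; it must get some bucket back. */
static struct bucket *
ex_dequeue(struct bucket **keyq, int last_waiter)
{
	struct bucket *sp;

	if (last_waiter) {		/* reclaim the per-key queue */
		sp = *keyq;
		*keyq = NULL;
	} else {			/* reclaim any parked spare */
		sp = spare_list;
		assert(sp != NULL);	/* buckets == waiters */
		spare_list = sp->next;
	}
	return (sp);
}
#endif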

/*
 * Return the number of waiters on the shared queue for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters for a key and, through *first,
 * the first waiter on the queue.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into the sleep state.  Before sleeping, check
 * whether the thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique logical key.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
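
/*
 * The key built above is what makes one lock word addressable from
 * different mappings: a private key is (vmspace, virtual address),
 * while a shared key is (vm_object, offset), so two processes that map
 * the same object at different addresses still derive equal keys.  A
 * minimal model of the resulting identity comparison (the struct and
 * function names are illustrative; the real umtx_key/umtx_key_match
 * come from <sys/umtx.h>):
 */
#if 0	/* illustrative sketch, not compiled */
#include <stdbool.h>
#include <stdint.h>

struct ex_key {
	bool		shared;
	const void	*space;	/* vmspace (private) or vm_object (shared) */
	uintptr_t	where;	/* virtual address or object offset */
};

static bool
ex_key_match(const struct ex_key *k1, const struct ex_key *k2)
{
	/* Equal only when backed by the same store at the same spot. */
	return (k1->shared == k2->shared && k1->space == k2->space &&
	    k1->where == k2->where);
}
#endif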

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
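
/*
 * The function above is the kernel half of a two-level lock: userland
 * first attempts a single compare-and-swap of UMTX_UNOWNED to its
 * thread id and only makes the system call once the word is owned or
 * contested.  A hedged sketch of that fast path follows; UMTX_UNOWNED
 * and the _umtx_lock() syscall wrapper come from <sys/umtx.h>, and the
 * rest is illustrative.
 */
#if 0	/* illustrative userland sketch, not compiled */
#include <sys/types.h>
#include <sys/umtx.h>
#include <machine/atomic.h>

static void
ex_lock(struct umtx *mtx, u_long tid)
{
	/* Uncontested acquire: no kernel entry needed. */
	if (atomic_cmpset_acq_long(&mtx->u_owner, UMTX_UNOWNED, tid))
		return;
	/* Contested: the kernel queues us and sets UMTX_CONTESTED. */
	_umtx_lock(mtx);
}
#endif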

/*
 * Lock a umtx object, optionally with a timeout.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
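
/*
 * The timeout handling above turns the relative timeout into an
 * absolute deadline once, then recomputes the remaining interval after
 * every premature ETIMEDOUT caused by tvtohz()'s coarse tick rounding.
 * The same deadline-retry pattern in portable userland C (all names
 * below are illustrative):
 */
#if 0	/* illustrative sketch, not compiled */
#include <errno.h>
#include <time.h>

/* try_wait() returns 0 on success, ETIMEDOUT on an early timeout. */
static int
ex_wait_until(struct timespec deadline, int (*try_wait)(struct timespec))
{
	struct timespec now, left;
	int error;

	for (;;) {
		clock_gettime(CLOCK_MONOTONIC, &now);
		if (now.tv_sec > deadline.tv_sec ||
		    (now.tv_sec == deadline.tv_sec &&
		     now.tv_nsec >= deadline.tv_nsec))
			return (ETIMEDOUT);	/* deadline has passed */
		left = deadline;		/* timespecsub() by hand */
		left.tv_sec -= now.tv_sec;
		left.tv_nsec -= now.tv_nsec;
		if (left.tv_nsec < 0) {
			left.tv_sec--;
			left.tv_nsec += 1000000000L;
		}
		error = try_wait(left);		/* may time out early */
		if (error != ETIMEDOUT)
			return (error);
	}
}
#endif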

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is only zero or one thread waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a 32-bit umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a 32-bit umtx object, optionally with a timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a 32-bit umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is only zero or one thread waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

/*
 * Fetch and compare a value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ets, cts, tts;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
			ets = cts;
			timespecadd(&ets, &timeout->_timeout);
		} else {
			ets = timeout->_timeout;
		}
		umtxq_lock(&uq->uq_key);
		for (;;) {
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			tts = ets;
			timespecsub(&tts, &cts);
			error = umtxq_sleep(uq, "uwait", tstohz(&tts));
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				error = 0;
				break;
			}
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			kern_clock_gettime(td, timeout->_clockid, &cts);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
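
/*
 * do_wait() is the futex-style primitive underlying _umtx_op(...,
 * UMTX_OP_WAIT, ...): the thread is inserted on the wait queue *before*
 * the word is re-read, so a waker that changes the value and calls
 * kern_umtx_wake() after our read cannot be missed -- it will find us
 * already queued.  Typical userland usage (the _umtx_op() interface and
 * constants are from <sys/umtx.h>; the rest is an illustrative sketch):
 */
#if 0	/* illustrative userland sketch, not compiled */
#include <sys/cdefs.h>
#include <sys/types.h>
#include <sys/umtx.h>

static volatile u_long state;

static void
ex_wait_for_nonzero(void)
{
	u_long v;

	while ((v = state) == 0) {
		/* Sleeps only if `state' still equals v in the kernel. */
		_umtx_op(__DEVOLATILE(void *, &state), UMTX_OP_WAIT, v,
		    NULL, NULL);
	}
}

static void
ex_set_and_wake(void)
{
	state = 1;
	_umtx_op(__DEVOLATILE(void *, &state), UMTX_OP_WAKE, 1, NULL, NULL);
}
#endif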

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done
			 * in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/*
			 * If no one owns it but it is contested, try to
			 * acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/*
				 * If this failed the lock has changed,
				 * restart.
				 */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is only zero or one thread waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this is only done for simple (non-PI, non-PP) mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED,
		    UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	if (pi == NULL)
		return (NULL);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a PI mutex's blocked list after its
 * priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove the thread from the blocked chain and determine
		 * where it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
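
/*
 * Example of the transitive walk above: if T0 (user pri 80) blocks on a
 * PI mutex owned by T1 (pri 100), and T1 is itself blocked on another
 * PI mutex owned by T2 (pri 120), the loop lends pri 80 first to T1 and
 * then, via T1's uq_pi_blocked, to T2.  The walk stops at the first
 * owner whose lent priority is already at least as good (numerically
 * lower) or at an unowned mutex.  Skeleton of the chain walk, with
 * lend_priority() standing in for the locked sched_lend_user_prio()
 * sequence:
 */
#if 0	/* illustrative skeleton, not compiled */
	for (;;) {
		td = pi->pi_owner;		/* current lock holder */
		if (td == NULL || td == curthread)
			break;
		lend_priority(td, pri);		/* boost if pri is better */
		pi = td->td_umtxq->uq_pi_blocked; /* holder blocked too? */
		if (pi == NULL)
			break;			/* end of the chain */
	}
#endif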

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;

	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed with the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position on its blocked PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrement the reference count for a PI mutex; when the count
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
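
/*
 * _do_lock_pi() demonstrates the standard "allocate outside the lock"
 * idiom: first try umtx_pi_alloc(M_NOWAIT) while the chain is locked;
 * if that fails, drop the lock, allocate with M_WAITOK (which may
 * sleep), re-lock, and redo the lookup, because another thread may
 * have installed a umtx_pi for the same key in the meantime.  The
 * generic shape of the idiom, with placeholder helpers:
 */
#if 0	/* illustrative skeleton, not compiled */
	lock(chain);
	obj = lookup(key);
	if (obj == NULL) {
		new = alloc(M_NOWAIT);		/* cannot sleep here */
		if (new == NULL) {
			unlock(chain);
			new = alloc(M_WAITOK);	/* may sleep */
			lock(chain);
			obj = lookup(key);	/* recheck: we may have raced */
			if (obj != NULL) {
				free_obj(new);	/* lost the race */
				new = NULL;
			}
		}
		if (new != NULL)
			obj = insert(key, new);
	}
#endif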

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* Userland messed with the mutex. */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* Get the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first != NULL)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is only zero or one thread waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
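
/*
 * Ceiling arithmetic used by the PP code above: POSIX ceilings treat
 * larger numbers as more important, while rtprio values and kernel
 * priority numbers grow downward in importance, so a userland ceiling c
 * maps to the kernel priority PRI_MIN_REALTIME + (RTP_PRIO_MAX - c).
 * Assuming RTP_PRIO_MAX == 31, for example:
 */
#if 0	/* illustrative sketch, not compiled */
	/* ceiling 31 (highest) -> PRI_MIN_REALTIME + 0  */
	/* ceiling  0 (lowest)  -> PRI_MIN_REALTIME + 31 */
	pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling);
	/*
	 * Because the subtraction is unsigned, any userland ceiling
	 * greater than RTP_PRIO_MAX wraps to a huge value and is
	 * rejected by the "ceiling > RTP_PRIO_MAX" range check above.
	 */
#endif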

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2119
2120static int
2121do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2122	uint32_t *old_ceiling)
2123{
2124	struct umtx_q *uq;
2125	uint32_t save_ceiling;
2126	uint32_t owner, id;
2127	uint32_t flags;
2128	int error;
2129
2130	flags = fuword32(&m->m_flags);
2131	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2132		return (EINVAL);
2133	if (ceiling > RTP_PRIO_MAX)
2134		return (EINVAL);
2135	id = td->td_tid;
2136	uq = td->td_umtxq;
2137	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2138	   &uq->uq_key)) != 0)
2139		return (error);
2140	for (;;) {
2141		umtxq_lock(&uq->uq_key);
2142		umtxq_busy(&uq->uq_key);
2143		umtxq_unlock(&uq->uq_key);
2144
2145		save_ceiling = fuword32(&m->m_ceilings[0]);
2146
2147		owner = casuword32(&m->m_owner,
2148		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2149
2150		if (owner == UMUTEX_CONTESTED) {
2151			suword32(&m->m_ceilings[0], ceiling);
2152			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2153				UMUTEX_CONTESTED);
2154			error = 0;
2155			break;
2156		}
2157
2158		/* The address was invalid. */
2159		if (owner == -1) {
2160			error = EFAULT;
2161			break;
2162		}
2163
2164		if ((owner & ~UMUTEX_CONTESTED) == id) {
2165			suword32(&m->m_ceilings[0], ceiling);
2166			error = 0;
2167			break;
2168		}
2169
		/*
		 * If we caught a signal, we have retried, so now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * If we set the contested bit, sleep.  Otherwise the
		 * lock changed and we need to retry, or we lost a race
		 * with the thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

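/*
 * Lock a userland POSIX mutex, dispatching on the protocol bits in
 * the mutex flags: plain, priority-inheritance (PI) or
 * priority-protected (PP).
 */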
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
	int mode)
{
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, mode));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, mode));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, mode));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct _umtx_time *timeout, int mode)
{
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
			ets = cts;
			timespecadd(&ets, &timeout->_timeout);
			tts = timeout->_timeout;
		} else {
			ets = timeout->_timeout;
			tts = timeout->_timeout;
			timespecsub(&tts, &cts);
		}
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tstohz(&tts), mode);
			if (error != ETIMEDOUT)
				break;
			kern_clock_gettime(td, timeout->_clockid, &cts);
			if (timespeccmp(&cts, &ets, >=))
				break;
			tts = ets;
			timespecsub(&tts, &cts);
		}
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

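/*
 * Wait on a userland condition variable: queue the thread, mark the
 * condition variable as having waiters, release the associated mutex
 * and then sleep, optionally with a timeout against the requested
 * clock.
 */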
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timespec cts, ets, tts;
	uint32_t flags;
	uint32_t clockid;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		clockid = fuword32(&cv->c_clockid);
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* Only the predefined clock ids are accepted. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, but
	 * do not dirty the cache line when it is unnecessary.
	 */
	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			if ((wflags & CVWAIT_ABSTIME) == 0) {
				kern_clock_gettime(td, clockid, &ets);
				timespecadd(&ets, timeout);
				tts = *timeout;
			} else { /* absolute time */
				ets = *timeout;
				tts = *timeout;
				kern_clock_gettime(td, clockid, &cts);
				timespecsub(&tts, &cts);
			}
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tstohz(&tts));
				if (error != ETIMEDOUT)
					break;
				kern_clock_gettime(td, clockid, &cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
			}
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal,
		 * or a spurious wakeup; clear the c_has_waiters flag
		 * when necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(
				    __DEVOLATILE(uint32_t *,
					 &cv->c_has_waiters), 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

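/*
 * Broadcast a userland condition variable, waking all waiters and
 * clearing c_has_waiters.
 */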
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}

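/*
 * Acquire a userland reader/writer lock for reading.  The fast path
 * is a compare-and-swap that bumps the reader count; if a writer
 * owns the lock (or, unless reader preference is set, is waiting for
 * it), the thread publishes URWLOCK_READ_WAITERS and its blocked
 * reader count, then sleeps.
 */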
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* Set the read-contention bit. */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* The state changed while we were setting the flags; restart. */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/*
		 * The contention bit is set; increase the read waiter
		 * count before sleeping.
		 */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/*
		 * Decrease the read waiter count; if we were the last
		 * blocked reader, also clear the read-contention bit.
		 */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
		if (blocked_readers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}

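/*
 * Timed wrapper around do_rw_rdlock(): convert the (possibly
 * absolute) _umtx_time timeout into a series of bounded sleeps,
 * retrying until the deadline passes.
 */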
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct _umtx_time *timeout)
{
	struct timespec cts, ets, tts;
	int error;

	kern_clock_gettime(td, timeout->_clockid, &cts);
	if ((timeout->_flags & UMTX_ABSTIME) == 0) {
		ets = cts;
		timespecadd(&ets, &timeout->_timeout);
		tts = timeout->_timeout;
	} else {
		ets = timeout->_timeout;
		tts = timeout->_timeout;
		timespecsub(&tts, &cts);
	}
	for (;;) {
		error = do_rw_rdlock(td, obj, val, tstohz(&tts));
		if (error != ETIMEDOUT)
			break;
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if (timespeccmp(&cts, &ets, >=))
			break;
		tts = ets;
		timespecsub(&tts, &cts);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

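/*
 * Acquire a userland reader/writer lock for writing.  The fast-path
 * CAS succeeds only when there is neither a write owner nor any
 * readers; otherwise the thread sets URWLOCK_WRITE_WAITERS, bumps
 * the blocked writer count and sleeps on the exclusive queue.
 */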
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error) {
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers - 1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}

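/*
 * Timed wrapper around do_rw_wrlock(); see do_rw_rdlock2() for the
 * timeout handling.
 */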
static int
do_rw_wrlock2(struct thread *td, void *obj, struct _umtx_time *timeout)
{
	struct timespec cts, ets, tts;
	int error;

	kern_clock_gettime(td, timeout->_clockid, &cts);
	if ((timeout->_flags & UMTX_ABSTIME) == 0) {
		ets = cts;
		timespecadd(&ets, &timeout->_timeout);
		tts = timeout->_timeout;
	} else {
		ets = timeout->_timeout;
		tts = timeout->_timeout;
		timespecsub(&tts, &cts);
	}
	for (;;) {
		error = do_rw_wrlock(td, obj, tstohz(&tts));
		if (error != ETIMEDOUT)
			break;
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if (timespeccmp(&cts, &ets, >=))
			break;
		tts = ets;
		timespecsub(&tts, &cts);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

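/*
 * Release a userland reader/writer lock: clear the write-owner bit
 * or drop the reader count, then wake one writer or all readers
 * according to the lock's preference flag.
 */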
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

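/*
 * Wait on a userland semaphore.  The thread is queued and
 * _has_waiters set before the count is re-checked, so a concurrent
 * do_sem_wake() cannot be missed; if the count is already non-zero
 * the wait returns immediately.
 */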
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct umtx_q *uq;
	struct timespec cts, ets, tts;
	uint32_t flags, count;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&sem->_flags);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
	rmb();
	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
	if (count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (0);
	}

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	if (timeout == NULL) {
		error = umtxq_sleep(uq, "usem", 0);
	} else {
		umtxq_unlock(&uq->uq_key);
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
			ets = cts;
			timespecadd(&ets, &timeout->_timeout);
		} else {
			ets = timeout->_timeout;
		}
		umtxq_lock(&uq->uq_key);
		for (;;) {
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			tts = ets;
			timespecsub(&tts, &cts);
			error = umtxq_sleep(uq, "usem", tstohz(&tts));
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			kern_clock_gettime(td, timeout->_clockid, &cts);
			umtxq_lock(&uq->uq_key);
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Wake up threads waiting on a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&sem->_flags);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

int
sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (_do_lock_umtx(td, uap->umtx, td->td_tid, 0));
}

int
sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
}

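/*
 * Copy a timespec in from userland and check that it is a valid
 * relative timeout (non-negative seconds, nanoseconds in range).
 */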
inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
	int error;

	error = copyin(addr, tsp, sizeof(struct timespec));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

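/*
 * Copy a _umtx_time in from userland.  For backward compatibility a
 * bare timespec is also accepted: the caller passes the userland
 * size, and anything no larger than a timespec gets the default
 * clock (CLOCK_REALTIME) and flags (relative timeout).
 */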
static inline int
umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
{
	int error;

	tp->_clockid = CLOCK_REALTIME;
	tp->_flags   = 0;
	if (size <= sizeof(struct timespec))
		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
	else
		error = copyin(addr, tp, sizeof(struct _umtx_time));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}

static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

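/*
 * Wake the threads sleeping on each address in a userland-supplied
 * array of pointers.  The array is copied in batches of BATCH_SIZE
 * entries to bound the kernel stack usage.
 */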
#define BATCH_SIZE	128
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	void *uaddrs[BATCH_SIZE];
	char **upp = (char **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}

static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_wake_umutex(td, uap->obj));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umutex(td, uap->obj));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_broadcast(td, uap->obj));
}

static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		   (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		   (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock2(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_rw_unlock(td, uap->obj));
}

static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_sem_wake(td, uap->obj));
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
};

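/*
 * The _umtx_op(2) system call entry point: validate the opcode and
 * dispatch through op_table.  For the timed operations the handlers
 * read uaddr as the timeout structure size and uaddr2 as a pointer
 * to the structure itself, so a userland caller might look roughly
 * like this (illustrative sketch, not part of this file):
 *
 *	struct _umtx_time t = { ._timeout = { 1, 0 },
 *	    ._flags = 0, ._clockid = CLOCK_REALTIME };
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0,
 *	    (void *)sizeof(t), &t);
 */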
int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table[uap->op])(td, uap));
	return (EINVAL);
}

#ifdef COMPAT_FREEBSD32
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

struct timespec32 {
	uint32_t tv_sec;
	uint32_t tv_nsec;
};

struct umtx_time32 {
	struct	timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};

static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			tsp->tv_sec = ts32.tv_sec;
			tsp->tv_nsec = ts32.tv_nsec;
		}
	}
	return (error);
}

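/*
 * 32-bit compat counterpart of umtx_copyin_umtx_time(): copy in
 * either a bare timespec32 or a full umtx_time32 and widen it to
 * the native _umtx_time.
 */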
static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
	struct umtx_time32 t32;
	int error;

	t32.clockid = CLOCK_REALTIME;
	t32.flags   = 0;
	if (size <= sizeof(struct timespec32))
		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
	else
		error = copyin(addr, &t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}

static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
			(size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		/* The timeout comes from 32-bit userland; use the
		   32-bit copyin, not the native one. */
		error = umtx_copyin_umtx_time32(uap->uaddr2,
			    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_wrlock2(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	uint32_t uaddrs[BATCH_SIZE];
	uint32_t *upp = (uint32_t *)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		/*
		 * The userland array holds 32-bit pointers, so index
		 * it as uint32_t rather than as native-sized pointers.
		 */
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
				INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table_compat32[uap->op])(td,
			(struct _umtx_op_args *)uap));
	return (EINVAL);
}
#endif

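/*
 * Per-thread umtx state: allocate the umtx queue entry when a thread
 * object is first constructed and free it when the thread object is
 * destroyed.
 */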
void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
