kern_umtx.c revision 216641
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 216641 2010-12-22 05:01:52Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

enum {
	TYPE_SIMPLE_WAIT,
	TYPE_CV,
	TYPE_SEM,
	TYPE_SIMPLE_LOCK,
	TYPE_NORMAL_UMUTEX,
	TYPE_PI_UMUTEX,
	TYPE_PP_UMUTEX,
	TYPE_RWLOCK
};

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

/* Key to represent a unique userland synchronization object */
struct umtx_key {
	int	hash;
	int	type;
	int	shared;
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex we are blocked on.  Reads may hold either
	 * the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* List of PI mutexes owned by us that have waiters */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, and priority propagation would boost
 * A's priority as well; A's priority would then never be lowered,
 * even if it were using 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

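/*
 * Illustrative sketch of the hash above (editorial, not part of the
 * build): the key is reduced to a chain index by Fibonacci-style
 * hashing.  With __WORD_BIT == 32, UMTX_SHIFTS keeps the top 7 bits:
 *
 *	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
 *	n *= 2654404609U;	// constant near 2^32 / golden ratio
 *	hash = (n >> 25) % 128;	// index into one of 128 chains
 *
 * Multiplying by a constant close to 2^32/phi spreads nearby
 * addresses across chains; the high bits are kept because they are
 * the best mixed.
 */
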
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

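/*
 * Sketch of the chain split (editorial note): TYPE_SIMPLE_WAIT,
 * TYPE_CV and TYPE_SEM hash into the second chain array, every
 * mutex/rwlock type into the first:
 *
 *	key->type == TYPE_CV        -> &umtxq_chains[1][hash]
 *	key->type == TYPE_PI_UMUTEX -> &umtxq_chains[0][hash]
 *
 * so a condition variable and a mutex that happen to hash to the
 * same bucket never share a chain lock (presumably the intent, since
 * CV operations also manipulate the associated mutex).
 */
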
/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to busy state when the following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

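/*
 * Typical busy/unbusy pattern (editorial sketch): uc_busy is a small
 * hand-rolled sleepable lock layered over the chain mutex, for code
 * that must drop the chain mutex (e.g. to touch pageable userland
 * memory via casuword()) while still keeping other threads away from
 * the key:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	// may spin briefly, then sleep
 *	umtxq_unlock(&key);
 *	...fault-prone userland access...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);	// wakes one thread parked in umtxq_busy()
 *	umtxq_unlock(&key);
 */
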
static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

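/*
 * Note on the spare-queue scheme above (editorial): each umtx_q is
 * allocated together with exactly one umtxq_queue in umtxq_alloc(),
 * so umtxq_insert_queue() never has to allocate while the chain lock
 * is held.  The first waiter on a key donates its spare as the
 * per-key queue head; later waiters park theirs on uc_spare_queue.
 * On removal, the leaving thread takes back either the emptied head
 * or an arbitrary spare, preserving the one-queue-per-umtx_q
 * invariant.
 */
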
/*
 * Return the number of waiters on a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters on a key and a pointer to the
 * first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into sleep state; before sleeping, check whether
 * the thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

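/*
 * The UQF_UMTXQ check above closes the lost-wakeup race (editorial
 * note): umtxq_signal_queue() removes the entry and calls wakeup()
 * while the chain lock is still held, so a thread that reaches
 * umtxq_sleep() after being signalled sees the flag already clear
 * and returns 0 instead of sleeping on a wakeup that has already
 * happened.
 */
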
/*
 * Convert a userspace address into a unique logical address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

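/*
 * Resulting key shapes (illustrative values, editorial sketch): for
 * a USYNC_PROCESS_SHARED object, every process resolves the address
 * to the same backing object:
 *
 *	key->shared = 1;
 *	key->info.shared.object = <backing vm_object>;
 *	key->info.shared.offset = entry->offset + entry->start - addr;
 *
 * while a THREAD_SHARE key is simply (vmspace, address), which is
 * cheaper because no vm_map lookup or object reference is needed.
 */
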
/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * it can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

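/*
 * For reference, the uncontested fast path that the comments above
 * allude to would look roughly like this in userland (editorial
 * sketch; the real libthr code differs in detail):
 *
 *	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id))
 *		return (0);		// locked without a syscall
 *	return (_umtx_lock(umtx));	// contested: enter the kernel
 *
 * The loop in _do_lock_umtx() is only entered once that CAS fails.
 */
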
/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

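/*
 * Editorial note on the EINTR/ERESTART swaps above: an untimed lock
 * can be transparently restarted after a signal, but a timed lock
 * cannot, because restarting the syscall would rearm the full
 * relative timeout.  The timed path therefore converts ERESTART to
 * EINTR and lets userland see the interruption.
 */
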
/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * only zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

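/*
 * Editorial note: reading the waiter count while the chain is busy
 * and then choosing UMTX_UNOWNED vs. UMTX_CONTESTED keeps the
 * contested bit accurate.  With at most one waiter the wakee simply
 * re-runs its CAS against UMTX_UNOWNED; with more waiters the bit
 * stays set, so the next unlock also enters the kernel instead of
 * succeeding in userland and stranding the remaining sleepers.
 */
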
#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * it can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * only zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				error = 0;
				break;
			}
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

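/*
 * Usage sketch (editorial; 'w', BUSY and DONE are placeholders):
 * do_wait() is the futex-style primitive behind _umtx_op(...,
 * UMTX_OP_WAIT, ...).  A userland consumer re-checks the word and
 * sleeps only while it still holds the expected value:
 *
 *	while (atomic_load_acq_long(&w->word) == BUSY)
 *		_umtx_op(&w->word, UMTX_OP_WAIT, BUSY, NULL, NULL);
 *
 * and the producer publishes the new value before waking:
 *
 *	atomic_store_rel_long(&w->word, DONE);
 *	_umtx_op(&w->word, UMTX_OP_WAKE, 1, NULL, NULL);
 */
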
/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * it can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

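/*
 * Userland fast path for completeness (editorial sketch; TID() is a
 * placeholder for however the thread id is obtained, and libthr's
 * actual code differs in detail):
 *
 *	uint32_t id = TID(curthread);
 *	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, id))
 *		return (0);
 *	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
 */
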
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * only zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check whether the mutex is available and wake up a waiter;
 * this is used only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

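/*
 * Editorial note: do_wake_umutex() pairs with the _UMUTEX_WAIT mode
 * of _do_lock_normal() above.  If the word is still owned, nothing
 * happens; with at most one waiter the contested bit is cleared via
 * CAS, and a waiter is only signalled when the word was actually
 * observed unowned, so spurious wakeups stay cheap.
 */
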
1328161678Sdavidxustatic inline struct umtx_pi *
1329163697Sdavidxuumtx_pi_alloc(int flags)
1330161678Sdavidxu{
1331161678Sdavidxu	struct umtx_pi *pi;
1332161678Sdavidxu
1333163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1334161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1335161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1336161678Sdavidxu	return (pi);
1337161678Sdavidxu}
1338161678Sdavidxu
1339161678Sdavidxustatic inline void
1340161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1341161678Sdavidxu{
1342161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1343161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1344161678Sdavidxu}
1345161678Sdavidxu
1346161678Sdavidxu/*
1347161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1348161678Sdavidxu * changed.
1349161678Sdavidxu */
1350161678Sdavidxustatic int
1351161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1352161678Sdavidxu{
1353161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1354161678Sdavidxu	struct thread *td1;
1355161678Sdavidxu
1356170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1357161678Sdavidxu	if (pi == NULL)
1358161678Sdavidxu		return (0);
1359161678Sdavidxu
1360161678Sdavidxu	uq = td->td_umtxq;
1361161678Sdavidxu
1362161678Sdavidxu	/*
1363161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1364161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1365161678Sdavidxu	 * the previous thread or higher than the next thread.
1366161678Sdavidxu	 */
1367161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1368161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1369161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1370161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1371161678Sdavidxu		/*
1372161678Sdavidxu		 * Remove thread from blocked chain and determine where
1373161678Sdavidxu		 * it should be moved to.
1374161678Sdavidxu		 */
1375161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1376161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1377161678Sdavidxu			td1 = uq1->uq_thread;
1378161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
1379161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1380161678Sdavidxu				break;
1381161678Sdavidxu		}
1382161678Sdavidxu
1383161678Sdavidxu		if (uq1 == NULL)
1384161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1385161678Sdavidxu		else
1386161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1387161678Sdavidxu	}
1388161678Sdavidxu	return (1);
1389161678Sdavidxu}
1390161678Sdavidxu
1391161678Sdavidxu/*
1392161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1393161678Sdavidxu * PI mutex.
1394161678Sdavidxu */
1395161678Sdavidxustatic void
1396161678Sdavidxuumtx_propagate_priority(struct thread *td)
1397161678Sdavidxu{
1398161678Sdavidxu	struct umtx_q *uq;
1399161678Sdavidxu	struct umtx_pi *pi;
1400161678Sdavidxu	int pri;
1401161678Sdavidxu
1402170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1403161678Sdavidxu	pri = UPRI(td);
1404161678Sdavidxu	uq = td->td_umtxq;
1405161678Sdavidxu	pi = uq->uq_pi_blocked;
1406161678Sdavidxu	if (pi == NULL)
1407161678Sdavidxu		return;
1408161678Sdavidxu
1409161678Sdavidxu	for (;;) {
1410161678Sdavidxu		td = pi->pi_owner;
1411216313Sdavidxu		if (td == NULL || td == curthread)
1412161678Sdavidxu			return;
1413161678Sdavidxu
1414161678Sdavidxu		MPASS(td->td_proc != NULL);
1415161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1416161678Sdavidxu
1417170300Sjeff		thread_lock(td);
1418216313Sdavidxu		if (td->td_lend_user_pri > pri)
1419216313Sdavidxu			sched_lend_user_prio(td, pri);
1420216313Sdavidxu		else {
1421216313Sdavidxu			thread_unlock(td);
1422216313Sdavidxu			break;
1423216313Sdavidxu		}
1424170300Sjeff		thread_unlock(td);
1425161678Sdavidxu
1426161678Sdavidxu		/*
1427161678Sdavidxu		 * Pick up the lock that td is blocked on.
1428161678Sdavidxu		 */
1429161678Sdavidxu		uq = td->td_umtxq;
1430161678Sdavidxu		pi = uq->uq_pi_blocked;
1431161678Sdavidxu		/* Re-sort td on the list if needed. */
1432161678Sdavidxu		if (!umtx_pi_adjust_thread(pi, td))
1433161678Sdavidxu			break;
1434161678Sdavidxu	}
1435161678Sdavidxu}
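
/*
 * An illustrative sketch of the chain walk above (editorial, not
 * compiled; thread and mutex names are hypothetical).  If T1 blocks on
 * PI mutex M1 owned by T2, and T2 is itself blocked on M2 owned by T3,
 * the loop lends T1's priority down the chain:
 *
 *	T1 --waits--> M1 --owner--> T2 --waits--> M2 --owner--> T3
 *
 *	pri = UPRI(T1);
 *	for (pi = M1's umtx_pi; pi != NULL; pi = owner's uq_pi_blocked) {
 *		owner = pi->pi_owner;
 *		if (owner == NULL || owner == curthread)
 *			break;
 *		if (owner->td_lend_user_pri <= pri)
 *			break;		// chain already runs high enough
 *		sched_lend_user_prio(owner, pri);
 *	}
 *
 * Lower numeric values mean higher priority, so the walk stops as soon
 * as an owner is already at least as high as the blocked thread.
 */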
1436161678Sdavidxu
1437161678Sdavidxu/*
1438161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1439161678Sdavidxu * it is interrupted by a signal or resumed by others.
1440161678Sdavidxu */
1441161678Sdavidxustatic void
1442161678Sdavidxuumtx_unpropagate_priority(struct umtx_pi *pi)
1443161678Sdavidxu{
1444161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1445161678Sdavidxu	struct umtx_pi *pi2;
1446174701Sdavidxu	int pri, oldpri;
1447161678Sdavidxu
1448170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1449161678Sdavidxu
1450161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1451161678Sdavidxu		pri = PRI_MAX;
1452161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1453161678Sdavidxu
1454161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1455161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1456161678Sdavidxu			if (uq != NULL) {
1457161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1458161678Sdavidxu					pri = UPRI(uq->uq_thread);
1459161678Sdavidxu			}
1460161678Sdavidxu		}
1461161678Sdavidxu
1462161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1463161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1464170300Sjeff		thread_lock(pi->pi_owner);
1465174701Sdavidxu		oldpri = pi->pi_owner->td_user_pri;
1466161678Sdavidxu		sched_unlend_user_prio(pi->pi_owner, pri);
1467170300Sjeff		thread_unlock(pi->pi_owner);
1468189756Sdavidxu		if (uq_owner->uq_pi_blocked != NULL)
1469189756Sdavidxu			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1470161678Sdavidxu		pi = uq_owner->uq_pi_blocked;
1471161678Sdavidxu	}
1472161678Sdavidxu}
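
/*
 * A worked example of the recomputation above (numbers assumed for
 * illustration only).  Suppose the owner still holds two contested PI
 * mutexes whose head waiters have priority values 120 and 96, and its
 * ceiling-inherited priority uq_inherited_pri is PRI_MAX:
 *
 *	pri = MIN(PRI_MAX, MIN(120, 96)) = 96
 *
 * so sched_unlend_user_prio() drops the owner only to 96, never below
 * the highest-priority thread still waiting on something it owns.
 * Lower values mean higher priority throughout this file.
 */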
1473161678Sdavidxu
1474161678Sdavidxu/*
1475161678Sdavidxu * Insert a PI mutex into owned list.
1476161678Sdavidxu */
1477161678Sdavidxustatic void
1478161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1479161678Sdavidxu{
1480161678Sdavidxu	struct umtx_q *uq_owner;
1481161678Sdavidxu
1482161678Sdavidxu	uq_owner = owner->td_umtxq;
1483170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1484161678Sdavidxu	if (pi->pi_owner != NULL)
1485161678Sdavidxu		panic("pi_owner != NULL");
1486161678Sdavidxu	pi->pi_owner = owner;
1487161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1488161678Sdavidxu}
1489161678Sdavidxu
1490161678Sdavidxu/*
1491161678Sdavidxu * Claim ownership of a PI mutex.
1492161678Sdavidxu */
1493161678Sdavidxustatic int
1494161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1495161678Sdavidxu{
1496161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1497161678Sdavidxu
1498161678Sdavidxu	uq_owner = owner->td_umtxq;
1499170300Sjeff	mtx_lock_spin(&umtx_lock);
1500161678Sdavidxu	if (pi->pi_owner == owner) {
1501170300Sjeff		mtx_unlock_spin(&umtx_lock);
1502161678Sdavidxu		return (0);
1503161678Sdavidxu	}
1504161678Sdavidxu
1505161678Sdavidxu	if (pi->pi_owner != NULL) {
1506161678Sdavidxu		/*
1507161678Sdavidxu		 * Userland may have already messed with the mutex, sigh.
1508161678Sdavidxu		 */
1509170300Sjeff		mtx_unlock_spin(&umtx_lock);
1510161678Sdavidxu		return (EPERM);
1511161678Sdavidxu	}
1512161678Sdavidxu	umtx_pi_setowner(pi, owner);
1513161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1514161678Sdavidxu	if (uq != NULL) {
1515161678Sdavidxu		int pri;
1516161678Sdavidxu
1517161678Sdavidxu		pri = UPRI(uq->uq_thread);
1518170300Sjeff		thread_lock(owner);
1519161678Sdavidxu		if (pri < UPRI(owner))
1520161678Sdavidxu			sched_lend_user_prio(owner, pri);
1521170300Sjeff		thread_unlock(owner);
1522161678Sdavidxu	}
1523170300Sjeff	mtx_unlock_spin(&umtx_lock);
1524161678Sdavidxu	return (0);
1525161678Sdavidxu}
1526161678Sdavidxu
1527174701Sdavidxustatic void
1528174701Sdavidxuumtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1529161599Sdavidxu{
1530161678Sdavidxu	struct umtx_q *uq;
1531161678Sdavidxu	struct umtx_pi *pi;
1532161678Sdavidxu
1533161678Sdavidxu	uq = td->td_umtxq;
1534161678Sdavidxu	/*
1535161678Sdavidxu	 * Pick up the lock that td is blocked on.
1536161678Sdavidxu	 */
1537161678Sdavidxu	pi = uq->uq_pi_blocked;
1538161678Sdavidxu	MPASS(pi != NULL);
1539161678Sdavidxu
1540161678Sdavidxu	/* Re-sort the thread on the blocked list. */
1541161678Sdavidxu	if (!umtx_pi_adjust_thread(pi, td))
1542161678Sdavidxu		return;
1543161678Sdavidxu
1544161678Sdavidxu	/*
1545161678Sdavidxu	 * If our priority was lowered and we are at the head of the
1546161678Sdavidxu	 * turnstile, then propagate our new priority up the chain.
1547161678Sdavidxu	 */
1548161678Sdavidxu	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1549161678Sdavidxu		umtx_propagate_priority(td);
1550161599Sdavidxu}
1551161599Sdavidxu
1552161678Sdavidxu/*
1553174701Sdavidxu * Adjust a thread's position in the queue of the PI mutex it is
1554174701Sdavidxu * blocked on; this may trigger a new round of priority propagation.
1555174701Sdavidxu */
1556174701Sdavidxuvoid
1557174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1558174701Sdavidxu{
1559174707Sdavidxu	struct umtx_q *uq;
1560174707Sdavidxu	struct umtx_pi *pi;
1561174707Sdavidxu
1562174707Sdavidxu	uq = td->td_umtxq;
1563174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1564174707Sdavidxu	/*
1565174707Sdavidxu	 * Pick up the lock that td is blocked on.
1566174707Sdavidxu	 */
1567174707Sdavidxu	pi = uq->uq_pi_blocked;
1568174707Sdavidxu	if (pi != NULL)
1569174707Sdavidxu		umtx_pi_adjust_locked(td, oldpri);
1570174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1571174701Sdavidxu}
1572174701Sdavidxu
1573174701Sdavidxu/*
1574161678Sdavidxu * Sleep on a PI mutex.
1575161678Sdavidxu */
1576161678Sdavidxustatic int
1577161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1578161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1579161678Sdavidxu{
1580161678Sdavidxu	struct umtxq_chain *uc;
1581161678Sdavidxu	struct thread *td, *td1;
1582161678Sdavidxu	struct umtx_q *uq1;
1583161678Sdavidxu	int pri;
1584161678Sdavidxu	int error = 0;
1585161678Sdavidxu
1586161678Sdavidxu	td = uq->uq_thread;
1587161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1588161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1589161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1590189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1591161678Sdavidxu	umtxq_insert(uq);
1592189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1593161678Sdavidxu	if (pi->pi_owner == NULL) {
1594189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1595213642Sdavidxu		/* XXX Only look up thread in current process. */
1596213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1597170300Sjeff		mtx_lock_spin(&umtx_lock);
1598215336Sdavidxu		if (td1 != NULL) {
1599215336Sdavidxu			if (pi->pi_owner == NULL)
1600215336Sdavidxu				umtx_pi_setowner(pi, td1);
1601215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1602161678Sdavidxu		}
1603161678Sdavidxu	}
1604161678Sdavidxu
1605161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1606161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1607161678Sdavidxu		if (pri > UPRI(td))
1608161678Sdavidxu			break;
1609161678Sdavidxu	}
1610161678Sdavidxu
1611161678Sdavidxu	if (uq1 != NULL)
1612161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1613161678Sdavidxu	else
1614161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1615161678Sdavidxu
1616161678Sdavidxu	uq->uq_pi_blocked = pi;
1617174701Sdavidxu	thread_lock(td);
1618161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1619174701Sdavidxu	thread_unlock(td);
1620161678Sdavidxu	umtx_propagate_priority(td);
1621170300Sjeff	mtx_unlock_spin(&umtx_lock);
1622189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1623161678Sdavidxu
1624161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1625161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1626161678Sdavidxu		if (error == EWOULDBLOCK)
1627161678Sdavidxu			error = ETIMEDOUT;
1628161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1629161678Sdavidxu			umtxq_remove(uq);
1630161678Sdavidxu		}
1631161678Sdavidxu	}
1632170300Sjeff	mtx_lock_spin(&umtx_lock);
1633161678Sdavidxu	uq->uq_pi_blocked = NULL;
1634174701Sdavidxu	thread_lock(td);
1635161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1636174701Sdavidxu	thread_unlock(td);
1637161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1638161678Sdavidxu	umtx_unpropagate_priority(pi);
1639170300Sjeff	mtx_unlock_spin(&umtx_lock);
1640189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1641161678Sdavidxu
1642161678Sdavidxu	return (error);
1643161678Sdavidxu}
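
/*
 * Editorial note on the locking in umtxq_sleep_pi(): the order taken
 * above is
 *
 *	umtxq chain lock (uc_lock, busy) -> umtx_lock (spin) -> thread_lock
 *
 * and the chain stays busy until the thread sits on both the wait queue
 * and the pi_blocked list, so a concurrent do_unlock_pi() cannot pick a
 * wakeup target while the queue is half-built.
 */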
1644161678Sdavidxu
1645161678Sdavidxu/*
1646161678Sdavidxu * Add reference count for a PI mutex.
1647161678Sdavidxu */
1648161678Sdavidxustatic void
1649161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1650161678Sdavidxu{
1651161678Sdavidxu	struct umtxq_chain *uc;
1652161678Sdavidxu
1653161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1654161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1655161678Sdavidxu	pi->pi_refcount++;
1656161678Sdavidxu}
1657161678Sdavidxu
1658161678Sdavidxu/*
1659161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1660161678Sdavidxu * is decreased to zero, its memory space is freed.
1661161678Sdavidxu */
1662161678Sdavidxustatic void
1663161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1664161678Sdavidxu{
1665161678Sdavidxu	struct umtxq_chain *uc;
1666161678Sdavidxu
1667161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1668161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1669161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1670161678Sdavidxu	if (--pi->pi_refcount == 0) {
1671170300Sjeff		mtx_lock_spin(&umtx_lock);
1672161678Sdavidxu		if (pi->pi_owner != NULL) {
1673161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1674161678Sdavidxu				pi, pi_link);
1675161678Sdavidxu			pi->pi_owner = NULL;
1676161678Sdavidxu		}
1677161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1678161678Sdavidxu			("blocked queue not empty"));
1679170300Sjeff		mtx_unlock_spin(&umtx_lock);
1680161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1681189756Sdavidxu		umtx_pi_free(pi);
1682161678Sdavidxu	}
1683161678Sdavidxu}
1684161678Sdavidxu
1685161678Sdavidxu/*
1686161678Sdavidxu * Find a PI mutex in hash table.
1687161678Sdavidxu */
1688161678Sdavidxustatic struct umtx_pi *
1689161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1690161678Sdavidxu{
1691161678Sdavidxu	struct umtxq_chain *uc;
1692161678Sdavidxu	struct umtx_pi *pi;
1693161678Sdavidxu
1694161678Sdavidxu	uc = umtxq_getchain(key);
1695161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1696161678Sdavidxu
1697161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1698161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1699161678Sdavidxu			return (pi);
1700161678Sdavidxu		}
1701161678Sdavidxu	}
1702161678Sdavidxu	return (NULL);
1703161678Sdavidxu}
1704161678Sdavidxu
1705161678Sdavidxu/*
1706161678Sdavidxu * Insert a PI mutex into hash table.
1707161678Sdavidxu */
1708161678Sdavidxustatic inline void
1709161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1710161678Sdavidxu{
1711161678Sdavidxu	struct umtxq_chain *uc;
1712161678Sdavidxu
1713161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1714161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1715161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1716161678Sdavidxu}
1717161678Sdavidxu
1718161678Sdavidxu/*
1719161678Sdavidxu * Lock a PI mutex.
1720161678Sdavidxu */
1721161678Sdavidxustatic int
1722161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1723161678Sdavidxu	int try)
1724161678Sdavidxu{
1725161678Sdavidxu	struct umtx_q *uq;
1726161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1727161678Sdavidxu	uint32_t id, owner, old;
1728161678Sdavidxu	int error;
1729161678Sdavidxu
1730161678Sdavidxu	id = td->td_tid;
1731161678Sdavidxu	uq = td->td_umtxq;
1732161678Sdavidxu
1733161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1734161678Sdavidxu	    &uq->uq_key)) != 0)
1735161678Sdavidxu		return (error);
1736163697Sdavidxu	umtxq_lock(&uq->uq_key);
1737163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1738163697Sdavidxu	if (pi == NULL) {
1739163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1740163697Sdavidxu		if (new_pi == NULL) {
1741161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1742163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1743161678Sdavidxu			umtxq_lock(&uq->uq_key);
1744161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1745163697Sdavidxu			if (pi != NULL) {
1746161678Sdavidxu				umtx_pi_free(new_pi);
1747163697Sdavidxu				new_pi = NULL;
1748161678Sdavidxu			}
1749161678Sdavidxu		}
1750163697Sdavidxu		if (new_pi != NULL) {
1751163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1752163697Sdavidxu			umtx_pi_insert(new_pi);
1753163697Sdavidxu			pi = new_pi;
1754163697Sdavidxu		}
1755163697Sdavidxu	}
1756163697Sdavidxu	umtx_pi_ref(pi);
1757163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1758161678Sdavidxu
1759163697Sdavidxu	/*
1760163697Sdavidxu	 * Care must be exercised when dealing with the umtx structure:
1761163697Sdavidxu	 * any access to it can fault.
1762163697Sdavidxu	 */
1763163697Sdavidxu	for (;;) {
1764161678Sdavidxu		/*
1765161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1766161678Sdavidxu		 */
1767161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1768161678Sdavidxu
1769161678Sdavidxu		/* The acquire succeeded. */
1770161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1771161678Sdavidxu			error = 0;
1772161678Sdavidxu			break;
1773161678Sdavidxu		}
1774161678Sdavidxu
1775161678Sdavidxu		/* The address was invalid. */
1776161678Sdavidxu		if (owner == -1) {
1777161678Sdavidxu			error = EFAULT;
1778161678Sdavidxu			break;
1779161678Sdavidxu		}
1780161678Sdavidxu
1781161678Sdavidxu		/* If no one owns it but it is contested, try to acquire it. */
1782161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1783161678Sdavidxu			owner = casuword32(&m->m_owner,
1784161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1785161678Sdavidxu
1786161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1787161678Sdavidxu				umtxq_lock(&uq->uq_key);
1788189756Sdavidxu				umtxq_busy(&uq->uq_key);
1789161678Sdavidxu				error = umtx_pi_claim(pi, td);
1790189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1791161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1792161678Sdavidxu				break;
1793161678Sdavidxu			}
1794161678Sdavidxu
1795161678Sdavidxu			/* The address was invalid. */
1796161678Sdavidxu			if (owner == -1) {
1797161678Sdavidxu				error = EFAULT;
1798161678Sdavidxu				break;
1799161678Sdavidxu			}
1800161678Sdavidxu
1801161678Sdavidxu			/* If this failed, the lock has changed; restart. */
1802161678Sdavidxu			continue;
1803161678Sdavidxu		}
1804161678Sdavidxu
1805161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1806161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1807161678Sdavidxu			error = EDEADLK;
1808161678Sdavidxu			break;
1809161678Sdavidxu		}
1810161678Sdavidxu
1811161678Sdavidxu		if (try != 0) {
1812161678Sdavidxu			error = EBUSY;
1813161678Sdavidxu			break;
1814161678Sdavidxu		}
1815161678Sdavidxu
1816161678Sdavidxu		/*
1817161678Sdavidxu		 * If we caught a signal, we have already retried
1818161678Sdavidxu		 * and now exit immediately.
1819161678Sdavidxu		 */
1820161678Sdavidxu		if (error != 0)
1821161678Sdavidxu			break;
1822161678Sdavidxu
1823161678Sdavidxu		umtxq_lock(&uq->uq_key);
1824161678Sdavidxu		umtxq_busy(&uq->uq_key);
1825161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1826161678Sdavidxu
1827161678Sdavidxu		/*
1828161678Sdavidxu		 * Set the contested bit so that a release in user space
1829161678Sdavidxu		 * knows to use the system call for unlock.  If this
1830161678Sdavidxu		 * fails, either someone else has acquired the lock or
1831161678Sdavidxu		 * it has been released.
1832161678Sdavidxu		 */
1833161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1834161678Sdavidxu
1835161678Sdavidxu		/* The address was invalid. */
1836161678Sdavidxu		if (old == -1) {
1837161678Sdavidxu			umtxq_lock(&uq->uq_key);
1838161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1839161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1840161678Sdavidxu			error = EFAULT;
1841161678Sdavidxu			break;
1842161678Sdavidxu		}
1843161678Sdavidxu
1844161678Sdavidxu		umtxq_lock(&uq->uq_key);
1845161678Sdavidxu		/*
1846161678Sdavidxu		 * If we set the contested bit, sleep; otherwise the lock
1847161678Sdavidxu		 * changed and we need to retry, or we lost a race to the
1848161678Sdavidxu		 * thread unlocking the umtx.
1849161678Sdavidxu		 */
1850161678Sdavidxu		if (old == owner)
1851161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1852161678Sdavidxu				 "umtxpi", timo);
1853189756Sdavidxu		else {
1854189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
1855189756Sdavidxu			umtxq_unlock(&uq->uq_key);
1856189756Sdavidxu		}
1857161678Sdavidxu	}
1858161678Sdavidxu
1859163697Sdavidxu	umtxq_lock(&uq->uq_key);
1860163697Sdavidxu	umtx_pi_unref(pi);
1861163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1862161678Sdavidxu
1863161678Sdavidxu	umtx_key_release(&uq->uq_key);
1864161678Sdavidxu	return (error);
1865161678Sdavidxu}
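
/*
 * A minimal userland sketch of the fast path the loop above assumes
 * (hedged: my_tid is a placeholder for the caller's thread id; the
 * slow path goes through the _umtx_op(2) syscall):
 *
 *	#include <machine/atomic.h>
 *	#include <sys/umtx.h>
 *
 *	int
 *	pi_mutex_lock(struct umutex *m, uint32_t my_tid)
 *	{
 *		if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, my_tid))
 *			return (0);	// uncontested, no kernel entry
 *		return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
 *	}
 *
 * Only the contested cases fall into _do_lock_pi() above.
 */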
1866161678Sdavidxu
1867161678Sdavidxu/*
1868161678Sdavidxu * Unlock a PI mutex.
1869161678Sdavidxu */
1870161678Sdavidxustatic int
1871161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1872161678Sdavidxu{
1873161678Sdavidxu	struct umtx_key key;
1874161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1875161678Sdavidxu	struct umtx_pi *pi, *pi2;
1876161678Sdavidxu	uint32_t owner, old, id;
1877161678Sdavidxu	int error;
1878161678Sdavidxu	int count;
1879161678Sdavidxu	int pri;
1880161678Sdavidxu
1881161678Sdavidxu	id = td->td_tid;
1882161678Sdavidxu	/*
1883161678Sdavidxu	 * Make sure we own this mtx.
1884161678Sdavidxu	 */
1885163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1886161678Sdavidxu	if (owner == -1)
1887161678Sdavidxu		return (EFAULT);
1888161678Sdavidxu
1889161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1890161678Sdavidxu		return (EPERM);
1891161678Sdavidxu
1892161678Sdavidxu	/* This should be done in userland */
1893161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1894161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1895161678Sdavidxu		if (old == -1)
1896161678Sdavidxu			return (EFAULT);
1897161678Sdavidxu		if (old == owner)
1898161678Sdavidxu			return (0);
1899161855Sdavidxu		owner = old;
1900161678Sdavidxu	}
1901161678Sdavidxu
1902161678Sdavidxu	/* We should only ever be in here for contested locks */
1903161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1904161678Sdavidxu	    &key)) != 0)
1905161678Sdavidxu		return (error);
1906161678Sdavidxu
1907161678Sdavidxu	umtxq_lock(&key);
1908161678Sdavidxu	umtxq_busy(&key);
1909161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1910161678Sdavidxu	if (uq_first != NULL) {
1911189756Sdavidxu		mtx_lock_spin(&umtx_lock);
1912161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1913189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
1914161678Sdavidxu		if (pi->pi_owner != curthread) {
1915189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
1916161678Sdavidxu			umtxq_unbusy(&key);
1917161678Sdavidxu			umtxq_unlock(&key);
1918189756Sdavidxu			umtx_key_release(&key);
1919161678Sdavidxu			/* userland messed with the mutex */
1920161678Sdavidxu			return (EPERM);
1921161678Sdavidxu		}
1922161678Sdavidxu		uq_me = curthread->td_umtxq;
1923161678Sdavidxu		pi->pi_owner = NULL;
1924161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1925189756Sdavidxu		/* get the highest-priority thread that is still sleeping. */
1926161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1927189756Sdavidxu		while (uq_first != NULL &&
1928189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1929189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1930189756Sdavidxu		}
1931161678Sdavidxu		pri = PRI_MAX;
1932161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1933161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1934161678Sdavidxu			if (uq_first2 != NULL) {
1935161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1936161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1937161678Sdavidxu			}
1938161678Sdavidxu		}
1939170300Sjeff		thread_lock(curthread);
1940161678Sdavidxu		sched_unlend_user_prio(curthread, pri);
1941170300Sjeff		thread_unlock(curthread);
1942170300Sjeff		mtx_unlock_spin(&umtx_lock);
1943189756Sdavidxu		if (uq_first)
1944189756Sdavidxu			umtxq_signal_thread(uq_first);
1945161678Sdavidxu	}
1946161678Sdavidxu	umtxq_unlock(&key);
1947161678Sdavidxu
1948161678Sdavidxu	/*
1949161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1950161678Sdavidxu	 * zero or one thread is waiting for it; otherwise, it must
1951161678Sdavidxu	 * be marked as contested.
1952161678Sdavidxu	 */
1953161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1954161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1955161678Sdavidxu
1956161678Sdavidxu	umtxq_lock(&key);
1957161678Sdavidxu	umtxq_unbusy(&key);
1958161678Sdavidxu	umtxq_unlock(&key);
1959161678Sdavidxu	umtx_key_release(&key);
1960161678Sdavidxu	if (old == -1)
1961161678Sdavidxu		return (EFAULT);
1962161678Sdavidxu	if (old != owner)
1963161678Sdavidxu		return (EINVAL);
1964161678Sdavidxu	return (0);
1965161678Sdavidxu}
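
/*
 * The m_owner word protocol shared by _do_lock_pi() and do_unlock_pi()
 * above, restated as an editorial summary of the existing logic:
 *
 *	UMUTEX_UNOWNED (0)	unlocked; the userland fast path may CAS
 *				in a thread id
 *	tid			locked, uncontested; unlock stays in
 *				userland
 *	tid | UMUTEX_CONTESTED	locked with (possible) waiters; unlock
 *				must enter the kernel to signal a waiter
 *	UMUTEX_CONTESTED	unlocked but waiters may remain; lock must
 *				enter the kernel to claim PI ownership
 */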
1966161678Sdavidxu
1967161678Sdavidxu/*
1968161678Sdavidxu * Lock a PP mutex.
1969161678Sdavidxu */
1970161678Sdavidxustatic int
1971161678Sdavidxu_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1972161678Sdavidxu	int try)
1973161678Sdavidxu{
1974161678Sdavidxu	struct umtx_q *uq, *uq2;
1975161678Sdavidxu	struct umtx_pi *pi;
1976161678Sdavidxu	uint32_t ceiling;
1977161678Sdavidxu	uint32_t owner, id;
1978161678Sdavidxu	int error, pri, old_inherited_pri, su;
1979161678Sdavidxu
1980161678Sdavidxu	id = td->td_tid;
1981161678Sdavidxu	uq = td->td_umtxq;
1982161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1983161678Sdavidxu	    &uq->uq_key)) != 0)
1984161678Sdavidxu		return (error);
1985164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1986161678Sdavidxu	for (;;) {
1987161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1988161678Sdavidxu		umtxq_lock(&uq->uq_key);
1989161678Sdavidxu		umtxq_busy(&uq->uq_key);
1990161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1991161678Sdavidxu
1992161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1993161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1994161678Sdavidxu			error = EINVAL;
1995161678Sdavidxu			goto out;
1996161678Sdavidxu		}
1997161678Sdavidxu
1998170300Sjeff		mtx_lock_spin(&umtx_lock);
1999161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2000170300Sjeff			mtx_unlock_spin(&umtx_lock);
2001161678Sdavidxu			error = EINVAL;
2002161678Sdavidxu			goto out;
2003161678Sdavidxu		}
2004161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2005161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2006170300Sjeff			thread_lock(td);
2007161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
2008161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
2009170300Sjeff			thread_unlock(td);
2010161678Sdavidxu		}
2011170300Sjeff		mtx_unlock_spin(&umtx_lock);
2012161678Sdavidxu
2013161678Sdavidxu		owner = casuword32(&m->m_owner,
2014161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2015161678Sdavidxu
2016161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2017161678Sdavidxu			error = 0;
2018161678Sdavidxu			break;
2019161678Sdavidxu		}
2020161678Sdavidxu
2021161678Sdavidxu		/* The address was invalid. */
2022161678Sdavidxu		if (owner == -1) {
2023161678Sdavidxu			error = EFAULT;
2024161678Sdavidxu			break;
2025161678Sdavidxu		}
2026161678Sdavidxu
2027161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2028161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2029161678Sdavidxu			error = EDEADLK;
2030161678Sdavidxu			break;
2031161678Sdavidxu		}
2032161678Sdavidxu
2033161678Sdavidxu		if (try != 0) {
2034161678Sdavidxu			error = EBUSY;
2035161678Sdavidxu			break;
2036161678Sdavidxu		}
2037161678Sdavidxu
2038161678Sdavidxu		/*
2039161678Sdavidxu		 * If we caught a signal, we have already retried
2040161678Sdavidxu		 * and now exit immediately.
2041161678Sdavidxu		 */
2042161678Sdavidxu		if (error != 0)
2043161678Sdavidxu			break;
2044161678Sdavidxu
2045161678Sdavidxu		umtxq_lock(&uq->uq_key);
2046161678Sdavidxu		umtxq_insert(uq);
2047161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2048161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timo);
2049161678Sdavidxu		umtxq_remove(uq);
2050161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2051161678Sdavidxu
2052170300Sjeff		mtx_lock_spin(&umtx_lock);
2053161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2054161678Sdavidxu		pri = PRI_MAX;
2055161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2056161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2057161678Sdavidxu			if (uq2 != NULL) {
2058161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2059161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2060161678Sdavidxu			}
2061161678Sdavidxu		}
2062161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2063161678Sdavidxu			pri = uq->uq_inherited_pri;
2064170300Sjeff		thread_lock(td);
2065161678Sdavidxu		sched_unlend_user_prio(td, pri);
2066170300Sjeff		thread_unlock(td);
2067170300Sjeff		mtx_unlock_spin(&umtx_lock);
2068161678Sdavidxu	}
2069161678Sdavidxu
2070161678Sdavidxu	if (error != 0) {
2071170300Sjeff		mtx_lock_spin(&umtx_lock);
2072161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2073161678Sdavidxu		pri = PRI_MAX;
2074161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2075161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2076161678Sdavidxu			if (uq2 != NULL) {
2077161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2078161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2079161678Sdavidxu			}
2080161678Sdavidxu		}
2081161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2082161678Sdavidxu			pri = uq->uq_inherited_pri;
2083170300Sjeff		thread_lock(td);
2084161678Sdavidxu		sched_unlend_user_prio(td, pri);
2085170300Sjeff		thread_unlock(td);
2086170300Sjeff		mtx_unlock_spin(&umtx_lock);
2087161678Sdavidxu	}
2088161678Sdavidxu
2089161678Sdavidxuout:
2090161678Sdavidxu	umtxq_lock(&uq->uq_key);
2091161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2092161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2093161678Sdavidxu	umtx_key_release(&uq->uq_key);
2094161678Sdavidxu	return (error);
2095161678Sdavidxu}
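
/*
 * A worked example of the ceiling mapping above (constants as defined
 * in <sys/priority.h> and <sys/rtprio.h> of this vintage, where
 * RTP_PRIO_MAX == 31 and PRI_MIN_REALTIME == 128).  A ceiling c stored
 * in m_ceilings[0] maps to the kernel user-priority value
 *
 *	PRI_MIN_REALTIME + (RTP_PRIO_MAX - c)
 *
 * so c == 31 (the strongest ceiling) maps to 128 and c == 0 maps to
 * 159.  A thread may take the lock only when UPRI(td) is numerically
 * at least the mapped value, i.e. its own priority does not exceed
 * the ceiling.
 */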
2096161678Sdavidxu
2097161678Sdavidxu/*
2098161678Sdavidxu * Unlock a PP mutex.
2099161678Sdavidxu */
2100161678Sdavidxustatic int
2101161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2102161678Sdavidxu{
2103161678Sdavidxu	struct umtx_key key;
2104161678Sdavidxu	struct umtx_q *uq, *uq2;
2105161678Sdavidxu	struct umtx_pi *pi;
2106161678Sdavidxu	uint32_t owner, id;
2107161678Sdavidxu	uint32_t rceiling;
2108161926Sdavidxu	int error, pri, new_inherited_pri, su;
2109161678Sdavidxu
2110161678Sdavidxu	id = td->td_tid;
2111161678Sdavidxu	uq = td->td_umtxq;
2112164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2113161678Sdavidxu
2114161678Sdavidxu	/*
2115161678Sdavidxu	 * Make sure we own this mtx.
2116161678Sdavidxu	 */
2117163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2118161678Sdavidxu	if (owner == -1)
2119161678Sdavidxu		return (EFAULT);
2120161678Sdavidxu
2121161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2122161678Sdavidxu		return (EPERM);
2123161678Sdavidxu
2124161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2125161678Sdavidxu	if (error != 0)
2126161678Sdavidxu		return (error);
2127161678Sdavidxu
2128161678Sdavidxu	if (rceiling == -1)
2129161678Sdavidxu		new_inherited_pri = PRI_MAX;
2130161678Sdavidxu	else {
2131161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2132161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2133161678Sdavidxu			return (EINVAL);
2134161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2135161678Sdavidxu	}
2136161678Sdavidxu
2137161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2138161678Sdavidxu	    &key)) != 0)
2139161678Sdavidxu		return (error);
2140161678Sdavidxu	umtxq_lock(&key);
2141161678Sdavidxu	umtxq_busy(&key);
2142161678Sdavidxu	umtxq_unlock(&key);
2143161678Sdavidxu	/*
2144161678Sdavidxu	 * For a priority-protected mutex, always set the unlocked state
2145161678Sdavidxu	 * to UMUTEX_CONTESTED so that userland always enters the kernel
2146161678Sdavidxu	 * to lock the mutex.  This is necessary because the thread's
2147161678Sdavidxu	 * priority has to be adjusted for such a mutex.
2148161678Sdavidxu	 */
2149163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2150163449Sdavidxu		UMUTEX_CONTESTED);
2151161678Sdavidxu
2152161678Sdavidxu	umtxq_lock(&key);
2153161678Sdavidxu	if (error == 0)
2154161678Sdavidxu		umtxq_signal(&key, 1);
2155161678Sdavidxu	umtxq_unbusy(&key);
2156161678Sdavidxu	umtxq_unlock(&key);
2157161678Sdavidxu
2158161678Sdavidxu	if (error == -1)
2159161678Sdavidxu		error = EFAULT;
2160161678Sdavidxu	else {
2161170300Sjeff		mtx_lock_spin(&umtx_lock);
2162161926Sdavidxu		if (su != 0)
2163161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2164161678Sdavidxu		pri = PRI_MAX;
2165161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2166161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2167161678Sdavidxu			if (uq2 != NULL) {
2168161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2169161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2170161678Sdavidxu			}
2171161678Sdavidxu		}
2172161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2173161678Sdavidxu			pri = uq->uq_inherited_pri;
2174170300Sjeff		thread_lock(td);
2175161678Sdavidxu		sched_unlend_user_prio(td, pri);
2176170300Sjeff		thread_unlock(td);
2177170300Sjeff		mtx_unlock_spin(&umtx_lock);
2178161678Sdavidxu	}
2179161678Sdavidxu	umtx_key_release(&key);
2180161678Sdavidxu	return (error);
2181161678Sdavidxu}
2182161678Sdavidxu
2183161678Sdavidxustatic int
2184161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2185161678Sdavidxu	uint32_t *old_ceiling)
2186161678Sdavidxu{
2187161678Sdavidxu	struct umtx_q *uq;
2188161678Sdavidxu	uint32_t save_ceiling;
2189161678Sdavidxu	uint32_t owner, id;
2190161678Sdavidxu	uint32_t flags;
2191161678Sdavidxu	int error;
2192161678Sdavidxu
2193161678Sdavidxu	flags = fuword32(&m->m_flags);
2194161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2195161678Sdavidxu		return (EINVAL);
2196161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2197161678Sdavidxu		return (EINVAL);
2198161678Sdavidxu	id = td->td_tid;
2199161678Sdavidxu	uq = td->td_umtxq;
2200161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2201161678Sdavidxu	   &uq->uq_key)) != 0)
2202161678Sdavidxu		return (error);
2203161678Sdavidxu	for (;;) {
2204161678Sdavidxu		umtxq_lock(&uq->uq_key);
2205161678Sdavidxu		umtxq_busy(&uq->uq_key);
2206161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2207161678Sdavidxu
2208161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2209161678Sdavidxu
2210161678Sdavidxu		owner = casuword32(&m->m_owner,
2211161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2212161678Sdavidxu
2213161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2214161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2215163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2216163449Sdavidxu				UMUTEX_CONTESTED);
2217161678Sdavidxu			error = 0;
2218161678Sdavidxu			break;
2219161678Sdavidxu		}
2220161678Sdavidxu
2221161678Sdavidxu		/* The address was invalid. */
2222161678Sdavidxu		if (owner == -1) {
2223161678Sdavidxu			error = EFAULT;
2224161678Sdavidxu			break;
2225161678Sdavidxu		}
2226161678Sdavidxu
2227161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2228161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2229161678Sdavidxu			error = 0;
2230161678Sdavidxu			break;
2231161678Sdavidxu		}
2232161678Sdavidxu
2233161678Sdavidxu		/*
2234161678Sdavidxu		 * If we caught a signal, we have already retried
2235161678Sdavidxu		 * and now exit immediately.
2236161678Sdavidxu		 */
2237161678Sdavidxu		if (error != 0)
2238161678Sdavidxu			break;
2239161678Sdavidxu
2240161678Sdavidxu		/*
2241161678Sdavidxu		 * If we set the contested bit, sleep; otherwise the lock
2242161678Sdavidxu		 * changed and we need to retry, or we lost a race to the
2243161678Sdavidxu		 * thread unlocking the umtx.
2244161678Sdavidxu		 */
2245161678Sdavidxu		umtxq_lock(&uq->uq_key);
2246161678Sdavidxu		umtxq_insert(uq);
2247161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2248161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", 0);
2249161678Sdavidxu		umtxq_remove(uq);
2250161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2251161678Sdavidxu	}
2252161678Sdavidxu	umtxq_lock(&uq->uq_key);
2253161678Sdavidxu	if (error == 0)
2254161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2255161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2256161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2257161678Sdavidxu	umtx_key_release(&uq->uq_key);
2258161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2259161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2260161678Sdavidxu	return (error);
2261161678Sdavidxu}
2262161678Sdavidxu
2263162030Sdavidxustatic int
2264162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2265179970Sdavidxu	int mode)
2266162030Sdavidxu{
2267162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2268162030Sdavidxu	case 0:
2269179970Sdavidxu		return (_do_lock_normal(td, m, flags, timo, mode));
2270162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2271179970Sdavidxu		return (_do_lock_pi(td, m, flags, timo, mode));
2272162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2273179970Sdavidxu		return (_do_lock_pp(td, m, flags, timo, mode));
2274162030Sdavidxu	}
2275162030Sdavidxu	return (EINVAL);
2276162030Sdavidxu}
2277162030Sdavidxu
2278161678Sdavidxu/*
2279161678Sdavidxu * Lock a userland POSIX mutex.
2280161678Sdavidxu */
2281161678Sdavidxustatic int
2282162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2283179970Sdavidxu	struct timespec *timeout, int mode)
2284161678Sdavidxu{
2285162030Sdavidxu	struct timespec ts, ts2, ts3;
2286162030Sdavidxu	struct timeval tv;
2287161678Sdavidxu	uint32_t flags;
2288162030Sdavidxu	int error;
2289161678Sdavidxu
2290161678Sdavidxu	flags = fuword32(&m->m_flags);
2291161678Sdavidxu	if (flags == -1)
2292161678Sdavidxu		return (EFAULT);
2293161678Sdavidxu
2294162030Sdavidxu	if (timeout == NULL) {
2295179970Sdavidxu		error = _do_lock_umutex(td, m, flags, 0, mode);
2296162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
2297179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2298162030Sdavidxu			error = ERESTART;
2299162030Sdavidxu	} else {
2300162030Sdavidxu		getnanouptime(&ts);
2301162030Sdavidxu		timespecadd(&ts, timeout);
2302162030Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2303162030Sdavidxu		for (;;) {
2304179970Sdavidxu			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2305162030Sdavidxu			if (error != ETIMEDOUT)
2306162030Sdavidxu				break;
2307162030Sdavidxu			getnanouptime(&ts2);
2308162030Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
2309162030Sdavidxu				error = ETIMEDOUT;
2310162030Sdavidxu				break;
2311162030Sdavidxu			}
2312162030Sdavidxu			ts3 = ts;
2313162030Sdavidxu			timespecsub(&ts3, &ts2);
2314162030Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2315162030Sdavidxu		}
2316162030Sdavidxu		/* Timed-locking is not restarted. */
2317162030Sdavidxu		if (error == ERESTART)
2318162030Sdavidxu			error = EINTR;
2319161742Sdavidxu	}
2320162030Sdavidxu	return (error);
2321161678Sdavidxu}
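
/*
 * The timed branch above amounts to a retry loop against a fixed
 * deadline; a condensed restatement (editorial, same logic):
 *
 *	getnanouptime(&ts);
 *	timespecadd(&ts, timeout);		// absolute deadline
 *	do {
 *		error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
 *		getnanouptime(&ts2);
 *	} while (error == ETIMEDOUT && timespeccmp(&ts2, &ts, <));
 *
 * Each ETIMEDOUT wakeup shrinks tv to the remaining slice, so early
 * msleep() timeouts never shorten the caller's total wait, and an
 * interrupted timed lock returns EINTR instead of being restarted.
 */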
2322161678Sdavidxu
2323161678Sdavidxu/*
2324161678Sdavidxu * Unlock a userland POSIX mutex.
2325161678Sdavidxu */
2326161678Sdavidxustatic int
2327161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2328161678Sdavidxu{
2329161678Sdavidxu	uint32_t flags;
2330161678Sdavidxu
2331161678Sdavidxu	flags = fuword32(&m->m_flags);
2332161678Sdavidxu	if (flags == -1)
2333161678Sdavidxu		return (EFAULT);
2334161678Sdavidxu
2335161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2336161855Sdavidxu	case 0:
2337161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2338161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2339161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2340161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2341161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2342161855Sdavidxu	}
2343161678Sdavidxu
2344161855Sdavidxu	return (EINVAL);
2345161678Sdavidxu}
2346161678Sdavidxu
2347164839Sdavidxustatic int
2348164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2349164876Sdavidxu	struct timespec *timeout, u_long wflags)
2350164839Sdavidxu{
2351164839Sdavidxu	struct umtx_q *uq;
2352164839Sdavidxu	struct timeval tv;
2353164839Sdavidxu	struct timespec cts, ets, tts;
2354164839Sdavidxu	uint32_t flags;
2355216641Sdavidxu	uint32_t clockid;
2356164839Sdavidxu	int error;
2357164839Sdavidxu
2358164839Sdavidxu	uq = td->td_umtxq;
2359164839Sdavidxu	flags = fuword32(&cv->c_flags);
2360164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2361164839Sdavidxu	if (error != 0)
2362164839Sdavidxu		return (error);
2363216641Sdavidxu
2364216641Sdavidxu	if ((wflags & CVWAIT_CLOCKID) != 0) {
2365216641Sdavidxu		clockid = fuword32(&cv->c_clockid);
2366216641Sdavidxu		if (clockid < CLOCK_REALTIME ||
2367216641Sdavidxu		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2368216641Sdavidxu			/* Only predefined (static) clock IDs will work. */
2369216641Sdavidxu			return (EINVAL);
2370216641Sdavidxu		}
2371216641Sdavidxu	} else {
2372216641Sdavidxu		clockid = CLOCK_REALTIME;
2373216641Sdavidxu	}
2374216641Sdavidxu
2375164839Sdavidxu	umtxq_lock(&uq->uq_key);
2376164839Sdavidxu	umtxq_busy(&uq->uq_key);
2377164839Sdavidxu	umtxq_insert(uq);
2378164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2379164839Sdavidxu
2380164839Sdavidxu	/*
2381216641Sdavidxu	 * Set c_has_waiters to 1 before releasing the user mutex, but
2382216641Sdavidxu	 * avoid dirtying the cache line when it is not necessary.
2383164839Sdavidxu	 */
2384216641Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2385216641Sdavidxu		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2386164839Sdavidxu
2387164839Sdavidxu	umtxq_lock(&uq->uq_key);
2388164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2389164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2390164839Sdavidxu
2391164839Sdavidxu	error = do_unlock_umutex(td, m);
2392164839Sdavidxu
2393164839Sdavidxu	umtxq_lock(&uq->uq_key);
2394164839Sdavidxu	if (error == 0) {
2395216641Sdavidxu		if (timeout == NULL) {
2396164839Sdavidxu			error = umtxq_sleep(uq, "ucond", 0);
2397164839Sdavidxu		} else {
2398216641Sdavidxu			if ((wflags & CVWAIT_ABSTIME) == 0) {
2399216641Sdavidxu				kern_clock_gettime(td, clockid, &ets);
2400216641Sdavidxu				timespecadd(&ets, timeout);
2401216641Sdavidxu				tts = *timeout;
2402216641Sdavidxu			} else { /* absolute time */
2403216641Sdavidxu				ets = *timeout;
2404216641Sdavidxu				tts = *timeout;
2405216641Sdavidxu				kern_clock_gettime(td, clockid, &cts);
2406216641Sdavidxu				timespecsub(&tts, &cts);
2407216641Sdavidxu			}
2408216641Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2409164839Sdavidxu			for (;;) {
2410164839Sdavidxu				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2411164839Sdavidxu				if (error != ETIMEDOUT)
2412164839Sdavidxu					break;
2413216641Sdavidxu				kern_clock_gettime(td, clockid, &cts);
2414164839Sdavidxu				if (timespeccmp(&cts, &ets, >=)) {
2415164839Sdavidxu					error = ETIMEDOUT;
2416164839Sdavidxu					break;
2417164839Sdavidxu				}
2418164839Sdavidxu				tts = ets;
2419164839Sdavidxu				timespecsub(&tts, &cts);
2420164839Sdavidxu				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2421164839Sdavidxu			}
2422164839Sdavidxu		}
2423164839Sdavidxu	}
2424164839Sdavidxu
2425211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2426211794Sdavidxu		error = 0;
2427211794Sdavidxu	else {
2428216641Sdavidxu		/*
2429216641Sdavidxu		 * This must be a timeout, an interruption by a signal,
2430216641Sdavidxu		 * or a spurious wakeup; clear the c_has_waiters flag
2431216641Sdavidxu		 * when necessary.
2432216641Sdavidxu		 */
2433216641Sdavidxu		umtxq_busy(&uq->uq_key);
2434216641Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2435216641Sdavidxu			int oldlen = uq->uq_cur_queue->length;
2436216641Sdavidxu			umtxq_remove(uq);
2437216641Sdavidxu			if (oldlen == 1) {
2438216641Sdavidxu				umtxq_unlock(&uq->uq_key);
2439216641Sdavidxu				suword32(
2440216641Sdavidxu				    __DEVOLATILE(uint32_t *,
2441216641Sdavidxu					 &cv->c_has_waiters), 0);
2442216641Sdavidxu				umtxq_lock(&uq->uq_key);
2443216641Sdavidxu			}
2444216641Sdavidxu		}
2445216641Sdavidxu		umtxq_unbusy(&uq->uq_key);
2446164839Sdavidxu		if (error == ERESTART)
2447164839Sdavidxu			error = EINTR;
2448164839Sdavidxu	}
2449211794Sdavidxu
2450164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2451164839Sdavidxu	umtx_key_release(&uq->uq_key);
2452164839Sdavidxu	return (error);
2453164839Sdavidxu}
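
/*
 * How a userland condition wait is expected to drive do_cv_wait()
 * above (a hedged sketch of the libthr pattern; error handling
 * omitted):
 *
 *	// kernel queues the thread, sets c_has_waiters, unlocks m,
 *	// then sleeps -- all under the busied umtxq chain:
 *	error = _umtx_op(cv, UMTX_OP_CV_WAIT, wflags, m, timeout);
 *
 *	// on wakeup the mutex is NOT reacquired by the kernel; the
 *	// caller relocks it, as pthread_cond_wait() semantics require:
 *	_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 */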
2454164839Sdavidxu
2455164839Sdavidxu/*
2456164839Sdavidxu * Signal a userland condition variable.
2457164839Sdavidxu */
2458164839Sdavidxustatic int
2459164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2460164839Sdavidxu{
2461164839Sdavidxu	struct umtx_key key;
2462164839Sdavidxu	int error, cnt, nwake;
2463164839Sdavidxu	uint32_t flags;
2464164839Sdavidxu
2465164839Sdavidxu	flags = fuword32(&cv->c_flags);
2466164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2467164839Sdavidxu		return (error);
2468164839Sdavidxu	umtxq_lock(&key);
2469164839Sdavidxu	umtxq_busy(&key);
2470164839Sdavidxu	cnt = umtxq_count(&key);
2471164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2472164839Sdavidxu	if (cnt <= nwake) {
2473164839Sdavidxu		umtxq_unlock(&key);
2474164839Sdavidxu		error = suword32(
2475164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2476164839Sdavidxu		umtxq_lock(&key);
2477164839Sdavidxu	}
2478164839Sdavidxu	umtxq_unbusy(&key);
2479164839Sdavidxu	umtxq_unlock(&key);
2480164839Sdavidxu	umtx_key_release(&key);
2481164839Sdavidxu	return (error);
2482164839Sdavidxu}
2483164839Sdavidxu
2484164839Sdavidxustatic int
2485164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2486164839Sdavidxu{
2487164839Sdavidxu	struct umtx_key key;
2488164839Sdavidxu	int error;
2489164839Sdavidxu	uint32_t flags;
2490164839Sdavidxu
2491164839Sdavidxu	flags = fuword32(&cv->c_flags);
2492164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2493164839Sdavidxu		return (error);
2494164839Sdavidxu
2495164839Sdavidxu	umtxq_lock(&key);
2496164839Sdavidxu	umtxq_busy(&key);
2497164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2498164839Sdavidxu	umtxq_unlock(&key);
2499164839Sdavidxu
2500164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2501164839Sdavidxu
2502164839Sdavidxu	umtxq_lock(&key);
2503164839Sdavidxu	umtxq_unbusy(&key);
2504164839Sdavidxu	umtxq_unlock(&key);
2505164839Sdavidxu
2506164839Sdavidxu	umtx_key_release(&key);
2507164839Sdavidxu	return (error);
2508164839Sdavidxu}
2509164839Sdavidxu
2510177848Sdavidxustatic int
2511177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2512177848Sdavidxu{
2513177848Sdavidxu	struct umtx_q *uq;
2514177848Sdavidxu	uint32_t flags, wrflags;
2515177848Sdavidxu	int32_t state, oldstate;
2516177848Sdavidxu	int32_t blocked_readers;
2517177848Sdavidxu	int error;
2518177848Sdavidxu
2519177848Sdavidxu	uq = td->td_umtxq;
2520177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2521177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2522177848Sdavidxu	if (error != 0)
2523177848Sdavidxu		return (error);
2524177848Sdavidxu
2525177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2526177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2527177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2528177848Sdavidxu
2529177848Sdavidxu	for (;;) {
2530177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2531177848Sdavidxu		/* try to lock it */
2532177848Sdavidxu		while (!(state & wrflags)) {
2533177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2534177848Sdavidxu				umtx_key_release(&uq->uq_key);
2535177848Sdavidxu				return (EAGAIN);
2536177848Sdavidxu			}
2537177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2538177848Sdavidxu			if (oldstate == state) {
2539177848Sdavidxu				umtx_key_release(&uq->uq_key);
2540177848Sdavidxu				return (0);
2541177848Sdavidxu			}
2542177848Sdavidxu			state = oldstate;
2543177848Sdavidxu		}
2544177848Sdavidxu
2545177848Sdavidxu		if (error)
2546177848Sdavidxu			break;
2547177848Sdavidxu
2548177848Sdavidxu		/* grab monitor lock */
2549177848Sdavidxu		umtxq_lock(&uq->uq_key);
2550177848Sdavidxu		umtxq_busy(&uq->uq_key);
2551177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2552177848Sdavidxu
2553203414Sdavidxu		/*
2554203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2555203414Sdavidxu		 * and the check below
2556203414Sdavidxu		 */
2557203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2558203414Sdavidxu
2559177848Sdavidxu		/* set read contention bit */
2560177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2561177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2562177848Sdavidxu			if (oldstate == state)
2563177848Sdavidxu				goto sleep;
2564177848Sdavidxu			state = oldstate;
2565177848Sdavidxu		}
2566177848Sdavidxu
2567177848Sdavidxu		/* state is changed while setting flags, restart */
2568177848Sdavidxu		/* state changed while setting flags; restart */
2569177848Sdavidxu			umtxq_lock(&uq->uq_key);
2570177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2571177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2572177848Sdavidxu			continue;
2573177848Sdavidxu		}
2574177848Sdavidxu
2575177848Sdavidxusleep:
2576177848Sdavidxu		/* contention bit is set; before sleeping, increase read waiter count */
2577177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2578177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2579177848Sdavidxu
2580177848Sdavidxu		while (state & wrflags) {
2581177848Sdavidxu			umtxq_lock(&uq->uq_key);
2582177848Sdavidxu			umtxq_insert(uq);
2583177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2584177848Sdavidxu
2585177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2586177848Sdavidxu
2587177848Sdavidxu			umtxq_busy(&uq->uq_key);
2588177848Sdavidxu			umtxq_remove(uq);
2589177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2590177848Sdavidxu			if (error)
2591177848Sdavidxu				break;
2592177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2593177848Sdavidxu		}
2594177848Sdavidxu
2595177848Sdavidxu		/* decrease read waiter count, and may clear read contention bit */
2596177848Sdavidxu		/* decrease read waiter count, and possibly clear read contention bit */
2597177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2598177848Sdavidxu		if (blocked_readers == 1) {
2599177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2600177848Sdavidxu			for (;;) {
2601177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2602177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2603177848Sdavidxu				if (oldstate == state)
2604177848Sdavidxu					break;
2605177848Sdavidxu				state = oldstate;
2606177848Sdavidxu			}
2607177848Sdavidxu		}
2608177848Sdavidxu
2609177848Sdavidxu		umtxq_lock(&uq->uq_key);
2610177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2611177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2612177848Sdavidxu	}
2613177848Sdavidxu	umtx_key_release(&uq->uq_key);
2614177848Sdavidxu	return (error);
2615177848Sdavidxu}
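
/*
 * Layout of the rw_state word manipulated above, mirroring the
 * URWLOCK_* macros in <sys/umtx.h>:
 *
 *	bit 31		URWLOCK_WRITE_OWNER
 *	bit 30		URWLOCK_WRITE_WAITERS
 *	bit 29		URWLOCK_READ_WAITERS
 *	bits 28..0	reader count (URWLOCK_READER_COUNT())
 *
 * Because the count and the flags share one 32-bit word, a single
 * casuword32() can take a read lock ("state + 1"), publish contention,
 * or drop the waiter bits, which is what the loops above rely on.
 */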
2616177848Sdavidxu
2617177848Sdavidxustatic int
2618177848Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2619177848Sdavidxu{
2620177848Sdavidxu	struct timespec ts, ts2, ts3;
2621177848Sdavidxu	struct timeval tv;
2622177848Sdavidxu	int error;
2623177848Sdavidxu
2624177848Sdavidxu	getnanouptime(&ts);
2625177848Sdavidxu	timespecadd(&ts, timeout);
2626177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2627177848Sdavidxu	for (;;) {
2628177848Sdavidxu		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2629177848Sdavidxu		if (error != ETIMEDOUT)
2630177848Sdavidxu			break;
2631177848Sdavidxu		getnanouptime(&ts2);
2632177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2633177848Sdavidxu			error = ETIMEDOUT;
2634177848Sdavidxu			break;
2635177848Sdavidxu		}
2636177848Sdavidxu		ts3 = ts;
2637177848Sdavidxu		timespecsub(&ts3, &ts2);
2638177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2639177848Sdavidxu	}
2640177849Sdavidxu	if (error == ERESTART)
2641177849Sdavidxu		error = EINTR;
2642177848Sdavidxu	return (error);
2643177848Sdavidxu}
2644177848Sdavidxu
2645177848Sdavidxustatic int
2646177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2647177848Sdavidxu{
2648177848Sdavidxu	struct umtx_q *uq;
2649177848Sdavidxu	uint32_t flags;
2650177848Sdavidxu	int32_t state, oldstate;
2651177848Sdavidxu	int32_t blocked_writers;
2652197476Sdavidxu	int32_t blocked_readers;
2653177848Sdavidxu	int error;
2654177848Sdavidxu
2655177848Sdavidxu	uq = td->td_umtxq;
2656177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2657177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2658177848Sdavidxu	if (error != 0)
2659177848Sdavidxu		return (error);
2660177848Sdavidxu
2661197476Sdavidxu	blocked_readers = 0;
2662177848Sdavidxu	for (;;) {
2663177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2664177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2665177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2666177848Sdavidxu			if (oldstate == state) {
2667177848Sdavidxu				umtx_key_release(&uq->uq_key);
2668177848Sdavidxu				return (0);
2669177848Sdavidxu			}
2670177848Sdavidxu			state = oldstate;
2671177848Sdavidxu		}
2672177848Sdavidxu
2673197476Sdavidxu		if (error) {
2674197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2675197476Sdavidxu			    blocked_readers != 0) {
2676197476Sdavidxu				umtxq_lock(&uq->uq_key);
2677197476Sdavidxu				umtxq_busy(&uq->uq_key);
2678197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2679197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2680197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2681197476Sdavidxu			}
2682197476Sdavidxu
2683177848Sdavidxu			break;
2684197476Sdavidxu		}
2685177848Sdavidxu
2686177848Sdavidxu		/* grab monitor lock */
2687177848Sdavidxu		umtxq_lock(&uq->uq_key);
2688177848Sdavidxu		umtxq_busy(&uq->uq_key);
2689177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2690177848Sdavidxu
2691203414Sdavidxu		/*
2692203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2693203414Sdavidxu		 * and the check below
2694203414Sdavidxu		 */
2695203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2696203414Sdavidxu
2697177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2698177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2699177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2700177848Sdavidxu			if (oldstate == state)
2701177848Sdavidxu				goto sleep;
2702177848Sdavidxu			state = oldstate;
2703177848Sdavidxu		}
2704177848Sdavidxu
2705177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2706177848Sdavidxu			umtxq_lock(&uq->uq_key);
2707177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2708177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2709177848Sdavidxu			continue;
2710177848Sdavidxu		}
2711177848Sdavidxusleep:
2712177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2713177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2714177848Sdavidxu
2715177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2716177848Sdavidxu			umtxq_lock(&uq->uq_key);
2717177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2718177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2719177848Sdavidxu
2720177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2721177848Sdavidxu
2722177848Sdavidxu			umtxq_busy(&uq->uq_key);
2723177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2724177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2725177848Sdavidxu			if (error)
2726177848Sdavidxu				break;
2727177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2728177848Sdavidxu		}
2729177848Sdavidxu
2730177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2731177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2732177848Sdavidxu		if (blocked_writers == 1) {
2733177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2734177848Sdavidxu			for (;;) {
2735177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2736177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2737177848Sdavidxu				if (oldstate == state)
2738177848Sdavidxu					break;
2739177848Sdavidxu				state = oldstate;
2740177848Sdavidxu			}
2741197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2742197476Sdavidxu		} else
2743197476Sdavidxu			blocked_readers = 0;
2744177848Sdavidxu
2745177848Sdavidxu		umtxq_lock(&uq->uq_key);
2746177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2747177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2748177848Sdavidxu	}
2749177848Sdavidxu
2750177848Sdavidxu	umtx_key_release(&uq->uq_key);
2751177848Sdavidxu	return (error);
2752177848Sdavidxu}
2753177848Sdavidxu
2754177848Sdavidxustatic int
2755177848Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2756177848Sdavidxu{
2757177848Sdavidxu	struct timespec ts, ts2, ts3;
2758177848Sdavidxu	struct timeval tv;
2759177848Sdavidxu	int error;
2760177848Sdavidxu
2761177848Sdavidxu	getnanouptime(&ts);
2762177848Sdavidxu	timespecadd(&ts, timeout);
2763177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2764177848Sdavidxu	for (;;) {
2765177848Sdavidxu		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2766177848Sdavidxu		if (error != ETIMEDOUT)
2767177848Sdavidxu			break;
2768177848Sdavidxu		getnanouptime(&ts2);
2769177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2770177848Sdavidxu			error = ETIMEDOUT;
2771177848Sdavidxu			break;
2772177848Sdavidxu		}
2773177848Sdavidxu		ts3 = ts;
2774177848Sdavidxu		timespecsub(&ts3, &ts2);
2775177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2776177848Sdavidxu	}
2777177849Sdavidxu	if (error == ERESTART)
2778177849Sdavidxu		error = EINTR;
2779177848Sdavidxu	return (error);
2780177848Sdavidxu}
2781177848Sdavidxu
2782177848Sdavidxustatic int
2783177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2784177848Sdavidxu{
2785177848Sdavidxu	struct umtx_q *uq;
2786177848Sdavidxu	uint32_t flags;
2787177848Sdavidxu	int32_t state, oldstate;
2788177848Sdavidxu	int error, q, count;
2789177848Sdavidxu
2790177848Sdavidxu	uq = td->td_umtxq;
2791177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2792177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2793177848Sdavidxu	if (error != 0)
2794177848Sdavidxu		return (error);
2795177848Sdavidxu
2796177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2797177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2798177848Sdavidxu		for (;;) {
2799177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2800177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2801177848Sdavidxu			if (oldstate != state) {
2802177848Sdavidxu				state = oldstate;
2803177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2804177848Sdavidxu					error = EPERM;
2805177848Sdavidxu					goto out;
2806177848Sdavidxu				}
2807177848Sdavidxu			} else
2808177848Sdavidxu				break;
2809177848Sdavidxu		}
2810177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2811177848Sdavidxu		for (;;) {
2812177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2813177848Sdavidxu				state - 1);
2814177848Sdavidxu			if (oldstate != state) {
2815177848Sdavidxu				state = oldstate;
2816177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2817177848Sdavidxu					error = EPERM;
2818177848Sdavidxu					goto out;
2819177848Sdavidxu				}
2820177848Sdavidxu			} else
2822177848Sdavidxu				break;
2823177848Sdavidxu		}
2824177848Sdavidxu	} else {
2825177848Sdavidxu		error = EPERM;
2826177848Sdavidxu		goto out;
2827177848Sdavidxu	}
2828177848Sdavidxu
2829177848Sdavidxu	count = 0;
2830177848Sdavidxu
2831177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2832177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2833177848Sdavidxu			count = 1;
2834177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2835177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2836177848Sdavidxu			count = INT_MAX;
2837177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2838177848Sdavidxu		}
2839177848Sdavidxu	} else {
2840177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2841177848Sdavidxu			count = INT_MAX;
2842177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2843177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2844177848Sdavidxu			count = 1;
2845177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2846177848Sdavidxu		}
2847177848Sdavidxu	}
2848177848Sdavidxu
2849177848Sdavidxu	if (count) {
2850177848Sdavidxu		umtxq_lock(&uq->uq_key);
2851177848Sdavidxu		umtxq_busy(&uq->uq_key);
2852177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2853177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2854177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2855177848Sdavidxu	}
2856177848Sdavidxuout:
2857177848Sdavidxu	umtx_key_release(&uq->uq_key);
2858177848Sdavidxu	return (error);
2859177848Sdavidxu}
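
/*
 * Illustrative userland pairing for the rwlock operations dispatched
 * below (a sketch, not the libthr code; uaddr2, when non-NULL, points
 * at a relative struct timespec):
 *
 *	_umtx_op(&rw, UMTX_OP_RW_RDLOCK, fflag, NULL, NULL);
 *	_umtx_op(&rw, UMTX_OP_RW_WRLOCK, 0, NULL, NULL);
 *	_umtx_op(&rw, UMTX_OP_RW_UNLOCK, 0, NULL, NULL);
 */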
2860177848Sdavidxu
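/*
 * Wait on a userland semaphore.  The waiter publishes _has_waiters
 * before re-reading _count, so a poster that increments the count and
 * then tests the flag cannot miss it; if the count is already nonzero
 * the wait returns at once and the decrement is done in userland.
 */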
2861201472Sdavidxustatic int
2862201472Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
2863201472Sdavidxu{
2864201472Sdavidxu	struct umtx_q *uq;
2865201472Sdavidxu	struct timeval tv;
2866201472Sdavidxu	struct timespec cts, ets, tts;
2867201472Sdavidxu	uint32_t flags, count;
2868201472Sdavidxu	int error;
2869201472Sdavidxu
2870201472Sdavidxu	uq = td->td_umtxq;
2871201472Sdavidxu	flags = fuword32(&sem->_flags);
2872201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2873201472Sdavidxu	if (error != 0)
2874201472Sdavidxu		return (error);
2875201472Sdavidxu	umtxq_lock(&uq->uq_key);
2876201472Sdavidxu	umtxq_busy(&uq->uq_key);
2877201472Sdavidxu	umtxq_insert(uq);
2878201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2879201472Sdavidxu
2880215652Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
2881215652Sdavidxu		casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2882203657Sdavidxu
2883201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2884201472Sdavidxu	if (count != 0) {
2885201472Sdavidxu		umtxq_lock(&uq->uq_key);
2886201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2887201472Sdavidxu		umtxq_remove(uq);
2888201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2889201472Sdavidxu		umtx_key_release(&uq->uq_key);
2890201472Sdavidxu		return (0);
2891201472Sdavidxu	}
2892201472Sdavidxu
2893201472Sdavidxu	umtxq_lock(&uq->uq_key);
2894201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2895201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2896201472Sdavidxu
2897201472Sdavidxu	umtxq_lock(&uq->uq_key);
2898201472Sdavidxu	if (timeout == NULL) {
2899201472Sdavidxu		error = umtxq_sleep(uq, "usem", 0);
2900201472Sdavidxu	} else {
2901201472Sdavidxu		getnanouptime(&ets);
2902201472Sdavidxu		timespecadd(&ets, timeout);
2903201472Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2904201472Sdavidxu		for (;;) {
2905201472Sdavidxu			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
2906201472Sdavidxu			if (error != ETIMEDOUT)
2907201472Sdavidxu				break;
2908201472Sdavidxu			getnanouptime(&cts);
2909201472Sdavidxu			if (timespeccmp(&cts, &ets, >=)) {
2910201472Sdavidxu				error = ETIMEDOUT;
2911201472Sdavidxu				break;
2912201472Sdavidxu			}
2913201472Sdavidxu			tts = ets;
2914201472Sdavidxu			timespecsub(&tts, &cts);
2915201472Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2916201472Sdavidxu		}
2917201472Sdavidxu	}
2918201472Sdavidxu
2919211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2920211794Sdavidxu		error = 0;
2921211794Sdavidxu	else {
2922211794Sdavidxu		umtxq_remove(uq);
2923201472Sdavidxu		if (error == ERESTART)
2924201472Sdavidxu			error = EINTR;
2925201472Sdavidxu	}
2926201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2927201472Sdavidxu	umtx_key_release(&uq->uq_key);
2928201472Sdavidxu	return (error);
2929201472Sdavidxu}
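
/*
 * Sketch of the userland slow path this pairs with (illustrative only,
 * not the libc implementation; the winner of the compare-and-set does
 * the actual decrement):
 *
 *	for (;;) {
 *		uint32_t count = sem->_count;
 *		while (count > 0) {
 *			if (atomic_cmpset_acq_32(&sem->_count, count,
 *			    count - 1))
 *				return (0);
 *			count = sem->_count;
 *		}
 *		_umtx_op(sem, UMTX_OP_SEM_WAIT, 0, NULL, timeout);
 *	}
 */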
2930201472Sdavidxu
2931201472Sdavidxu/*
2932201472Sdavidxu * Wake up a waiter on a userland semaphore.
2933201472Sdavidxu */
2934201472Sdavidxustatic int
2935201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2936201472Sdavidxu{
2937201472Sdavidxu	struct umtx_key key;
2938201472Sdavidxu	int error, cnt, nwake;
2939201472Sdavidxu	uint32_t flags;
2940201472Sdavidxu
2941201472Sdavidxu	flags = fuword32(&sem->_flags);
2942201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2943201472Sdavidxu		return (error);
2944201472Sdavidxu	umtxq_lock(&key);
2945201472Sdavidxu	umtxq_busy(&key);
2946201472Sdavidxu	cnt = umtxq_count(&key);
2947201472Sdavidxu	nwake = umtxq_signal(&key, 1);
2948201472Sdavidxu	if (cnt <= nwake) {
2949201472Sdavidxu		umtxq_unlock(&key);
2950201472Sdavidxu		error = suword32(
2951201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2952201472Sdavidxu		umtxq_lock(&key);
2953201472Sdavidxu	}
2954201472Sdavidxu	umtxq_unbusy(&key);
2955201472Sdavidxu	umtxq_unlock(&key);
2956201472Sdavidxu	umtx_key_release(&key);
2957201472Sdavidxu	return (error);
2958201472Sdavidxu}
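
/*
 * The matching userland post (an illustrative sketch): publish the new
 * count with release semantics first, then wake only when _has_waiters
 * says someone may be sleeping in do_sem_wait():
 *
 *	atomic_add_rel_32(&sem->_count, 1);
 *	if (sem->_has_waiters)
 *		_umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL);
 */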
2959201472Sdavidxu
2960139013Sdavidxuint
2961139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2962139013Sdavidxu    /* struct umtx *umtx */
2963139013Sdavidxu{
2964162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2965139013Sdavidxu}
2966139013Sdavidxu
2967139013Sdavidxuint
2968139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2969139013Sdavidxu    /* struct umtx *umtx */
2970139013Sdavidxu{
2971162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2972139013Sdavidxu}
2973139013Sdavidxu
2974162536Sdavidxustatic int
2975162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2976139013Sdavidxu{
2977162536Sdavidxu	struct timespec *ts, timeout;
2978139013Sdavidxu	int error;
2979139013Sdavidxu
2980162536Sdavidxu	/* Allow a null timespec (wait forever). */
2981162536Sdavidxu	if (uap->uaddr2 == NULL)
2982162536Sdavidxu		ts = NULL;
2983162536Sdavidxu	else {
2984162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2985162536Sdavidxu		if (error != 0)
2986162536Sdavidxu			return (error);
2987162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2988162536Sdavidxu		    timeout.tv_nsec < 0) {
2989162536Sdavidxu			return (EINVAL);
2990161678Sdavidxu		}
2991162536Sdavidxu		ts = &timeout;
2992162536Sdavidxu	}
2993162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2994162536Sdavidxu}
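
/*
 * Most timed operations below repeat the same copyin-and-validate
 * dance; a shared helper (hypothetical, shown only to document the
 * invariant the handlers enforce) would read:
 *
 *	static int
 *	umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 *	{
 *		int error;
 *
 *		error = copyin(addr, tsp, sizeof(*tsp));
 *		if (error == 0 &&
 *		    (tsp->tv_nsec >= 1000000000 || tsp->tv_nsec < 0))
 *			error = EINVAL;
 *		return (error);
 *	}
 */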
2995162536Sdavidxu
2996162536Sdavidxustatic int
2997162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2998162536Sdavidxu{
2999162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
3000162536Sdavidxu}
3001162536Sdavidxu
3002162536Sdavidxustatic int
3003162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3004162536Sdavidxu{
3005162536Sdavidxu	struct timespec *ts, timeout;
3006162536Sdavidxu	int error;
3007162536Sdavidxu
3008162536Sdavidxu	if (uap->uaddr2 == NULL)
3009162536Sdavidxu		ts = NULL;
3010162536Sdavidxu	else {
3011162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3012162536Sdavidxu		if (error != 0)
3013162536Sdavidxu			return (error);
3014162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3015162536Sdavidxu		    timeout.tv_nsec < 0)
3016162536Sdavidxu			return (EINVAL);
3017162536Sdavidxu		ts = &timeout;
3018162536Sdavidxu	}
3019178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
3020162536Sdavidxu}
3021162536Sdavidxu
3022162536Sdavidxustatic int
3023173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3024173800Sdavidxu{
3025173800Sdavidxu	struct timespec *ts, timeout;
3026173800Sdavidxu	int error;
3027173800Sdavidxu
3028173800Sdavidxu	if (uap->uaddr2 == NULL)
3029173800Sdavidxu		ts = NULL;
3030173800Sdavidxu	else {
3031173800Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3032173800Sdavidxu		if (error != 0)
3033173800Sdavidxu			return (error);
3034173800Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3035173800Sdavidxu		    timeout.tv_nsec < 0)
3036173800Sdavidxu			return (EINVAL);
3037173800Sdavidxu		ts = &timeout;
3038173800Sdavidxu	}
3039178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3040173800Sdavidxu}
3041173800Sdavidxu
3042173800Sdavidxustatic int
3043178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3044178646Sdavidxu{
3045178646Sdavidxu	struct timespec *ts, timeout;
3046178646Sdavidxu	int error;
3047178646Sdavidxu
3048178646Sdavidxu	if (uap->uaddr2 == NULL)
3049178646Sdavidxu		ts = NULL;
3050178646Sdavidxu	else {
3051178646Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3052178646Sdavidxu		if (error != 0)
3053178646Sdavidxu			return (error);
3054178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3055178646Sdavidxu		    timeout.tv_nsec < 0)
3056178646Sdavidxu			return (EINVAL);
3057178646Sdavidxu		ts = &timeout;
3058178646Sdavidxu	}
3059178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3060178646Sdavidxu}
3061178646Sdavidxu
3062178646Sdavidxustatic int
3063162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3064162536Sdavidxu{
3065178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3066162536Sdavidxu}
3067162536Sdavidxu
3068216641Sdavidxu#define BATCH_SIZE	128
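/*
 * Wake the waiters of a batch of private wait words.  uap->obj points
 * at a userland array of uap->val pointers, which are staged through a
 * fixed on-stack buffer in BATCH_SIZE chunks (1KB on LP64) so a large
 * request cannot exhaust the kernel stack.
 */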
3069162536Sdavidxustatic int
3070216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3071216641Sdavidxu{
3072216641Sdavidxu	int count = uap->val;
3073216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3074216641Sdavidxu	char **upp = (char **)uap->obj;
3075216641Sdavidxu	int tocopy;
3076216641Sdavidxu	int error = 0;
3077216641Sdavidxu	int i, pos = 0;
3078216641Sdavidxu
3079216641Sdavidxu	while (count > 0) {
3080216641Sdavidxu		tocopy = count;
3081216641Sdavidxu		if (tocopy > BATCH_SIZE)
3082216641Sdavidxu			tocopy = BATCH_SIZE;
3083216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3084216641Sdavidxu		if (error != 0)
3085216641Sdavidxu			break;
3086216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3087216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3088216641Sdavidxu		count -= tocopy;
3089216641Sdavidxu		pos += tocopy;
3090216641Sdavidxu	}
3091216641Sdavidxu	return (error);
3092216641Sdavidxu}
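
/*
 * Illustrative userland use (w1..w3 are hypothetical wait words): wake
 * every waiter on several private words with one system call:
 *
 *	static void *addrs[] = { &w1, &w2, &w3 };
 *	_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, 3, NULL, NULL);
 */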
3093216641Sdavidxu
3094216641Sdavidxustatic int
3095178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3096178646Sdavidxu{
3097178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3098178646Sdavidxu}
3099178646Sdavidxu
3100178646Sdavidxustatic int
3101162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3102162536Sdavidxu{
3103162536Sdavidxu	struct timespec *ts, timeout;
3104162536Sdavidxu	int error;
3105162536Sdavidxu
3106162536Sdavidxu	/* Allow a null timespec (wait forever). */
3107162536Sdavidxu	if (uap->uaddr2 == NULL)
3108162536Sdavidxu		ts = NULL;
3109162536Sdavidxu	else {
3110162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3111162536Sdavidxu		    sizeof(timeout));
3112162536Sdavidxu		if (error != 0)
3113162536Sdavidxu			return (error);
3114162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3115162536Sdavidxu		    timeout.tv_nsec < 0) {
3116162536Sdavidxu			return (EINVAL);
3117139013Sdavidxu		}
3118162536Sdavidxu		ts = &timeout;
3119139013Sdavidxu	}
3120162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3121162536Sdavidxu}
3122162536Sdavidxu
3123162536Sdavidxustatic int
3124162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3125162536Sdavidxu{
3126179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3127162536Sdavidxu}
3128162536Sdavidxu
3129162536Sdavidxustatic int
3130179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3131179970Sdavidxu{
3132179970Sdavidxu	struct timespec *ts, timeout;
3133179970Sdavidxu	int error;
3134179970Sdavidxu
3135179970Sdavidxu	/* Allow a null timespec (wait forever). */
3136179970Sdavidxu	if (uap->uaddr2 == NULL)
3137179970Sdavidxu		ts = NULL;
3138179970Sdavidxu	else {
3139179970Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3140179970Sdavidxu		    sizeof(timeout));
3141179970Sdavidxu		if (error != 0)
3142179970Sdavidxu			return (error);
3143179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3144179970Sdavidxu		    timeout.tv_nsec < 0) {
3145179970Sdavidxu			return (EINVAL);
3146179970Sdavidxu		}
3147179970Sdavidxu		ts = &timeout;
3148179970Sdavidxu	}
3149179970Sdavidxu	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3150179970Sdavidxu}
3151179970Sdavidxu
3152179970Sdavidxustatic int
3153179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3154179970Sdavidxu{
3155179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3156179970Sdavidxu}
3157179970Sdavidxu
3158179970Sdavidxustatic int
3159162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3160162536Sdavidxu{
3161162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3162162536Sdavidxu}
3163162536Sdavidxu
3164162536Sdavidxustatic int
3165162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3166162536Sdavidxu{
3167162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3168162536Sdavidxu}
3169162536Sdavidxu
3170164839Sdavidxustatic int
3171164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3172164839Sdavidxu{
3173164839Sdavidxu	struct timespec *ts, timeout;
3174164839Sdavidxu	int error;
3175164839Sdavidxu
3176164839Sdavidxu	/* Allow a null timespec (wait forever). */
3177164839Sdavidxu	if (uap->uaddr2 == NULL)
3178164839Sdavidxu		ts = NULL;
3179164839Sdavidxu	else {
3180164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3181164839Sdavidxu		    sizeof(timeout));
3182164839Sdavidxu		if (error != 0)
3183164839Sdavidxu			return (error);
3184164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3185164839Sdavidxu		    timeout.tv_nsec < 0) {
3186164839Sdavidxu			return (EINVAL);
3187164839Sdavidxu		}
3188164839Sdavidxu		ts = &timeout;
3189164839Sdavidxu	}
3190164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3191164839Sdavidxu}
3192164839Sdavidxu
3193164839Sdavidxustatic int
3194164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3195164839Sdavidxu{
3196164839Sdavidxu	return do_cv_signal(td, uap->obj);
3197164839Sdavidxu}
3198164839Sdavidxu
3199164839Sdavidxustatic int
3200164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3201164839Sdavidxu{
3202164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3203164839Sdavidxu}
3204164839Sdavidxu
3205177848Sdavidxustatic int
3206177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3207177848Sdavidxu{
3208177848Sdavidxu	struct timespec timeout;
3209177848Sdavidxu	int error;
3210177848Sdavidxu
3211177848Sdavidxu	/* Allow a null timespec (wait forever). */
3212177848Sdavidxu	if (uap->uaddr2 == NULL) {
3213177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3214177848Sdavidxu	} else {
3215177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3216177848Sdavidxu		    sizeof(timeout));
3217177848Sdavidxu		if (error != 0)
3218177848Sdavidxu			return (error);
3219177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3220177848Sdavidxu		    timeout.tv_nsec < 0) {
3221177848Sdavidxu			return (EINVAL);
3222177848Sdavidxu		}
3223177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3224177848Sdavidxu	}
3225177848Sdavidxu	return (error);
3226177848Sdavidxu}
3227177848Sdavidxu
3228177848Sdavidxustatic int
3229177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3230177848Sdavidxu{
3231177848Sdavidxu	struct timespec timeout;
3232177848Sdavidxu	int error;
3233177848Sdavidxu
3234177848Sdavidxu	/* Allow a null timespec (wait forever). */
3235177848Sdavidxu	if (uap->uaddr2 == NULL) {
3236177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3237177848Sdavidxu	} else {
3238177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3239177848Sdavidxu		    sizeof(timeout));
3240177848Sdavidxu		if (error != 0)
3241177848Sdavidxu			return (error);
3242177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3243177848Sdavidxu		    timeout.tv_nsec < 0) {
3244177848Sdavidxu			return (EINVAL);
3245177848Sdavidxu		}
3246177848Sdavidxu
3247177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3248177848Sdavidxu	}
3249177848Sdavidxu	return (error);
3250177848Sdavidxu}
3251177848Sdavidxu
3252177848Sdavidxustatic int
3253177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3254177848Sdavidxu{
3255177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3256177848Sdavidxu}
3257177848Sdavidxu
3258201472Sdavidxustatic int
3259201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3260201472Sdavidxu{
3261201472Sdavidxu	struct timespec *ts, timeout;
3262201472Sdavidxu	int error;
3263201472Sdavidxu
3264201472Sdavidxu	/* Allow a null timespec (wait forever). */
3265201472Sdavidxu	if (uap->uaddr2 == NULL)
3266201472Sdavidxu		ts = NULL;
3267201472Sdavidxu	else {
3268201472Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3269201472Sdavidxu		    sizeof(timeout));
3270201472Sdavidxu		if (error != 0)
3271201472Sdavidxu			return (error);
3272201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3273201472Sdavidxu		    timeout.tv_nsec < 0) {
3274201472Sdavidxu			return (EINVAL);
3275201472Sdavidxu		}
3276201472Sdavidxu		ts = &timeout;
3277201472Sdavidxu	}
3278201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3279201472Sdavidxu}
3280201472Sdavidxu
3281201472Sdavidxustatic int
3282201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3283201472Sdavidxu{
3284201472Sdavidxu	return do_sem_wake(td, uap->obj);
3285201472Sdavidxu}
3286201472Sdavidxu
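/*
 * Operation dispatch for _umtx_op(2): uap->op indexes the table below;
 * obj, val, uaddr1 and uaddr2 are interpreted per operation.  An
 * illustrative call waking one waiter on a shared word:
 *
 *	_umtx_op(&word, UMTX_OP_WAKE, 1, NULL, NULL);
 */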
3287162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3288162536Sdavidxu
3289162536Sdavidxustatic _umtx_op_func op_table[] = {
3290162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3291162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3292162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3293162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3294162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3295162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3296162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3297164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3298164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3299164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3300173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3301177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3302177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3303177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3304178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3305178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3306179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3307179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3308201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3309201472Sdavidxu	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3310216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3311216641Sdavidxu	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
3312162536Sdavidxu};
3313162536Sdavidxu
3314162536Sdavidxuint
3315162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
3316162536Sdavidxu{
3317163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3318162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3319162536Sdavidxu	return (EINVAL);
3320162536Sdavidxu}
3321162536Sdavidxu
3322205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3323163046Sdavidxuint
3324163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3325163046Sdavidxu    /* struct umtx *umtx */
3326163046Sdavidxu{
3327163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3328163046Sdavidxu}
3329163046Sdavidxu
3330163046Sdavidxuint
3331163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3332163046Sdavidxu    /* struct umtx *umtx */
3333163046Sdavidxu{
3334163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3335163046Sdavidxu}
3336163046Sdavidxu
3337162536Sdavidxustruct timespec32 {
3338209390Sed	uint32_t tv_sec;
3339209390Sed	uint32_t tv_nsec;
3340162536Sdavidxu};
3341162536Sdavidxu
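/*
 * Copy in a 32-bit timespec from a compat process and widen it to the
 * native struct timespec; range validation is left to the callers.
 */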
3342162536Sdavidxustatic inline int
3343162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
3344162536Sdavidxu{
3345162536Sdavidxu	struct timespec32 ts32;
3346162536Sdavidxu	int error;
3347162536Sdavidxu
3348162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3349162536Sdavidxu	if (error == 0) {
3350162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
3351162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
3352162536Sdavidxu	}
3353140421Sdavidxu	return (error);
3354139013Sdavidxu}
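
/*
 * As on the native side, every timed compat32 handler below repeats
 * the same tv_nsec range check; a shared wrapper (hypothetical sketch)
 * would be:
 *
 *	static int
 *	umtx_copyin_timeout32(void *addr, struct timespec *tsp)
 *	{
 *		int error;
 *
 *		error = copyin_timeout32(addr, tsp);
 *		if (error == 0 &&
 *		    (tsp->tv_nsec >= 1000000000 || tsp->tv_nsec < 0))
 *			error = EINVAL;
 *		return (error);
 *	}
 */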
3355161678Sdavidxu
3356162536Sdavidxustatic int
3357162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3358162536Sdavidxu{
3359162536Sdavidxu	struct timespec *ts, timeout;
3360162536Sdavidxu	int error;
3361162536Sdavidxu
3362162536Sdavidxu	/* Allow a null timespec (wait forever). */
3363162536Sdavidxu	if (uap->uaddr2 == NULL)
3364162536Sdavidxu		ts = NULL;
3365162536Sdavidxu	else {
3366162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3367162536Sdavidxu		if (error != 0)
3368162536Sdavidxu			return (error);
3369162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3370162536Sdavidxu		    timeout.tv_nsec < 0) {
3371162536Sdavidxu			return (EINVAL);
3372162536Sdavidxu		}
3373162536Sdavidxu		ts = &timeout;
3374162536Sdavidxu	}
3375162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3376162536Sdavidxu}
3377162536Sdavidxu
3378162536Sdavidxustatic int
3379162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3380162536Sdavidxu{
3381162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3382162536Sdavidxu}
3383162536Sdavidxu
3384162536Sdavidxustatic int
3385162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3386162536Sdavidxu{
3387162536Sdavidxu	struct timespec *ts, timeout;
3388162536Sdavidxu	int error;
3389162536Sdavidxu
3390162536Sdavidxu	if (uap->uaddr2 == NULL)
3391162536Sdavidxu		ts = NULL;
3392162536Sdavidxu	else {
3393162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3394162536Sdavidxu		if (error != 0)
3395162536Sdavidxu			return (error);
3396162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3397162536Sdavidxu		    timeout.tv_nsec < 0)
3398162536Sdavidxu			return (EINVAL);
3399162536Sdavidxu		ts = &timeout;
3400162536Sdavidxu	}
3401178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3402162536Sdavidxu}
3403162536Sdavidxu
3404162536Sdavidxustatic int
3405162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3406162536Sdavidxu{
3407162536Sdavidxu	struct timespec *ts, timeout;
3408162536Sdavidxu	int error;
3409162536Sdavidxu
3410162536Sdavidxu	/* Allow a null timespec (wait forever). */
3411162536Sdavidxu	if (uap->uaddr2 == NULL)
3412162536Sdavidxu		ts = NULL;
3413162536Sdavidxu	else {
3414162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3415162536Sdavidxu		if (error != 0)
3416162536Sdavidxu			return (error);
3417162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3418162536Sdavidxu		    timeout.tv_nsec < 0)
3419162536Sdavidxu			return (EINVAL);
3420162536Sdavidxu		ts = &timeout;
3421162536Sdavidxu	}
3422162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3423162536Sdavidxu}
3424162536Sdavidxu
3425164839Sdavidxustatic int
3426179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3427179970Sdavidxu{
3428179970Sdavidxu	struct timespec *ts, timeout;
3429179970Sdavidxu	int error;
3430179970Sdavidxu
3431179970Sdavidxu	/* Allow a null timespec (wait forever). */
3432179970Sdavidxu	if (uap->uaddr2 == NULL)
3433179970Sdavidxu		ts = NULL;
3434179970Sdavidxu	else {
3435179970Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3436179970Sdavidxu		if (error != 0)
3437179970Sdavidxu			return (error);
3438179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3439179970Sdavidxu		    timeout.tv_nsec < 0)
3440179970Sdavidxu			return (EINVAL);
3441179970Sdavidxu		ts = &timeout;
3442179970Sdavidxu	}
3443179970Sdavidxu	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3444179970Sdavidxu}
3445179970Sdavidxu
3446179970Sdavidxustatic int
3447164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3448164839Sdavidxu{
3449164839Sdavidxu	struct timespec *ts, timeout;
3450164839Sdavidxu	int error;
3451164839Sdavidxu
3452164839Sdavidxu	/* Allow a null timespec (wait forever). */
3453164839Sdavidxu	if (uap->uaddr2 == NULL)
3454164839Sdavidxu		ts = NULL;
3455164839Sdavidxu	else {
3456164839Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3457164839Sdavidxu		if (error != 0)
3458164839Sdavidxu			return (error);
3459164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3460164839Sdavidxu		    timeout.tv_nsec < 0)
3461164839Sdavidxu			return (EINVAL);
3462164839Sdavidxu		ts = &timeout;
3463164839Sdavidxu	}
3464164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3465164839Sdavidxu}
3466164839Sdavidxu
3467177848Sdavidxustatic int
3468177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3469177848Sdavidxu{
3470177848Sdavidxu	struct timespec timeout;
3471177848Sdavidxu	int error;
3472177848Sdavidxu
3473177848Sdavidxu	/* Allow a null timespec (wait forever). */
3474177848Sdavidxu	if (uap->uaddr2 == NULL) {
3475177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3476177848Sdavidxu	} else {
3477216463Smdf		error = copyin_timeout32(uap->uaddr2, &timeout);
3478177848Sdavidxu		if (error != 0)
3479177848Sdavidxu			return (error);
3480177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3481177848Sdavidxu		    timeout.tv_nsec < 0) {
3482177848Sdavidxu			return (EINVAL);
3483177848Sdavidxu		}
3484177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3485177848Sdavidxu	}
3486177848Sdavidxu	return (error);
3487177848Sdavidxu}
3488177848Sdavidxu
3489177848Sdavidxustatic int
3490177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3491177848Sdavidxu{
3492177848Sdavidxu	struct timespec timeout;
3493177848Sdavidxu	int error;
3494177848Sdavidxu
3495177848Sdavidxu	/* Allow a null timespec (wait forever). */
3496177848Sdavidxu	if (uap->uaddr2 == NULL) {
3497177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3498177848Sdavidxu	} else {
3499177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3500177848Sdavidxu		if (error != 0)
3501177848Sdavidxu			return (error);
3502177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3503177848Sdavidxu		    timeout.tv_nsec < 0) {
3504177848Sdavidxu			return (EINVAL);
3505177848Sdavidxu		}
3506177848Sdavidxu
3507177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3508177848Sdavidxu	}
3509177848Sdavidxu	return (error);
3510177848Sdavidxu}
3511177848Sdavidxu
3512178646Sdavidxustatic int
3513178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3514178646Sdavidxu{
3515178646Sdavidxu	struct timespec *ts, timeout;
3516178646Sdavidxu	int error;
3517178646Sdavidxu
3518178646Sdavidxu	if (uap->uaddr2 == NULL)
3519178646Sdavidxu		ts = NULL;
3520178646Sdavidxu	else {
3521178646Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3522178646Sdavidxu		if (error != 0)
3523178646Sdavidxu			return (error);
3524178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3525178646Sdavidxu		    timeout.tv_nsec < 0)
3526178646Sdavidxu			return (EINVAL);
3527178646Sdavidxu		ts = &timeout;
3528178646Sdavidxu	}
3529178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3530178646Sdavidxu}
3531178646Sdavidxu
3532201472Sdavidxustatic int
3533201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3534201472Sdavidxu{
3535201472Sdavidxu	struct timespec *ts, timeout;
3536201472Sdavidxu	int error;
3537201472Sdavidxu
3538201472Sdavidxu	/* Allow a null timespec (wait forever). */
3539201472Sdavidxu	if (uap->uaddr2 == NULL)
3540201472Sdavidxu		ts = NULL;
3541201472Sdavidxu	else {
3542201472Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3543201472Sdavidxu		if (error != 0)
3544201472Sdavidxu			return (error);
3545201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3546201472Sdavidxu		    timeout.tv_nsec < 0)
3547201472Sdavidxu			return (EINVAL);
3548201472Sdavidxu		ts = &timeout;
3549201472Sdavidxu	}
3550201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3551201472Sdavidxu}
3552201472Sdavidxu
3553216641Sdavidxustatic int
3554216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3555216641Sdavidxu{
3556216641Sdavidxu	int count = uap->val;
3557216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3558216641Sdavidxu	uint32_t *upp = (uint32_t *)uap->obj;	/* array of 32-bit pointers */
3559216641Sdavidxu	int tocopy;
3560216641Sdavidxu	int error = 0;
3561216641Sdavidxu	int i, pos = 0;
3562216641Sdavidxu
3563216641Sdavidxu	while (count > 0) {
3564216641Sdavidxu		tocopy = count;
3565216641Sdavidxu		if (tocopy > BATCH_SIZE)
3566216641Sdavidxu			tocopy = BATCH_SIZE;
3567216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3568216641Sdavidxu		if (error != 0)
3569216641Sdavidxu			break;
3570216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3571216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3572216641Sdavidxu				INT_MAX, 1);
3573216641Sdavidxu		count -= tocopy;
3574216641Sdavidxu		pos += tocopy;
3575216641Sdavidxu	}
3576216641Sdavidxu	return (error);
3577216641Sdavidxu}
3578216641Sdavidxu
3579162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3580162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3581162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3582162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3583162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3584162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3585162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3586162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3587164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3588164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3589164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3590173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3591177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3592177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3593177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3594178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3595178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3596179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3597179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3598201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3599201472Sdavidxu	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3600216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3601216641Sdavidxu	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
3602162536Sdavidxu};
3603162536Sdavidxu
3604162536Sdavidxuint
3605162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3606162536Sdavidxu{
3607163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3608162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3609162536Sdavidxu			(struct _umtx_op_args *)uap);
3610162536Sdavidxu	return (EINVAL);
3611162536Sdavidxu}
3612162536Sdavidxu#endif
3613162536Sdavidxu
3614161678Sdavidxuvoid
3615161678Sdavidxuumtx_thread_init(struct thread *td)
3616161678Sdavidxu{
3617161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3618161678Sdavidxu	td->td_umtxq->uq_thread = td;
3619161678Sdavidxu}
3620161678Sdavidxu
3621161678Sdavidxuvoid
3622161678Sdavidxuumtx_thread_fini(struct thread *td)
3623161678Sdavidxu{
3624161678Sdavidxu	umtxq_free(td->td_umtxq);
3625161678Sdavidxu}
3626161678Sdavidxu
3627161678Sdavidxu/*
3628161678Sdavidxu * Called when a new thread is created, e.g. by fork().
3629161678Sdavidxu */
3630161678Sdavidxuvoid
3631161678Sdavidxuumtx_thread_alloc(struct thread *td)
3632161678Sdavidxu{
3633161678Sdavidxu	struct umtx_q *uq;
3634161678Sdavidxu
3635161678Sdavidxu	uq = td->td_umtxq;
3636161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3637161678Sdavidxu
3638161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3639161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3640161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3641161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3642161678Sdavidxu}
3643161678Sdavidxu
3644161678Sdavidxu/*
3645161678Sdavidxu * exec() hook.
3646161678Sdavidxu */
3647161678Sdavidxustatic void
3648161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3649161678Sdavidxu	struct image_params *imgp __unused)
3650161678Sdavidxu{
3651161678Sdavidxu	umtx_thread_cleanup(curthread);
3652161678Sdavidxu}
3653161678Sdavidxu
3654161678Sdavidxu/*
3655161678Sdavidxu * thread_exit() hook.
3656161678Sdavidxu */
3657161678Sdavidxuvoid
3658161678Sdavidxuumtx_thread_exit(struct thread *td)
3659161678Sdavidxu{
3660161678Sdavidxu	umtx_thread_cleanup(td);
3661161678Sdavidxu}
3662161678Sdavidxu
3663161678Sdavidxu/*
3664161678Sdavidxu * Clean up per-thread umtx state (PI ownership and inherited priority).
3665161678Sdavidxu */
3666161678Sdavidxustatic void
3667161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3668161678Sdavidxu{
3669161678Sdavidxu	struct umtx_q *uq;
3670161678Sdavidxu	struct umtx_pi *pi;
3671161678Sdavidxu
3672161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3673161678Sdavidxu		return;
3674161678Sdavidxu
3675170300Sjeff	mtx_lock_spin(&umtx_lock);
3676161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3677161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3678161678Sdavidxu		pi->pi_owner = NULL;
3679161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3680161678Sdavidxu	}
3681216313Sdavidxu	mtx_unlock_spin(&umtx_lock);
3682174701Sdavidxu	thread_lock(td);
3683216313Sdavidxu	sched_unlend_user_prio(td, PRI_MAX);
3684174701Sdavidxu	thread_unlock(td);
3685161678Sdavidxu}
3686