kern_umtx.c revision 232144
1139804Simp/*-
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 232144 2012-02-25 02:12:17Z davidxu $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32112904Sjeff#include <sys/param.h>
33112904Sjeff#include <sys/kernel.h>
34131431Smarcel#include <sys/limits.h>
35112904Sjeff#include <sys/lock.h>
36115765Sjeff#include <sys/malloc.h>
37112904Sjeff#include <sys/mutex.h>
38164033Srwatson#include <sys/priv.h>
39112904Sjeff#include <sys/proc.h>
40161678Sdavidxu#include <sys/sched.h>
41165369Sdavidxu#include <sys/smp.h>
42161678Sdavidxu#include <sys/sysctl.h>
43112904Sjeff#include <sys/sysent.h>
44112904Sjeff#include <sys/systm.h>
45112904Sjeff#include <sys/sysproto.h>
46216641Sdavidxu#include <sys/syscallsubr.h>
47139013Sdavidxu#include <sys/eventhandler.h>
48112904Sjeff#include <sys/umtx.h>
49112904Sjeff
50139013Sdavidxu#include <vm/vm.h>
51139013Sdavidxu#include <vm/vm_param.h>
52139013Sdavidxu#include <vm/pmap.h>
53139013Sdavidxu#include <vm/vm_map.h>
54139013Sdavidxu#include <vm/vm_object.h>
55139013Sdavidxu
56165369Sdavidxu#include <machine/cpu.h>
57165369Sdavidxu
58205014Snwhitehorn#ifdef COMPAT_FREEBSD32
59162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
60162536Sdavidxu#endif
61162536Sdavidxu
62179970Sdavidxu#define _UMUTEX_TRY		1
63179970Sdavidxu#define _UMUTEX_WAIT		2
64179970Sdavidxu
/*
 * Priority inheritance mutex info.  One umtx_pi exists per contested
 * PI umutex; it links the owning thread to its blocked waiters so the
 * owner's priority can be boosted.
 */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread (uq_pi_contested) */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash (uc_pi_list of the chain) */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of waiters blocked on this PI mutex */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identifies the userland lock object */
	struct umtx_key		pi_key;
};
85161678Sdavidxu
/*
 * A userland synchronous object user: per-thread state used while the
 * thread sleeps on (or contends for) a userland lock object.
 */
struct umtx_q {
	/* Linked list entry for the per-key wait queue. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Key identifying the userland object being waited on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Currently on a wait queue. */

	/* The thread this umtx_q belongs to. */
	struct thread		*uq_thread;

	/*
	 * PI mutex this thread is blocked on, or NULL.  Reads may hold
	 * either the chain lock or umtx_lock; writes must hold both the
	 * chain lock and umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on a umtx_pi's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP (priority-protect) mutex. */
	u_char			uq_inherited_pri;

	/* Spare queue header ready to be reused (see umtxq_insert_queue). */
	struct umtxq_queue	*uq_spare_queue;

	/* The wait queue this entry is currently on (NULL when not queued). */
	struct umtxq_queue	*uq_cur_queue;
};
123115765Sjeff
TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue: all umtx_q's sleeping on the same umtx_key. */
struct umtxq_queue {
	struct umtxq_head	head;	/* Waiters, FIFO (insert tail, wake head). */
	struct umtx_key		key;	/* Key shared by all waiters on head. */
	LIST_ENTRY(umtxq_queue)	link;	/* Chain queue / spare-list linkage. */
	int			length;	/* Number of waiters on head. */
};

LIST_HEAD(umtxq_list, umtxq_queue);
135201991Sdavidxu
/* Userland lock object's wait-queue chain (one hash bucket). */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* Lists of per-key sleep queues, indexed by sub-queue. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Spare queue headers recycled by umtxq_remove_queue(). */
	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag: set while an operation may block (see umtxq_busy). */
	char			uc_busy;

	/* Number of threads sleeping for the busy flag to clear. */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

};
158115765Sjeff
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * BUG FIX: the original tested "&(uc)->uc_busy" — the ADDRESS of the
 * flag, which is never NULL — so the assertion could never fire.
 * Test the value of the flag instead.
 */
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
161161678Sdavidxu
162161678Sdavidxu/*
163161678Sdavidxu * Don't propagate time-sharing priority, there is a security reason,
164161678Sdavidxu * a user can simply introduce PI-mutex, let thread A lock the mutex,
165161678Sdavidxu * and let another thread B block on the mutex, because B is
166161678Sdavidxu * sleeping, its priority will be boosted, this causes A's priority to
167161678Sdavidxu * be boosted via priority propagating too and will never be lowered even
168161678Sdavidxu * if it is using 100%CPU, this is unfair to other processes.
169161678Sdavidxu */
170161678Sdavidxu
171163709Sjb#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
172163709Sjb			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
173163709Sjb			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
174161678Sdavidxu
175138224Sdavidxu#define	GOLDEN_RATIO_PRIME	2654404609U
176216678Sdavidxu#define	UMTX_CHAINS		512
177216678Sdavidxu#define	UMTX_SHIFTS		(__WORD_BIT - 9)
178115765Sjeff
179161678Sdavidxu#define	GET_SHARE(flags)	\
180161678Sdavidxu    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
181161678Sdavidxu
182177848Sdavidxu#define BUSY_SPINS		200
183177848Sdavidxu
184161678Sdavidxustatic uma_zone_t		umtx_pi_zone;
185179421Sdavidxustatic struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
186138224Sdavidxustatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
187161678Sdavidxustatic int			umtx_pi_allocated;
188115310Sjeff
189227309Sedstatic SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
190161678SdavidxuSYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
191161678Sdavidxu    &umtx_pi_allocated, 0, "Allocated umtx_pi");
192161678Sdavidxu
193161678Sdavidxustatic void umtxq_sysinit(void *);
194161678Sdavidxustatic void umtxq_hash(struct umtx_key *key);
195161678Sdavidxustatic struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
196139013Sdavidxustatic void umtxq_lock(struct umtx_key *key);
197139013Sdavidxustatic void umtxq_unlock(struct umtx_key *key);
198139257Sdavidxustatic void umtxq_busy(struct umtx_key *key);
199139257Sdavidxustatic void umtxq_unbusy(struct umtx_key *key);
200177848Sdavidxustatic void umtxq_insert_queue(struct umtx_q *uq, int q);
201177848Sdavidxustatic void umtxq_remove_queue(struct umtx_q *uq, int q);
202161678Sdavidxustatic int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
203139257Sdavidxustatic int umtxq_count(struct umtx_key *key);
204163697Sdavidxustatic struct umtx_pi *umtx_pi_alloc(int);
205161678Sdavidxustatic void umtx_pi_free(struct umtx_pi *pi);
206161678Sdavidxustatic int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
207161678Sdavidxustatic void umtx_thread_cleanup(struct thread *td);
208161678Sdavidxustatic void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
209161678Sdavidxu	struct image_params *imgp __unused);
210161678SdavidxuSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
211115310Sjeff
212177848Sdavidxu#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
213177848Sdavidxu#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
214177848Sdavidxu#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
215177848Sdavidxu
216170300Sjeffstatic struct mtx umtx_lock;
217170300Sjeff
/*
 * One-time subsystem initialization (run via SYSINIT): create the
 * umtx_pi UMA zone, initialize both chain hash tables (table 1 is
 * used for key types <= TYPE_SEM, table 0 for the rest — see
 * umtxq_getchain()), set up the global umtx_lock spin mutex, and
 * register umtx_exec_hook for process_exec events.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			/* MTX_DUPOK: two chain locks may be held at once. */
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}
241161678Sdavidxu
242143149Sdavidxustruct umtx_q *
243143149Sdavidxuumtxq_alloc(void)
244143149Sdavidxu{
245161678Sdavidxu	struct umtx_q *uq;
246161678Sdavidxu
247161678Sdavidxu	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
248201991Sdavidxu	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
249201991Sdavidxu	TAILQ_INIT(&uq->uq_spare_queue->head);
250161678Sdavidxu	TAILQ_INIT(&uq->uq_pi_contested);
251161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
252161678Sdavidxu	return (uq);
253143149Sdavidxu}
254143149Sdavidxu
255143149Sdavidxuvoid
256143149Sdavidxuumtxq_free(struct umtx_q *uq)
257143149Sdavidxu{
258201991Sdavidxu	MPASS(uq->uq_spare_queue != NULL);
259201991Sdavidxu	free(uq->uq_spare_queue, M_UMTX);
260143149Sdavidxu	free(uq, M_UMTX);
261143149Sdavidxu}
262143149Sdavidxu
263161678Sdavidxustatic inline void
264139013Sdavidxuumtxq_hash(struct umtx_key *key)
265138224Sdavidxu{
266161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
267161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
268138224Sdavidxu}
269138224Sdavidxu
270161678Sdavidxustatic inline struct umtxq_chain *
271161678Sdavidxuumtxq_getchain(struct umtx_key *key)
272139013Sdavidxu{
273201886Sdavidxu	if (key->type <= TYPE_SEM)
274179421Sdavidxu		return (&umtxq_chains[1][key->hash]);
275179421Sdavidxu	return (&umtxq_chains[0][key->hash]);
276139013Sdavidxu}
277139013Sdavidxu
278161678Sdavidxu/*
279177848Sdavidxu * Lock a chain.
280161678Sdavidxu */
281138224Sdavidxustatic inline void
282177848Sdavidxuumtxq_lock(struct umtx_key *key)
283139257Sdavidxu{
284161678Sdavidxu	struct umtxq_chain *uc;
285139257Sdavidxu
286161678Sdavidxu	uc = umtxq_getchain(key);
287177848Sdavidxu	mtx_lock(&uc->uc_lock);
288139257Sdavidxu}
289139257Sdavidxu
290161678Sdavidxu/*
291177848Sdavidxu * Unlock a chain.
292161678Sdavidxu */
293139257Sdavidxustatic inline void
294177848Sdavidxuumtxq_unlock(struct umtx_key *key)
295139257Sdavidxu{
296161678Sdavidxu	struct umtxq_chain *uc;
297139257Sdavidxu
298161678Sdavidxu	uc = umtxq_getchain(key);
299177848Sdavidxu	mtx_unlock(&uc->uc_lock);
300139257Sdavidxu}
301139257Sdavidxu
/*
 * Set chain to busy state when a following operation may block
 * (a kernel mutex cannot be held across it).  Must be called with
 * the chain lock held; returns with it still held and uc_busy set.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		/*
		 * On MP, briefly spin without the chain lock in the hope
		 * that the current holder unbusies quickly, avoiding a
		 * sleep.  uc_busy is read unlocked here; that is tolerated
		 * because it is re-checked under the lock below.
		 */
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Still busy after spinning (or UP): sleep until woken. */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
333138224Sdavidxu
334161678Sdavidxu/*
335177848Sdavidxu * Unbusy a chain.
336161678Sdavidxu */
337138225Sdavidxustatic inline void
338177848Sdavidxuumtxq_unbusy(struct umtx_key *key)
339138224Sdavidxu{
340161678Sdavidxu	struct umtxq_chain *uc;
341161678Sdavidxu
342161678Sdavidxu	uc = umtxq_getchain(key);
343177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
344177848Sdavidxu	KASSERT(uc->uc_busy != 0, ("not busy"));
345177848Sdavidxu	uc->uc_busy = 0;
346177848Sdavidxu	if (uc->uc_waiters)
347177848Sdavidxu		wakeup_one(uc);
348138224Sdavidxu}
349138224Sdavidxu
350201991Sdavidxustatic struct umtxq_queue *
351201991Sdavidxuumtxq_queue_lookup(struct umtx_key *key, int q)
352201991Sdavidxu{
353201991Sdavidxu	struct umtxq_queue *uh;
354201991Sdavidxu	struct umtxq_chain *uc;
355201991Sdavidxu
356201991Sdavidxu	uc = umtxq_getchain(key);
357201991Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
358201991Sdavidxu	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
359201991Sdavidxu		if (umtx_key_match(&uh->key, key))
360201991Sdavidxu			return (uh);
361201991Sdavidxu	}
362201991Sdavidxu
363201991Sdavidxu	return (NULL);
364201991Sdavidxu}
365201991Sdavidxu
/*
 * Enqueue uq on sub-queue "q" for its key.  The thread donates its
 * spare queue header: if a queue for the key already exists the spare
 * goes onto the chain's recycle list, otherwise the spare becomes the
 * key's queue.  Either way uq_spare_queue is consumed (set to NULL);
 * umtxq_remove_queue() replenishes it.  Chain lock must be held.
 */
static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		/* Queue exists: park our spare header for later reuse. */
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		/* No queue yet: our spare header becomes the queue. */
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	/* Tail insertion gives FIFO wakeup order. */
	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}
391139013Sdavidxu
/*
 * Dequeue uq from sub-queue "q".  A queue header is handed back to the
 * thread to restore the uq_spare_queue invariant: the key's own header
 * if the queue became empty, otherwise one from the chain's recycle
 * list (guaranteed non-empty because every queued thread parked one).
 * No-op if uq is not queued.  Chain lock must be held.
 */
static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			/* Last waiter: take the key's header as our spare. */
			LIST_REMOVE(uh, link);
		} else {
			/* Others remain: reclaim a parked spare instead. */
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
418139013Sdavidxu
419161678Sdavidxu/*
420161678Sdavidxu * Check if there are multiple waiters
421161678Sdavidxu */
422139013Sdavidxustatic int
423139013Sdavidxuumtxq_count(struct umtx_key *key)
424139013Sdavidxu{
425161678Sdavidxu	struct umtxq_chain *uc;
426201991Sdavidxu	struct umtxq_queue *uh;
427115765Sjeff
428161678Sdavidxu	uc = umtxq_getchain(key);
429161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
430201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
431201991Sdavidxu	if (uh != NULL)
432201991Sdavidxu		return (uh->length);
433201991Sdavidxu	return (0);
434115765Sjeff}
435115765Sjeff
436161678Sdavidxu/*
437161678Sdavidxu * Check if there are multiple PI waiters and returns first
438161678Sdavidxu * waiter.
439161678Sdavidxu */
440139257Sdavidxustatic int
441161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
442161678Sdavidxu{
443161678Sdavidxu	struct umtxq_chain *uc;
444201991Sdavidxu	struct umtxq_queue *uh;
445161678Sdavidxu
446161678Sdavidxu	*first = NULL;
447161678Sdavidxu	uc = umtxq_getchain(key);
448161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
449201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
450201991Sdavidxu	if (uh != NULL) {
451201991Sdavidxu		*first = TAILQ_FIRST(&uh->head);
452201991Sdavidxu		return (uh->length);
453161678Sdavidxu	}
454201991Sdavidxu	return (0);
455161678Sdavidxu}
456161678Sdavidxu
457161678Sdavidxu/*
458161678Sdavidxu * Wake up threads waiting on an userland object.
459161678Sdavidxu */
460177848Sdavidxu
461161678Sdavidxustatic int
462177848Sdavidxuumtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
463115765Sjeff{
464161678Sdavidxu	struct umtxq_chain *uc;
465201991Sdavidxu	struct umtxq_queue *uh;
466201991Sdavidxu	struct umtx_q *uq;
467161678Sdavidxu	int ret;
468115765Sjeff
469139257Sdavidxu	ret = 0;
470161678Sdavidxu	uc = umtxq_getchain(key);
471161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
472201991Sdavidxu	uh = umtxq_queue_lookup(key, q);
473201991Sdavidxu	if (uh != NULL) {
474201991Sdavidxu		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
475177848Sdavidxu			umtxq_remove_queue(uq, q);
476161678Sdavidxu			wakeup(uq);
477139257Sdavidxu			if (++ret >= n_wake)
478201991Sdavidxu				return (ret);
479139013Sdavidxu		}
480139013Sdavidxu	}
481139257Sdavidxu	return (ret);
482138224Sdavidxu}
483138224Sdavidxu
484177848Sdavidxu
485161678Sdavidxu/*
486161678Sdavidxu * Wake up specified thread.
487161678Sdavidxu */
488161678Sdavidxustatic inline void
489161678Sdavidxuumtxq_signal_thread(struct umtx_q *uq)
490161678Sdavidxu{
491161678Sdavidxu	struct umtxq_chain *uc;
492161678Sdavidxu
493161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
494161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
495161678Sdavidxu	umtxq_remove(uq);
496161678Sdavidxu	wakeup(uq);
497161678Sdavidxu}
498161678Sdavidxu
499161678Sdavidxu/*
500161678Sdavidxu * Put thread into sleep state, before sleeping, check if
501161678Sdavidxu * thread was removed from umtx queue.
502161678Sdavidxu */
503138224Sdavidxustatic inline int
504161678Sdavidxuumtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
505138224Sdavidxu{
506161678Sdavidxu	struct umtxq_chain *uc;
507161678Sdavidxu	int error;
508161678Sdavidxu
509161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
510161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
511161678Sdavidxu	if (!(uq->uq_flags & UQF_UMTXQ))
512161678Sdavidxu		return (0);
513161678Sdavidxu	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
514139751Sdavidxu	if (error == EWOULDBLOCK)
515139751Sdavidxu		error = ETIMEDOUT;
516139751Sdavidxu	return (error);
517138224Sdavidxu}
518138224Sdavidxu
/*
 * Convert a userspace address into a unique logical key.  For
 * process-shared objects the key is (VM object, offset) and takes a
 * reference on the object, which umtx_key_release() must drop; for
 * thread-shared (process-private) objects it is (vmspace, address)
 * and holds no reference.  AUTO_SHARE picks shared only when the map
 * entry has VM_INHERIT_SHARE.  Returns 0, or EFAULT if the address
 * does not resolve to writable memory.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Offset within the backing object identifies the word. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			/* Pin the object; released in umtx_key_release(). */
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
564139013Sdavidxu
565161678Sdavidxu/*
566161678Sdavidxu * Release key.
567161678Sdavidxu */
568218969Sjhbvoid
569139013Sdavidxuumtx_key_release(struct umtx_key *key)
570139013Sdavidxu{
571161678Sdavidxu	if (key->shared)
572139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
573139013Sdavidxu}
574139013Sdavidxu
/*
 * Lock an old-style umtx object for thread id "id", sleeping up to
 * "timo" ticks per attempt (0 = no timeout) while contested.
 *
 * Returns 0 on success, EFAULT if the userspace word faults,
 * ETIMEDOUT on timeout, or EINTR/ERESTART on signal (via PCATCH in
 * umtxq_sleep(); the caller maps these).  All accesses to the
 * userspace word go through fault-tolerant casuword().
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			/* Keep the contested bit: other waiters may exist. */
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.  (error was set by umtxq_sleep()
		 * on the previous iteration.)
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves BEFORE setting the contested bit so
		 * an unlocking thread cannot miss us.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* NOTREACHED: the loop only exits via return. */
	return (0);
}
671112904Sjeff
/*
 * Lock a umtx object, optionally with an absolute-from-now timeout.
 * With no timeout the operation is restartable after a signal
 * (EINTR -> ERESTART); with a timeout it is not (ERESTART -> EINTR),
 * because the remaining time cannot be re-derived on syscall restart.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			/* Tick timeout expired: re-check the deadline. */
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Re-arm with the remaining time and retry. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
711139013Sdavidxu
/*
 * Unlock a umtx object owned by thread id "id".  Returns 0 on
 * success, EFAULT on a faulting userspace access, EPERM if the
 * caller does not own the lock, or EINVAL if the owner word changed
 * underneath us during the hand-off.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		/* Fast path: uncontested, just clear the owner word. */
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS lost a race (contested bit got set); fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so waiter count and wakeup stay coherent. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
772139013Sdavidxu
773205014Snwhitehorn#ifdef COMPAT_FREEBSD32
774162536Sdavidxu
/*
 * Lock a umtx object (32-bit compat variant).
 *
 * Spins on the uncontested/contested CAS protocol against the userland
 * word *m, and when the word is owned publishes UMUTEX_CONTESTED and
 * sleeps on the umtx queue.  "id" is the value that marks this thread
 * as owner; "timo" is the sleep timeout in ticks (0 = no timeout).
 * Returns 0 once the lock is acquired, or an errno (EFAULT on a bad
 * userland address, or the error from an interrupted/timed-out sleep).
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before tagging the word as contested. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
870162536Sdavidxu
871162536Sdavidxu/*
872162536Sdavidxu * Lock a umtx object.
873162536Sdavidxu */
874162536Sdavidxustatic int
875162536Sdavidxudo_lock_umtx32(struct thread *td, void *m, uint32_t id,
876162536Sdavidxu	struct timespec *timeout)
877162536Sdavidxu{
878162536Sdavidxu	struct timespec ts, ts2, ts3;
879162536Sdavidxu	struct timeval tv;
880162536Sdavidxu	int error;
881162536Sdavidxu
882162536Sdavidxu	if (timeout == NULL) {
883162536Sdavidxu		error = _do_lock_umtx32(td, m, id, 0);
884162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
885162536Sdavidxu		if (error == EINTR)
886162536Sdavidxu			error = ERESTART;
887162536Sdavidxu	} else {
888162536Sdavidxu		getnanouptime(&ts);
889162536Sdavidxu		timespecadd(&ts, timeout);
890162536Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
891162536Sdavidxu		for (;;) {
892162536Sdavidxu			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
893162536Sdavidxu			if (error != ETIMEDOUT)
894162536Sdavidxu				break;
895162536Sdavidxu			getnanouptime(&ts2);
896162536Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
897162536Sdavidxu				error = ETIMEDOUT;
898162536Sdavidxu				break;
899162536Sdavidxu			}
900162536Sdavidxu			ts3 = ts;
901162536Sdavidxu			timespecsub(&ts3, &ts2);
902162536Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
903162536Sdavidxu		}
904162536Sdavidxu		/* Timed-locking is not restarted. */
905162536Sdavidxu		if (error == ERESTART)
906162536Sdavidxu			error = EINTR;
907162536Sdavidxu	}
908162536Sdavidxu	return (error);
909162536Sdavidxu}
910162536Sdavidxu
/*
 * Unlock a umtx object (32-bit compat variant).
 *
 * The calling thread (whose id is "id") must own *m.  The word is
 * returned to UMUTEX_UNOWNED, or left as UMUTEX_CONTESTED when more
 * than one waiter remains queued, and one waiter is woken.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race with a contending locker; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* The word changed under us: userland broke the protocol. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
971162536Sdavidxu#endif
972162536Sdavidxu
973231989Sdavidxustatic inline int
974231989Sdavidxutstohz(const struct timespec *tsp)
975231989Sdavidxu{
976231989Sdavidxu	struct timeval tv;
977231989Sdavidxu
978231989Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, tsp);
979231989Sdavidxu	return tvtohz(&tv);
980231989Sdavidxu}
981231989Sdavidxu
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * "compat32" selects a 32-bit fetch of *addr; "timeout" may carry a
 * relative or absolute (_flags & UMTX_ABSTIME) timeout measured on
 * timeout->_clockid; "is_private" restricts the key to this process.
 * Returns 0 when woken (or when *addr no longer equals "id"), else
 * ETIMEDOUT or the sleep error (ERESTART is mapped to EINTR).
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ets, cts, tts;	/* end, current, time-to-sleep */
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	/*
	 * Queue ourselves before reading the word, so a wakeup that
	 * arrives between the read and the sleep is not lost.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
        else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		/* Value already changed: no sleep, just dequeue. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* Resolve the timeout into an absolute end time "ets". */
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
			ets = cts;
			timespecadd(&ets, &timeout->_timeout);
		} else {
			ets = timeout->_timeout;
		}
		umtxq_lock(&uq->uq_key);
		for (;;) {
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			tts = ets;
			timespecsub(&tts, &cts);
			error = umtxq_sleep(uq, "uwait", tstohz(&tts));
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				/* A waker removed us: report success. */
				error = 0;
				break;
			}
			if (error != ETIMEDOUT)
				break;
			/* Re-read the clock and loop for the remainder. */
			umtxq_unlock(&uq->uq_key);
			kern_clock_gettime(td, timeout->_clockid, &cts);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
1050139013Sdavidxu
1051161678Sdavidxu/*
1052161678Sdavidxu * Wake up threads sleeping on the specified address.
1053161678Sdavidxu */
1054151692Sdavidxuint
1055178646Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1056139013Sdavidxu{
1057139013Sdavidxu	struct umtx_key key;
1058139257Sdavidxu	int ret;
1059139013Sdavidxu
1060178646Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1061178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1062139257Sdavidxu		return (ret);
1063139258Sdavidxu	umtxq_lock(&key);
1064139257Sdavidxu	ret = umtxq_signal(&key, n_wake);
1065139258Sdavidxu	umtxq_unlock(&key);
1066139257Sdavidxu	umtx_key_release(&key);
1067139013Sdavidxu	return (0);
1068139013Sdavidxu}
1069139013Sdavidxu
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * "mode" selects the flavor: _UMUTEX_TRY returns EBUSY rather than
 * sleeping, and _UMUTEX_WAIT only waits until the mutex becomes free
 * or contested without acquiring it.  "timo" is the sleep timeout in
 * ticks (0 = no timeout).  With UMUTEX_ERROR_CHECK set in "flags",
 * relocking one's own mutex yields EDEADLK.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Wait mode: done as soon as the lock is takeable. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves; the chain stays busy across the CAS. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
1181161678Sdavidxu
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * The calling thread must own m; the owner word is returned to
 * UMUTEX_UNOWNED, or left UMUTEX_CONTESTED when more than one waiter
 * remains, and one waiter is woken.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested fast path: clear the word in one CAS. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race with a contending locker; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* The word changed under us: userland broke the protocol. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
1244161678Sdavidxu
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Somebody still holds the mutex: nothing to do. */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/* With at most one waiter left, the contested bit can be cleared. */
	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	/* Wake one waiter when there are any and the mutex is unowned. */
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1288179970Sdavidxu
1289161678Sdavidxustatic inline struct umtx_pi *
1290163697Sdavidxuumtx_pi_alloc(int flags)
1291161678Sdavidxu{
1292161678Sdavidxu	struct umtx_pi *pi;
1293161678Sdavidxu
1294163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1295161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1296161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1297161678Sdavidxu	return (pi);
1298161678Sdavidxu}
1299161678Sdavidxu
1300161678Sdavidxustatic inline void
1301161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1302161678Sdavidxu{
1303161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1304161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1305161678Sdavidxu}
1306161678Sdavidxu
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  Returns 0 when there is no PI mutex to adjust, 1 otherwise.
 * The pi_blocked queue is kept sorted by increasing priority value.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first waiter with a worse priority. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1351161678Sdavidxu
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the chain of owners (each of which may itself be
 * blocked on another PI mutex), lending td's priority to every owner
 * running at a worse priority, and stops as soon as an owner already
 * runs at least that urgently or the chain ends.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		/* Unknown owner, or we would be boosting ourselves. */
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/* Owner already runs at least this urgently. */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
1398161678Sdavidxu
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.  Recomputes the
 * correct lent priority for each owner up the blocking chain.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * The owner's lent priority is the best priority among
		 * the head waiters of all PI mutexes it still owns.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue up the chain if the owner itself is blocked. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
1433161678Sdavidxu
1434161678Sdavidxu/*
1435161678Sdavidxu * Insert a PI mutex into owned list.
1436161678Sdavidxu */
1437161678Sdavidxustatic void
1438161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1439161678Sdavidxu{
1440161678Sdavidxu	struct umtx_q *uq_owner;
1441161678Sdavidxu
1442161678Sdavidxu	uq_owner = owner->td_umtxq;
1443170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1444161678Sdavidxu	if (pi->pi_owner != NULL)
1445161678Sdavidxu		panic("pi_ower != NULL");
1446161678Sdavidxu	pi->pi_owner = owner;
1447161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1448161678Sdavidxu}
1449161678Sdavidxu
/*
 * Claim ownership of a PI mutex.  Returns 0 on success (including the
 * case where "owner" already owns it) and EPERM when another thread is
 * already recorded as owner.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		/* Already ours; nothing to do. */
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		/* Inherit the head waiter's priority if it is better. */
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}
1486161678Sdavidxu
/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result new priority propagating process.
 *
 * NOTE: "oldpri" is unused in this implementation; the current
 * priority is re-read via UPRI() inside the helpers.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock_spin(&umtx_lock);
}
1509174701Sdavidxu
/*
 * Sleep on a PI mutex.
 *
 * Enqueues uq on the umtx queue and on pi's priority-ordered blocked
 * list, records the userland owner tid "owner" in pi if not yet known,
 * propagates priority to the owner chain, then sleeps under the chain
 * lock.  Must be called with the chain locked and busied; returns with
 * the chain unlocked.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* Drop the spin lock across the tid lookup. */
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			/* Re-check: another thread may have set it. */
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* tdfind() apparently returns the proc locked. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Insert in pi_blocked, kept sorted by increasing priority value. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	/* Undo the PI bookkeeping and drop any lent priority. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
1581161678Sdavidxu
1582161678Sdavidxu/*
1583161678Sdavidxu * Add reference count for a PI mutex.
1584161678Sdavidxu */
1585161678Sdavidxustatic void
1586161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1587161678Sdavidxu{
1588161678Sdavidxu	struct umtxq_chain *uc;
1589161678Sdavidxu
1590161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1591161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1592161678Sdavidxu	pi->pi_refcount++;
1593161678Sdavidxu}
1594161678Sdavidxu
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 * The chain lock must be held by the caller.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		/* Detach from the owner's contested list, if any. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		/* Remove from the hash and release the memory. */
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
1621161678Sdavidxu
1622161678Sdavidxu/*
1623161678Sdavidxu * Find a PI mutex in hash table.
1624161678Sdavidxu */
1625161678Sdavidxustatic struct umtx_pi *
1626161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1627161678Sdavidxu{
1628161678Sdavidxu	struct umtxq_chain *uc;
1629161678Sdavidxu	struct umtx_pi *pi;
1630161678Sdavidxu
1631161678Sdavidxu	uc = umtxq_getchain(key);
1632161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1633161678Sdavidxu
1634161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1635161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1636161678Sdavidxu			return (pi);
1637161678Sdavidxu		}
1638161678Sdavidxu	}
1639161678Sdavidxu	return (NULL);
1640161678Sdavidxu}
1641161678Sdavidxu
/*
 * Insert a PI mutex into the hash chain selected by pi->pi_key.
 * The caller must hold the corresponding umtxq chain lock.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1654161678Sdavidxu
/*
 * Lock a PI (priority-inheritance) mutex.
 *
 * td    - locking thread.
 * m     - userland address of the mutex; any access to it may fault.
 * flags - mutex flags fetched from userland (UMUTEX_*).
 * timo  - sleep timeout in ticks, 0 means sleep forever.
 * try   - if non-zero, return EBUSY instead of sleeping when the
 *         mutex is owned.
 *
 * Returns 0 on success, or an errno: EFAULT (bad address), EDEADLK
 * (error-check mutex already owned by td), EBUSY (try failed), or a
 * sleep error (ETIMEDOUT, EINTR, ERESTART) from umtxq_sleep_pi().
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/*
	 * Find or create the kernel-side PI state for this key.  Try a
	 * non-sleeping allocation first while the chain lock is held;
	 * if that fails, drop the lock for a blocking allocation and
	 * re-check for a racing insert afterwards.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard our copy. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference on pi for the duration of the lock attempt. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/*
				 * We took the contested mutex; record td
				 * as the PI owner under the busy lock.
				 */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/* Mark the queue busy before touching the word again. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.  umtxq_sleep_pi() drops the busy
		 * state itself.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	/* Drop the reference taken above. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1803161678Sdavidxu
/*
 * Unlock a PI (priority-inheritance) mutex.
 *
 * Verifies that td owns the userland mutex, hands the kernel-side PI
 * state off (wakes the highest-priority waiter), recomputes td's lent
 * priority from its remaining contested PI mutexes, and rewrites the
 * userland owner word.
 *
 * Returns 0 on success; EFAULT on a bad userland address, EPERM if td
 * is not the owner (or the kernel PI owner disagrees), EINVAL if the
 * owner word changed underneath us.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Fast path: uncontested, just clear the owner word. */
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS failed: contention appeared; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		/* Disown the PI state before waking a successor. */
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute the priority we should keep lending based on
		 * the waiters of the PI mutexes we still own.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1903161678Sdavidxu
/*
 * Lock a PP (priority-protect / priority-ceiling) mutex.
 *
 * td    - locking thread.
 * m     - userland address of the mutex; may fault on any access.
 * flags - mutex flags fetched from userland.
 * timo  - sleep timeout in ticks, 0 means sleep forever.
 * try   - if non-zero, return EBUSY instead of sleeping.
 *
 * A PP mutex is always stored as UMUTEX_CONTESTED when unlocked, so
 * acquisition is a CAS from UMUTEX_CONTESTED to (tid|CONTESTED).
 * While holding the mutex the thread's priority is raised to the
 * ceiling (if privileged); on failure/sleep the previous inherited
 * priority is restored.
 *
 * Returns 0 or an errno (EINVAL bad ceiling / insufficient priority,
 * EFAULT, EDEADLK, EBUSY, or a sleep error).
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* su: may this thread use realtime (ceiling) priorities? */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Userland stores the ceiling as an rtprio value;
		 * convert and range-check it.  (Unsigned wrap makes a
		 * too-large fuword32 result fail the check too.)
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		/* Caller must already run at or above the ceiling. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		/* Boost to the ceiling if privileged and it helps. */
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Sleep over: drop the boost and recompute the lent
		 * priority from PI mutexes we still own.
		 */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		/* Acquisition failed: undo any priority boost. */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2033161678Sdavidxu
/*
 * Unlock a PP (priority-protect) mutex.
 *
 * Verifies td owns the mutex, reads the saved ceiling from
 * m->m_ceilings[1] to compute the priority to fall back to, rewrites
 * the owner word (always to UMUTEX_CONTESTED, see comment below),
 * wakes one waiter, and lowers td's lent priority accordingly.
 *
 * Returns 0 or an errno (EFAULT, EPERM not owner, EINVAL bad saved
 * ceiling, or a copyin error).
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority saved at lock time. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute lent priority now that this PP lock is gone. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2119161678Sdavidxu
/*
 * Change the priority ceiling of a PP mutex
 * (pthread_mutex_setprioceiling semantics).
 *
 * The mutex must be acquired (or already owned by td) before the
 * ceiling is rewritten; other lockers spin/sleep on the CAS loop
 * below until we are done.  On success the previous ceiling is
 * copied out to *old_ceiling (if non-NULL) and all waiters are
 * woken so they re-evaluate against the new ceiling.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the (unlocked) PP mutex ourselves. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: store the ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* Already owned by us: just update the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2199161678Sdavidxu
2200162030Sdavidxustatic int
2201162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2202179970Sdavidxu	int mode)
2203162030Sdavidxu{
2204162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2205162030Sdavidxu	case 0:
2206179970Sdavidxu		return (_do_lock_normal(td, m, flags, timo, mode));
2207162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2208179970Sdavidxu		return (_do_lock_pi(td, m, flags, timo, mode));
2209162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2210179970Sdavidxu		return (_do_lock_pp(td, m, flags, timo, mode));
2211162030Sdavidxu	}
2212162030Sdavidxu	return (EINVAL);
2213162030Sdavidxu}
2214162030Sdavidxu
/*
 * Lock a userland POSIX mutex, optionally with a timeout.
 *
 * timeout == NULL means block indefinitely; in that case an EINTR
 * from a signal is converted to ERESTART so the syscall transparently
 * retries (except for _UMUTEX_WAIT mode).  With a timeout, the
 * deadline is computed against timeout->_clockid (relative or
 * absolute per UMTX_ABSTIME) and the lock attempt is retried until
 * the deadline passes; timed locking is never restarted.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct _umtx_time *timeout, int mode)
{
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/*
		 * cts: current time; ets: absolute deadline;
		 * tts: remaining (relative) time to sleep.
		 */
		kern_clock_gettime(td, timeout->_clockid, &cts);
		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
			ets = cts;
			timespecadd(&ets, &timeout->_timeout);
			tts = timeout->_timeout;
		} else {
			ets = timeout->_timeout;
			tts = timeout->_timeout;
			timespecsub(&tts, &cts);
		}
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tstohz(&tts), mode);
			if (error != ETIMEDOUT)
				break;
			/* Tick timeout fired early? Re-check the clock. */
			kern_clock_gettime(td, timeout->_clockid, &cts);
			if (timespeccmp(&cts, &ets, >=))
				break;
			tts = ets;
			timespecsub(&tts, &cts);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2262161678Sdavidxu
2263161678Sdavidxu/*
2264161678Sdavidxu * Unlock a userland POSIX mutex.
2265161678Sdavidxu */
2266161678Sdavidxustatic int
2267161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2268161678Sdavidxu{
2269161678Sdavidxu	uint32_t flags;
2270161678Sdavidxu
2271161678Sdavidxu	flags = fuword32(&m->m_flags);
2272161678Sdavidxu	if (flags == -1)
2273161678Sdavidxu		return (EFAULT);
2274161678Sdavidxu
2275161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2276161855Sdavidxu	case 0:
2277161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2278161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2279161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2280161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2281161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2282161855Sdavidxu	}
2283161678Sdavidxu
2284161855Sdavidxu	return (EINVAL);
2285161678Sdavidxu}
2286161678Sdavidxu
/*
 * Wait on a userland condition variable (pthread_cond_wait /
 * pthread_cond_timedwait backend).
 *
 * The waiter is queued BEFORE the user mutex m is unlocked, so a
 * signal that races with the unlock cannot be lost.  timeout may be
 * NULL (wait forever); wflags may carry CVWAIT_CLOCKID (use
 * cv->c_clockid) and CVWAIT_ABSTIME (timeout is absolute).
 *
 * Returns 0 if signalled, or an errno (EINVAL bad clock, ETIMEDOUT,
 * EINTR, or an error from unlocking m).
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	uint32_t clockid;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		clockid = fuword32(&cv->c_clockid);
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Queue ourselves before dropping the user mutex. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* ets: absolute deadline; tts: remaining time. */
			if ((wflags & CVWAIT_ABSTIME) == 0) {
				kern_clock_gettime(td, clockid, &ets);
				timespecadd(&ets, timeout);
				tts = *timeout;
			} else { /* absolute time */
				ets = *timeout;
				tts = *timeout;
				kern_clock_gettime(td, clockid, &cts);
				timespecsub(&tts, &cts);
			}
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				/* Re-check the clock; ticks may fire early. */
				kern_clock_gettime(td, clockid, &cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;	/* Removed from queue: we were signalled. */
	else {
		/*
		 * This must be a timeout, interruption by a signal, or a
		 * spurious wakeup: dequeue ourselves and clear the
		 * c_has_waiters flag if we were the last waiter.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(
				    __DEVOLATILE(uint32_t *,
					 &cv->c_has_waiters), 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2394164839Sdavidxu
2395164839Sdavidxu/*
2396164839Sdavidxu * Signal a userland condition variable.
2397164839Sdavidxu */
2398164839Sdavidxustatic int
2399164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2400164839Sdavidxu{
2401164839Sdavidxu	struct umtx_key key;
2402164839Sdavidxu	int error, cnt, nwake;
2403164839Sdavidxu	uint32_t flags;
2404164839Sdavidxu
2405164839Sdavidxu	flags = fuword32(&cv->c_flags);
2406164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2407164839Sdavidxu		return (error);
2408164839Sdavidxu	umtxq_lock(&key);
2409164839Sdavidxu	umtxq_busy(&key);
2410164839Sdavidxu	cnt = umtxq_count(&key);
2411164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2412164839Sdavidxu	if (cnt <= nwake) {
2413164839Sdavidxu		umtxq_unlock(&key);
2414164839Sdavidxu		error = suword32(
2415164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2416164839Sdavidxu		umtxq_lock(&key);
2417164839Sdavidxu	}
2418164839Sdavidxu	umtxq_unbusy(&key);
2419164839Sdavidxu	umtxq_unlock(&key);
2420164839Sdavidxu	umtx_key_release(&key);
2421164839Sdavidxu	return (error);
2422164839Sdavidxu}
2423164839Sdavidxu
2424164839Sdavidxustatic int
2425164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2426164839Sdavidxu{
2427164839Sdavidxu	struct umtx_key key;
2428164839Sdavidxu	int error;
2429164839Sdavidxu	uint32_t flags;
2430164839Sdavidxu
2431164839Sdavidxu	flags = fuword32(&cv->c_flags);
2432164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2433164839Sdavidxu		return (error);
2434164839Sdavidxu
2435164839Sdavidxu	umtxq_lock(&key);
2436164839Sdavidxu	umtxq_busy(&key);
2437164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2438164839Sdavidxu	umtxq_unlock(&key);
2439164839Sdavidxu
2440164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2441164839Sdavidxu
2442164839Sdavidxu	umtxq_lock(&key);
2443164839Sdavidxu	umtxq_unbusy(&key);
2444164839Sdavidxu	umtxq_unlock(&key);
2445164839Sdavidxu
2446164839Sdavidxu	umtx_key_release(&key);
2447164839Sdavidxu	return (error);
2448164839Sdavidxu}
2449164839Sdavidxu
2450177848Sdavidxustatic int
2451177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2452177848Sdavidxu{
2453177848Sdavidxu	struct umtx_q *uq;
2454177848Sdavidxu	uint32_t flags, wrflags;
2455177848Sdavidxu	int32_t state, oldstate;
2456177848Sdavidxu	int32_t blocked_readers;
2457177848Sdavidxu	int error;
2458177848Sdavidxu
2459177848Sdavidxu	uq = td->td_umtxq;
2460177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2461177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2462177848Sdavidxu	if (error != 0)
2463177848Sdavidxu		return (error);
2464177848Sdavidxu
2465177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2466177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2467177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2468177848Sdavidxu
2469177848Sdavidxu	for (;;) {
2470177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2471177848Sdavidxu		/* try to lock it */
2472177848Sdavidxu		while (!(state & wrflags)) {
2473177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2474177848Sdavidxu				umtx_key_release(&uq->uq_key);
2475177848Sdavidxu				return (EAGAIN);
2476177848Sdavidxu			}
2477177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2478177848Sdavidxu			if (oldstate == state) {
2479177848Sdavidxu				umtx_key_release(&uq->uq_key);
2480177848Sdavidxu				return (0);
2481177848Sdavidxu			}
2482177848Sdavidxu			state = oldstate;
2483177848Sdavidxu		}
2484177848Sdavidxu
2485177848Sdavidxu		if (error)
2486177848Sdavidxu			break;
2487177848Sdavidxu
2488177848Sdavidxu		/* grab monitor lock */
2489177848Sdavidxu		umtxq_lock(&uq->uq_key);
2490177848Sdavidxu		umtxq_busy(&uq->uq_key);
2491177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2492177848Sdavidxu
2493203414Sdavidxu		/*
2494203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2495203414Sdavidxu		 * and the check below
2496203414Sdavidxu		 */
2497203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2498203414Sdavidxu
2499177848Sdavidxu		/* set read contention bit */
2500177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2501177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2502177848Sdavidxu			if (oldstate == state)
2503177848Sdavidxu				goto sleep;
2504177848Sdavidxu			state = oldstate;
2505177848Sdavidxu		}
2506177848Sdavidxu
2507177848Sdavidxu		/* state is changed while setting flags, restart */
2508177848Sdavidxu		if (!(state & wrflags)) {
2509177848Sdavidxu			umtxq_lock(&uq->uq_key);
2510177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2511177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2512177848Sdavidxu			continue;
2513177848Sdavidxu		}
2514177848Sdavidxu
2515177848Sdavidxusleep:
2516177848Sdavidxu		/* contention bit is set, before sleeping, increase read waiter count */
2517177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2518177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2519177848Sdavidxu
2520177848Sdavidxu		while (state & wrflags) {
2521177848Sdavidxu			umtxq_lock(&uq->uq_key);
2522177848Sdavidxu			umtxq_insert(uq);
2523177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2524177848Sdavidxu
2525177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2526177848Sdavidxu
2527177848Sdavidxu			umtxq_busy(&uq->uq_key);
2528177848Sdavidxu			umtxq_remove(uq);
2529177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2530177848Sdavidxu			if (error)
2531177848Sdavidxu				break;
2532177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2533177848Sdavidxu		}
2534177848Sdavidxu
2535177848Sdavidxu		/* decrease read waiter count, and may clear read contention bit */
2536177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2537177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2538177848Sdavidxu		if (blocked_readers == 1) {
2539177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2540177848Sdavidxu			for (;;) {
2541177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2542177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2543177848Sdavidxu				if (oldstate == state)
2544177848Sdavidxu					break;
2545177848Sdavidxu				state = oldstate;
2546177848Sdavidxu			}
2547177848Sdavidxu		}
2548177848Sdavidxu
2549177848Sdavidxu		umtxq_lock(&uq->uq_key);
2550177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2551177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2552177848Sdavidxu	}
2553177848Sdavidxu	umtx_key_release(&uq->uq_key);
2554177848Sdavidxu	return (error);
2555177848Sdavidxu}
2556177848Sdavidxu
2557177848Sdavidxustatic int
2558177848Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2559177848Sdavidxu{
2560177848Sdavidxu	struct timespec ts, ts2, ts3;
2561177848Sdavidxu	struct timeval tv;
2562177848Sdavidxu	int error;
2563177848Sdavidxu
2564177848Sdavidxu	getnanouptime(&ts);
2565177848Sdavidxu	timespecadd(&ts, timeout);
2566177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2567177848Sdavidxu	for (;;) {
2568177848Sdavidxu		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2569177848Sdavidxu		if (error != ETIMEDOUT)
2570177848Sdavidxu			break;
2571177848Sdavidxu		getnanouptime(&ts2);
2572177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2573177848Sdavidxu			error = ETIMEDOUT;
2574177848Sdavidxu			break;
2575177848Sdavidxu		}
2576177848Sdavidxu		ts3 = ts;
2577177848Sdavidxu		timespecsub(&ts3, &ts2);
2578177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2579177848Sdavidxu	}
2580177849Sdavidxu	if (error == ERESTART)
2581177849Sdavidxu		error = EINTR;
2582177848Sdavidxu	return (error);
2583177848Sdavidxu}
2584177848Sdavidxu
2585177848Sdavidxustatic int
2586177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2587177848Sdavidxu{
2588177848Sdavidxu	struct umtx_q *uq;
2589177848Sdavidxu	uint32_t flags;
2590177848Sdavidxu	int32_t state, oldstate;
2591177848Sdavidxu	int32_t blocked_writers;
2592197476Sdavidxu	int32_t blocked_readers;
2593177848Sdavidxu	int error;
2594177848Sdavidxu
2595177848Sdavidxu	uq = td->td_umtxq;
2596177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2597177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2598177848Sdavidxu	if (error != 0)
2599177848Sdavidxu		return (error);
2600177848Sdavidxu
2601197476Sdavidxu	blocked_readers = 0;
2602177848Sdavidxu	for (;;) {
2603177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2604177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2605177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2606177848Sdavidxu			if (oldstate == state) {
2607177848Sdavidxu				umtx_key_release(&uq->uq_key);
2608177848Sdavidxu				return (0);
2609177848Sdavidxu			}
2610177848Sdavidxu			state = oldstate;
2611177848Sdavidxu		}
2612177848Sdavidxu
2613197476Sdavidxu		if (error) {
2614197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2615197476Sdavidxu			    blocked_readers != 0) {
2616197476Sdavidxu				umtxq_lock(&uq->uq_key);
2617197476Sdavidxu				umtxq_busy(&uq->uq_key);
2618197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2619197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2620197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2621197476Sdavidxu			}
2622197476Sdavidxu
2623177848Sdavidxu			break;
2624197476Sdavidxu		}
2625177848Sdavidxu
2626177848Sdavidxu		/* grab monitor lock */
2627177848Sdavidxu		umtxq_lock(&uq->uq_key);
2628177848Sdavidxu		umtxq_busy(&uq->uq_key);
2629177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2630177848Sdavidxu
2631203414Sdavidxu		/*
2632203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2633203414Sdavidxu		 * and the check below
2634203414Sdavidxu		 */
2635203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2636203414Sdavidxu
2637177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2638177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2639177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2640177848Sdavidxu			if (oldstate == state)
2641177848Sdavidxu				goto sleep;
2642177848Sdavidxu			state = oldstate;
2643177848Sdavidxu		}
2644177848Sdavidxu
2645177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2646177848Sdavidxu			umtxq_lock(&uq->uq_key);
2647177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2648177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2649177848Sdavidxu			continue;
2650177848Sdavidxu		}
2651177848Sdavidxusleep:
2652177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2653177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2654177848Sdavidxu
2655177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2656177848Sdavidxu			umtxq_lock(&uq->uq_key);
2657177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2658177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2659177848Sdavidxu
2660177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2661177848Sdavidxu
2662177848Sdavidxu			umtxq_busy(&uq->uq_key);
2663177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2664177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2665177848Sdavidxu			if (error)
2666177848Sdavidxu				break;
2667177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2668177848Sdavidxu		}
2669177848Sdavidxu
2670177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2671177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2672177848Sdavidxu		if (blocked_writers == 1) {
2673177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2674177848Sdavidxu			for (;;) {
2675177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2676177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2677177848Sdavidxu				if (oldstate == state)
2678177848Sdavidxu					break;
2679177848Sdavidxu				state = oldstate;
2680177848Sdavidxu			}
2681197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2682197476Sdavidxu		} else
2683197476Sdavidxu			blocked_readers = 0;
2684177848Sdavidxu
2685177848Sdavidxu		umtxq_lock(&uq->uq_key);
2686177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2687177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2688177848Sdavidxu	}
2689177848Sdavidxu
2690177848Sdavidxu	umtx_key_release(&uq->uq_key);
2691177848Sdavidxu	return (error);
2692177848Sdavidxu}
2693177848Sdavidxu
2694177848Sdavidxustatic int
2695177848Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2696177848Sdavidxu{
2697177848Sdavidxu	struct timespec ts, ts2, ts3;
2698177848Sdavidxu	struct timeval tv;
2699177848Sdavidxu	int error;
2700177848Sdavidxu
2701177848Sdavidxu	getnanouptime(&ts);
2702177848Sdavidxu	timespecadd(&ts, timeout);
2703177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2704177848Sdavidxu	for (;;) {
2705177848Sdavidxu		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2706177848Sdavidxu		if (error != ETIMEDOUT)
2707177848Sdavidxu			break;
2708177848Sdavidxu		getnanouptime(&ts2);
2709177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2710177848Sdavidxu			error = ETIMEDOUT;
2711177848Sdavidxu			break;
2712177848Sdavidxu		}
2713177848Sdavidxu		ts3 = ts;
2714177848Sdavidxu		timespecsub(&ts3, &ts2);
2715177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2716177848Sdavidxu	}
2717177849Sdavidxu	if (error == ERESTART)
2718177849Sdavidxu		error = EINTR;
2719177848Sdavidxu	return (error);
2720177848Sdavidxu}
2721177848Sdavidxu
2722177848Sdavidxustatic int
2723177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2724177848Sdavidxu{
2725177848Sdavidxu	struct umtx_q *uq;
2726177848Sdavidxu	uint32_t flags;
2727177848Sdavidxu	int32_t state, oldstate;
2728177848Sdavidxu	int error, q, count;
2729177848Sdavidxu
2730177848Sdavidxu	uq = td->td_umtxq;
2731177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2732177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2733177848Sdavidxu	if (error != 0)
2734177848Sdavidxu		return (error);
2735177848Sdavidxu
2736177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2737177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2738177848Sdavidxu		for (;;) {
2739177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2740177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2741177848Sdavidxu			if (oldstate != state) {
2742177848Sdavidxu				state = oldstate;
2743177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2744177848Sdavidxu					error = EPERM;
2745177848Sdavidxu					goto out;
2746177848Sdavidxu				}
2747177848Sdavidxu			} else
2748177848Sdavidxu				break;
2749177848Sdavidxu		}
2750177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2751177848Sdavidxu		for (;;) {
2752177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2753177848Sdavidxu				state - 1);
2754177848Sdavidxu			if (oldstate != state) {
2755177848Sdavidxu				state = oldstate;
2756177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2757177848Sdavidxu					error = EPERM;
2758177848Sdavidxu					goto out;
2759177848Sdavidxu				}
2760177848Sdavidxu			}
2761177848Sdavidxu			else
2762177848Sdavidxu				break;
2763177848Sdavidxu		}
2764177848Sdavidxu	} else {
2765177848Sdavidxu		error = EPERM;
2766177848Sdavidxu		goto out;
2767177848Sdavidxu	}
2768177848Sdavidxu
2769177848Sdavidxu	count = 0;
2770177848Sdavidxu
2771177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2772177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2773177848Sdavidxu			count = 1;
2774177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2775177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2776177848Sdavidxu			count = INT_MAX;
2777177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2778177848Sdavidxu		}
2779177848Sdavidxu	} else {
2780177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2781177848Sdavidxu			count = INT_MAX;
2782177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2783177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2784177848Sdavidxu			count = 1;
2785177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2786177848Sdavidxu		}
2787177848Sdavidxu	}
2788177848Sdavidxu
2789177848Sdavidxu	if (count) {
2790177848Sdavidxu		umtxq_lock(&uq->uq_key);
2791177848Sdavidxu		umtxq_busy(&uq->uq_key);
2792177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2793177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2794177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2795177848Sdavidxu	}
2796177848Sdavidxuout:
2797177848Sdavidxu	umtx_key_release(&uq->uq_key);
2798177848Sdavidxu	return (error);
2799177848Sdavidxu}
2800177848Sdavidxu
2801201472Sdavidxustatic int
2802232144Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
2803201472Sdavidxu{
2804201472Sdavidxu	struct umtx_q *uq;
2805201472Sdavidxu	struct timespec cts, ets, tts;
2806201472Sdavidxu	uint32_t flags, count;
2807201472Sdavidxu	int error;
2808201472Sdavidxu
2809201472Sdavidxu	uq = td->td_umtxq;
2810201472Sdavidxu	flags = fuword32(&sem->_flags);
2811201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2812201472Sdavidxu	if (error != 0)
2813201472Sdavidxu		return (error);
2814201472Sdavidxu	umtxq_lock(&uq->uq_key);
2815201472Sdavidxu	umtxq_busy(&uq->uq_key);
2816201472Sdavidxu	umtxq_insert(uq);
2817201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2818201472Sdavidxu
2819230194Sdavidxu	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2820230194Sdavidxu	rmb();
2821201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2822201472Sdavidxu	if (count != 0) {
2823201472Sdavidxu		umtxq_lock(&uq->uq_key);
2824201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2825201472Sdavidxu		umtxq_remove(uq);
2826201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2827201472Sdavidxu		umtx_key_release(&uq->uq_key);
2828201472Sdavidxu		return (0);
2829201472Sdavidxu	}
2830201472Sdavidxu
2831201472Sdavidxu	umtxq_lock(&uq->uq_key);
2832201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2833201472Sdavidxu
2834201472Sdavidxu	if (timeout == NULL) {
2835201472Sdavidxu		error = umtxq_sleep(uq, "usem", 0);
2836201472Sdavidxu	} else {
2837232144Sdavidxu		umtxq_unlock(&uq->uq_key);
2838232144Sdavidxu		kern_clock_gettime(td, timeout->_clockid, &cts);
2839232144Sdavidxu		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
2840232144Sdavidxu			ets = cts;
2841232144Sdavidxu			timespecadd(&ets, &timeout->_timeout);
2842232144Sdavidxu		} else {
2843232144Sdavidxu			ets = timeout->_timeout;
2844232144Sdavidxu		}
2845232144Sdavidxu		umtxq_lock(&uq->uq_key);
2846201472Sdavidxu		for (;;) {
2847201472Sdavidxu			if (timespeccmp(&cts, &ets, >=)) {
2848201472Sdavidxu				error = ETIMEDOUT;
2849201472Sdavidxu				break;
2850201472Sdavidxu			}
2851201472Sdavidxu			tts = ets;
2852201472Sdavidxu			timespecsub(&tts, &cts);
2853232144Sdavidxu			error = umtxq_sleep(uq, "usem", tstohz(&tts));
2854232144Sdavidxu			if (error != ETIMEDOUT)
2855232144Sdavidxu				break;
2856232144Sdavidxu			umtxq_unlock(&uq->uq_key);
2857232144Sdavidxu			kern_clock_gettime(td, timeout->_clockid, &cts);
2858232144Sdavidxu			umtxq_lock(&uq->uq_key);
2859201472Sdavidxu		}
2860201472Sdavidxu	}
2861201472Sdavidxu
2862211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2863211794Sdavidxu		error = 0;
2864211794Sdavidxu	else {
2865211794Sdavidxu		umtxq_remove(uq);
2866201472Sdavidxu		if (error == ERESTART)
2867201472Sdavidxu			error = EINTR;
2868201472Sdavidxu	}
2869201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2870201472Sdavidxu	umtx_key_release(&uq->uq_key);
2871201472Sdavidxu	return (error);
2872201472Sdavidxu}
2873201472Sdavidxu
2874201472Sdavidxu/*
2875201472Sdavidxu * Signal a userland condition variable.
2876201472Sdavidxu */
2877201472Sdavidxustatic int
2878201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2879201472Sdavidxu{
2880201472Sdavidxu	struct umtx_key key;
2881201472Sdavidxu	int error, cnt, nwake;
2882201472Sdavidxu	uint32_t flags;
2883201472Sdavidxu
2884201472Sdavidxu	flags = fuword32(&sem->_flags);
2885201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2886201472Sdavidxu		return (error);
2887201472Sdavidxu	umtxq_lock(&key);
2888201472Sdavidxu	umtxq_busy(&key);
2889201472Sdavidxu	cnt = umtxq_count(&key);
2890201472Sdavidxu	nwake = umtxq_signal(&key, 1);
2891201472Sdavidxu	if (cnt <= nwake) {
2892201472Sdavidxu		umtxq_unlock(&key);
2893201472Sdavidxu		error = suword32(
2894201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2895201472Sdavidxu		umtxq_lock(&key);
2896201472Sdavidxu	}
2897201472Sdavidxu	umtxq_unbusy(&key);
2898201472Sdavidxu	umtxq_unlock(&key);
2899201472Sdavidxu	umtx_key_release(&key);
2900201472Sdavidxu	return (error);
2901201472Sdavidxu}
2902201472Sdavidxu
2903139013Sdavidxuint
2904225617Skmacysys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2905139013Sdavidxu    /* struct umtx *umtx */
2906139013Sdavidxu{
2907162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2908139013Sdavidxu}
2909139013Sdavidxu
2910139013Sdavidxuint
2911225617Skmacysys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2912139013Sdavidxu    /* struct umtx *umtx */
2913139013Sdavidxu{
2914162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2915139013Sdavidxu}
2916139013Sdavidxu
2917228219Sphoinline int
2918228219Sphoumtx_copyin_timeout(const void *addr, struct timespec *tsp)
2919228219Spho{
2920228219Spho	int error;
2921228219Spho
2922228219Spho	error = copyin(addr, tsp, sizeof(struct timespec));
2923228219Spho	if (error == 0) {
2924228219Spho		if (tsp->tv_sec < 0 ||
2925228219Spho		    tsp->tv_nsec >= 1000000000 ||
2926228219Spho		    tsp->tv_nsec < 0)
2927228219Spho			error = EINVAL;
2928228219Spho	}
2929228219Spho	return (error);
2930228219Spho}
2931228219Spho
2932232144Sdavidxustatic inline int
2933232144Sdavidxuumtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
2934232144Sdavidxu{
2935232144Sdavidxu	int error;
2936232144Sdavidxu
2937232144Sdavidxu	tp->_clockid = CLOCK_REALTIME;
2938232144Sdavidxu	tp->_flags   = 0;
2939232144Sdavidxu	if (size <= sizeof(struct timespec))
2940232144Sdavidxu		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
2941232144Sdavidxu	else
2942232144Sdavidxu		error = copyin(addr, tp, sizeof(struct _umtx_time));
2943232144Sdavidxu	if (error != 0)
2944232144Sdavidxu		return (error);
2945232144Sdavidxu	if (tp->_timeout.tv_sec < 0 ||
2946232144Sdavidxu	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
2947232144Sdavidxu		return (EINVAL);
2948232144Sdavidxu	return (0);
2949232144Sdavidxu}
2950232144Sdavidxu
2951162536Sdavidxustatic int
2952162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2953139013Sdavidxu{
2954162536Sdavidxu	struct timespec *ts, timeout;
2955139013Sdavidxu	int error;
2956139013Sdavidxu
2957162536Sdavidxu	/* Allow a null timespec (wait forever). */
2958162536Sdavidxu	if (uap->uaddr2 == NULL)
2959162536Sdavidxu		ts = NULL;
2960162536Sdavidxu	else {
2961228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
2962162536Sdavidxu		if (error != 0)
2963162536Sdavidxu			return (error);
2964162536Sdavidxu		ts = &timeout;
2965162536Sdavidxu	}
2966162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2967162536Sdavidxu}
2968162536Sdavidxu
2969162536Sdavidxustatic int
2970162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2971162536Sdavidxu{
2972162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2973162536Sdavidxu}
2974162536Sdavidxu
2975162536Sdavidxustatic int
2976162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2977162536Sdavidxu{
2978232144Sdavidxu	struct _umtx_time timeout, *tm_p;
2979162536Sdavidxu	int error;
2980162536Sdavidxu
2981162536Sdavidxu	if (uap->uaddr2 == NULL)
2982232144Sdavidxu		tm_p = NULL;
2983162536Sdavidxu	else {
2984232144Sdavidxu		error = umtx_copyin_umtx_time(
2985232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
2986162536Sdavidxu		if (error != 0)
2987162536Sdavidxu			return (error);
2988232144Sdavidxu		tm_p = &timeout;
2989162536Sdavidxu	}
2990232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
2991162536Sdavidxu}
2992162536Sdavidxu
2993162536Sdavidxustatic int
2994173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2995173800Sdavidxu{
2996232144Sdavidxu	struct _umtx_time timeout, *tm_p;
2997173800Sdavidxu	int error;
2998173800Sdavidxu
2999173800Sdavidxu	if (uap->uaddr2 == NULL)
3000232144Sdavidxu		tm_p = NULL;
3001173800Sdavidxu	else {
3002232144Sdavidxu		error = umtx_copyin_umtx_time(
3003232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3004173800Sdavidxu		if (error != 0)
3005173800Sdavidxu			return (error);
3006232144Sdavidxu		tm_p = &timeout;
3007173800Sdavidxu	}
3008232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3009173800Sdavidxu}
3010173800Sdavidxu
3011173800Sdavidxustatic int
3012178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3013178646Sdavidxu{
3014232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3015178646Sdavidxu	int error;
3016178646Sdavidxu
3017178646Sdavidxu	if (uap->uaddr2 == NULL)
3018232144Sdavidxu		tm_p = NULL;
3019178646Sdavidxu	else {
3020232144Sdavidxu		error = umtx_copyin_umtx_time(
3021232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3022178646Sdavidxu		if (error != 0)
3023178646Sdavidxu			return (error);
3024232144Sdavidxu		tm_p = &timeout;
3025178646Sdavidxu	}
3026232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3027178646Sdavidxu}
3028178646Sdavidxu
3029178646Sdavidxustatic int
3030162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3031162536Sdavidxu{
3032178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3033162536Sdavidxu}
3034162536Sdavidxu
3035216641Sdavidxu#define BATCH_SIZE	128
3036162536Sdavidxustatic int
3037216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3038216641Sdavidxu{
3039216641Sdavidxu	int count = uap->val;
3040216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3041216641Sdavidxu	char **upp = (char **)uap->obj;
3042216641Sdavidxu	int tocopy;
3043216641Sdavidxu	int error = 0;
3044216641Sdavidxu	int i, pos = 0;
3045216641Sdavidxu
3046216641Sdavidxu	while (count > 0) {
3047216641Sdavidxu		tocopy = count;
3048216641Sdavidxu		if (tocopy > BATCH_SIZE)
3049216641Sdavidxu			tocopy = BATCH_SIZE;
3050216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3051216641Sdavidxu		if (error != 0)
3052216641Sdavidxu			break;
3053216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3054216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3055216641Sdavidxu		count -= tocopy;
3056216641Sdavidxu		pos += tocopy;
3057216641Sdavidxu	}
3058216641Sdavidxu	return (error);
3059216641Sdavidxu}
3060216641Sdavidxu
3061216641Sdavidxustatic int
3062178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3063178646Sdavidxu{
3064178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3065178646Sdavidxu}
3066178646Sdavidxu
3067178646Sdavidxustatic int
3068162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3069162536Sdavidxu{
3070232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3071162536Sdavidxu	int error;
3072162536Sdavidxu
3073162536Sdavidxu	/* Allow a null timespec (wait forever). */
3074162536Sdavidxu	if (uap->uaddr2 == NULL)
3075232144Sdavidxu		tm_p = NULL;
3076162536Sdavidxu	else {
3077232144Sdavidxu		error = umtx_copyin_umtx_time(
3078232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3079162536Sdavidxu		if (error != 0)
3080162536Sdavidxu			return (error);
3081232144Sdavidxu		tm_p = &timeout;
3082139013Sdavidxu	}
3083232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3084162536Sdavidxu}
3085162536Sdavidxu
3086162536Sdavidxustatic int
3087162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3088162536Sdavidxu{
3089179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3090162536Sdavidxu}
3091162536Sdavidxu
3092162536Sdavidxustatic int
3093179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3094179970Sdavidxu{
3095232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3096179970Sdavidxu	int error;
3097179970Sdavidxu
3098179970Sdavidxu	/* Allow a null timespec (wait forever). */
3099179970Sdavidxu	if (uap->uaddr2 == NULL)
3100232144Sdavidxu		tm_p = NULL;
3101179970Sdavidxu	else {
3102232144Sdavidxu		error = umtx_copyin_umtx_time(
3103232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3104179970Sdavidxu		if (error != 0)
3105179970Sdavidxu			return (error);
3106232144Sdavidxu		tm_p = &timeout;
3107179970Sdavidxu	}
3108232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3109179970Sdavidxu}
3110179970Sdavidxu
3111179970Sdavidxustatic int
3112179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3113179970Sdavidxu{
3114179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3115179970Sdavidxu}
3116179970Sdavidxu
3117179970Sdavidxustatic int
3118162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3119162536Sdavidxu{
3120162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3121162536Sdavidxu}
3122162536Sdavidxu
3123162536Sdavidxustatic int
3124162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3125162536Sdavidxu{
3126162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3127162536Sdavidxu}
3128162536Sdavidxu
3129164839Sdavidxustatic int
3130164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3131164839Sdavidxu{
3132164839Sdavidxu	struct timespec *ts, timeout;
3133164839Sdavidxu	int error;
3134164839Sdavidxu
3135164839Sdavidxu	/* Allow a null timespec (wait forever). */
3136164839Sdavidxu	if (uap->uaddr2 == NULL)
3137164839Sdavidxu		ts = NULL;
3138164839Sdavidxu	else {
3139228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3140164839Sdavidxu		if (error != 0)
3141164839Sdavidxu			return (error);
3142164839Sdavidxu		ts = &timeout;
3143164839Sdavidxu	}
3144164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3145164839Sdavidxu}
3146164839Sdavidxu
3147164839Sdavidxustatic int
3148164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3149164839Sdavidxu{
3150164839Sdavidxu	return do_cv_signal(td, uap->obj);
3151164839Sdavidxu}
3152164839Sdavidxu
3153164839Sdavidxustatic int
3154164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3155164839Sdavidxu{
3156164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3157164839Sdavidxu}
3158164839Sdavidxu
3159177848Sdavidxustatic int
3160177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3161177848Sdavidxu{
3162177848Sdavidxu	struct timespec timeout;
3163177848Sdavidxu	int error;
3164177848Sdavidxu
3165177848Sdavidxu	/* Allow a null timespec (wait forever). */
3166177848Sdavidxu	if (uap->uaddr2 == NULL) {
3167177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3168177848Sdavidxu	} else {
3169228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3170177848Sdavidxu		if (error != 0)
3171177848Sdavidxu			return (error);
3172177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3173177848Sdavidxu	}
3174177848Sdavidxu	return (error);
3175177848Sdavidxu}
3176177848Sdavidxu
3177177848Sdavidxustatic int
3178177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3179177848Sdavidxu{
3180177848Sdavidxu	struct timespec timeout;
3181177848Sdavidxu	int error;
3182177848Sdavidxu
3183177848Sdavidxu	/* Allow a null timespec (wait forever). */
3184177848Sdavidxu	if (uap->uaddr2 == NULL) {
3185177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3186177848Sdavidxu	} else {
3187228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3188177848Sdavidxu		if (error != 0)
3189177848Sdavidxu			return (error);
3190177848Sdavidxu
3191177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3192177848Sdavidxu	}
3193177848Sdavidxu	return (error);
3194177848Sdavidxu}
3195177848Sdavidxu
3196177848Sdavidxustatic int
3197177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3198177848Sdavidxu{
3199177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3200177848Sdavidxu}
3201177848Sdavidxu
3202201472Sdavidxustatic int
3203201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3204201472Sdavidxu{
3205232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3206201472Sdavidxu	int error;
3207201472Sdavidxu
3208201472Sdavidxu	/* Allow a null timespec (wait forever). */
3209201472Sdavidxu	if (uap->uaddr2 == NULL)
3210232144Sdavidxu		tm_p = NULL;
3211201472Sdavidxu	else {
3212232144Sdavidxu		error = umtx_copyin_umtx_time(
3213232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3214201472Sdavidxu		if (error != 0)
3215201472Sdavidxu			return (error);
3216232144Sdavidxu		tm_p = &timeout;
3217201472Sdavidxu	}
3218232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3219201472Sdavidxu}
3220201472Sdavidxu
3221201472Sdavidxustatic int
3222201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3223201472Sdavidxu{
3224201472Sdavidxu	return do_sem_wake(td, uap->obj);
3225201472Sdavidxu}
3226201472Sdavidxu
/* Signature shared by all UMTX_OP_* service routines. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(2), indexed by UMTX_OP_* opcode.  Entry
 * order must match the opcode values declared in <sys/umtx.h>; keep the
 * compat32 table below in sync.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
};
3253162536Sdavidxu
3254162536Sdavidxuint
3255225617Skmacysys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3256162536Sdavidxu{
3257163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3258162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3259162536Sdavidxu	return (EINVAL);
3260162536Sdavidxu}
3261162536Sdavidxu
3262205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3263163046Sdavidxuint
3264163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3265163046Sdavidxu    /* struct umtx *umtx */
3266163046Sdavidxu{
3267163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3268163046Sdavidxu}
3269163046Sdavidxu
3270163046Sdavidxuint
3271163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3272163046Sdavidxu    /* struct umtx *umtx */
3273163046Sdavidxu{
3274163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3275163046Sdavidxu}
3276163046Sdavidxu
/*
 * 32-bit layout of struct timespec as seen by COMPAT_FREEBSD32
 * processes.  The fields must be signed (matching the native signed
 * time_t/long): with the previous uint32_t fields the "tv_sec < 0" and
 * "tv_nsec < 0" validation in umtx_copyin_timeout32() and
 * umtx_copyin_umtx_time32() could never fire, so negative timeouts from
 * 32-bit userland were silently accepted as huge positive values.  The
 * size and layout (two 4-byte fields) are unchanged.
 */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};
3281162536Sdavidxu
/*
 * 32-bit layout of struct _umtx_time as passed by COMPAT_FREEBSD32
 * processes; converted to the native form by umtx_copyin_umtx_time32().
 */
struct umtx_time32 {
	struct	timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};
3287232144Sdavidxu
3288162536Sdavidxustatic inline int
3289228218Sphoumtx_copyin_timeout32(void *addr, struct timespec *tsp)
3290162536Sdavidxu{
3291162536Sdavidxu	struct timespec32 ts32;
3292162536Sdavidxu	int error;
3293162536Sdavidxu
3294162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3295162536Sdavidxu	if (error == 0) {
3296228218Spho		if (ts32.tv_sec < 0 ||
3297228218Spho		    ts32.tv_nsec >= 1000000000 ||
3298228218Spho		    ts32.tv_nsec < 0)
3299228218Spho			error = EINVAL;
3300228218Spho		else {
3301228218Spho			tsp->tv_sec = ts32.tv_sec;
3302228218Spho			tsp->tv_nsec = ts32.tv_nsec;
3303228218Spho		}
3304162536Sdavidxu	}
3305140421Sdavidxu	return (error);
3306139013Sdavidxu}
3307161678Sdavidxu
/*
 * Copy a timeout object in from 32-bit userland and convert it to a
 * native struct _umtx_time.  "size" is the size of the userland object:
 * if it is no larger than a struct timespec32, only the timeout is
 * copied and flags/clockid keep their defaults (0 / CLOCK_REALTIME);
 * otherwise a full struct umtx_time32 is read.  Returns EINVAL for an
 * out-of-range timespec, or the copyin(9) error.
 */
static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
	struct umtx_time32 t32;
	int error;

	/* Defaults used when userland supplied only a bare timespec32. */
	t32.clockid = CLOCK_REALTIME;
	t32.flags   = 0;
	if (size <= sizeof(struct timespec32))
		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
	else
		error = copyin(addr, &t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	/*
	 * NOTE(review): the "< 0" tests only reject anything if
	 * timespec32's fields are signed -- verify the struct definition.
	 */
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}
3331232144Sdavidxu
3332162536Sdavidxustatic int
3333162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3334162536Sdavidxu{
3335162536Sdavidxu	struct timespec *ts, timeout;
3336162536Sdavidxu	int error;
3337162536Sdavidxu
3338162536Sdavidxu	/* Allow a null timespec (wait forever). */
3339162536Sdavidxu	if (uap->uaddr2 == NULL)
3340162536Sdavidxu		ts = NULL;
3341162536Sdavidxu	else {
3342228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3343162536Sdavidxu		if (error != 0)
3344162536Sdavidxu			return (error);
3345162536Sdavidxu		ts = &timeout;
3346162536Sdavidxu	}
3347162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3348162536Sdavidxu}
3349162536Sdavidxu
3350162536Sdavidxustatic int
3351162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3352162536Sdavidxu{
3353162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3354162536Sdavidxu}
3355162536Sdavidxu
3356162536Sdavidxustatic int
3357162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3358162536Sdavidxu{
3359232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3360162536Sdavidxu	int error;
3361162536Sdavidxu
3362162536Sdavidxu	if (uap->uaddr2 == NULL)
3363232144Sdavidxu		tm_p = NULL;
3364162536Sdavidxu	else {
3365232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3366232144Sdavidxu			(size_t)uap->uaddr1, &timeout);
3367162536Sdavidxu		if (error != 0)
3368162536Sdavidxu			return (error);
3369232144Sdavidxu		tm_p = &timeout;
3370162536Sdavidxu	}
3371232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3372162536Sdavidxu}
3373162536Sdavidxu
3374162536Sdavidxustatic int
3375162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3376162536Sdavidxu{
3377232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3378162536Sdavidxu	int error;
3379162536Sdavidxu
3380162536Sdavidxu	/* Allow a null timespec (wait forever). */
3381162536Sdavidxu	if (uap->uaddr2 == NULL)
3382232144Sdavidxu		tm_p = NULL;
3383162536Sdavidxu	else {
3384232144Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3385232144Sdavidxu			    (size_t)uap->uaddr1, &timeout);
3386162536Sdavidxu		if (error != 0)
3387162536Sdavidxu			return (error);
3388232144Sdavidxu		tm_p = &timeout;
3389162536Sdavidxu	}
3390232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3391162536Sdavidxu}
3392162536Sdavidxu
3393164839Sdavidxustatic int
3394179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3395179970Sdavidxu{
3396232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3397179970Sdavidxu	int error;
3398179970Sdavidxu
3399179970Sdavidxu	/* Allow a null timespec (wait forever). */
3400179970Sdavidxu	if (uap->uaddr2 == NULL)
3401232144Sdavidxu		tm_p = NULL;
3402179970Sdavidxu	else {
3403232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3404232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3405179970Sdavidxu		if (error != 0)
3406179970Sdavidxu			return (error);
3407232144Sdavidxu		tm_p = &timeout;
3408179970Sdavidxu	}
3409232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3410179970Sdavidxu}
3411179970Sdavidxu
3412179970Sdavidxustatic int
3413164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3414164839Sdavidxu{
3415164839Sdavidxu	struct timespec *ts, timeout;
3416164839Sdavidxu	int error;
3417164839Sdavidxu
3418164839Sdavidxu	/* Allow a null timespec (wait forever). */
3419164839Sdavidxu	if (uap->uaddr2 == NULL)
3420164839Sdavidxu		ts = NULL;
3421164839Sdavidxu	else {
3422228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3423164839Sdavidxu		if (error != 0)
3424164839Sdavidxu			return (error);
3425164839Sdavidxu		ts = &timeout;
3426164839Sdavidxu	}
3427164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3428164839Sdavidxu}
3429164839Sdavidxu
3430177848Sdavidxustatic int
3431177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3432177848Sdavidxu{
3433177848Sdavidxu	struct timespec timeout;
3434177848Sdavidxu	int error;
3435177848Sdavidxu
3436177848Sdavidxu	/* Allow a null timespec (wait forever). */
3437177848Sdavidxu	if (uap->uaddr2 == NULL) {
3438177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3439177848Sdavidxu	} else {
3440228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3441177848Sdavidxu		if (error != 0)
3442177848Sdavidxu			return (error);
3443177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3444177848Sdavidxu	}
3445177848Sdavidxu	return (error);
3446177848Sdavidxu}
3447177848Sdavidxu
3448177848Sdavidxustatic int
3449177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3450177848Sdavidxu{
3451177848Sdavidxu	struct timespec timeout;
3452177848Sdavidxu	int error;
3453177848Sdavidxu
3454177848Sdavidxu	/* Allow a null timespec (wait forever). */
3455177848Sdavidxu	if (uap->uaddr2 == NULL) {
3456177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3457177848Sdavidxu	} else {
3458228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3459177848Sdavidxu		if (error != 0)
3460177848Sdavidxu			return (error);
3461177848Sdavidxu
3462177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3463177848Sdavidxu	}
3464177848Sdavidxu	return (error);
3465177848Sdavidxu}
3466177848Sdavidxu
3467178646Sdavidxustatic int
3468178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3469178646Sdavidxu{
3470232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3471178646Sdavidxu	int error;
3472178646Sdavidxu
3473178646Sdavidxu	if (uap->uaddr2 == NULL)
3474232144Sdavidxu		tm_p = NULL;
3475178646Sdavidxu	else {
3476232144Sdavidxu		error = umtx_copyin_umtx_time32(
3477232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1,&timeout);
3478178646Sdavidxu		if (error != 0)
3479178646Sdavidxu			return (error);
3480232144Sdavidxu		tm_p = &timeout;
3481178646Sdavidxu	}
3482232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3483178646Sdavidxu}
3484178646Sdavidxu
3485201472Sdavidxustatic int
3486201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3487201472Sdavidxu{
3488232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3489201472Sdavidxu	int error;
3490201472Sdavidxu
3491201472Sdavidxu	/* Allow a null timespec (wait forever). */
3492201472Sdavidxu	if (uap->uaddr2 == NULL)
3493232144Sdavidxu		tm_p = NULL;
3494201472Sdavidxu	else {
3495232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3496232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3497201472Sdavidxu		if (error != 0)
3498201472Sdavidxu			return (error);
3499232144Sdavidxu		tm_p = &timeout;
3500201472Sdavidxu	}
3501232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3502201472Sdavidxu}
3503201472Sdavidxu
3504216641Sdavidxustatic int
3505216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3506216641Sdavidxu{
3507216641Sdavidxu	int count = uap->val;
3508216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3509216641Sdavidxu	uint32_t **upp = (uint32_t **)uap->obj;
3510216641Sdavidxu	int tocopy;
3511216641Sdavidxu	int error = 0;
3512216641Sdavidxu	int i, pos = 0;
3513216641Sdavidxu
3514216641Sdavidxu	while (count > 0) {
3515216641Sdavidxu		tocopy = count;
3516216641Sdavidxu		if (tocopy > BATCH_SIZE)
3517216641Sdavidxu			tocopy = BATCH_SIZE;
3518216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3519216641Sdavidxu		if (error != 0)
3520216641Sdavidxu			break;
3521216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3522216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3523216641Sdavidxu				INT_MAX, 1);
3524216641Sdavidxu		count -= tocopy;
3525216641Sdavidxu		pos += tocopy;
3526216641Sdavidxu	}
3527216641Sdavidxu	return (error);
3528216641Sdavidxu}
3529216641Sdavidxu
/*
 * COMPAT_FREEBSD32 dispatch table; must stay index-for-index parallel
 * with op_table above.  (The MUTEX_TRYLOCK/MUTEX_LOCK entry comments
 * were previously swapped; the function order -- trylock at index 4,
 * lock at index 5 -- matches the native table and is correct.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
};
3554162536Sdavidxu
3555162536Sdavidxuint
3556162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3557162536Sdavidxu{
3558163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3559162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3560162536Sdavidxu			(struct _umtx_op_args *)uap);
3561162536Sdavidxu	return (EINVAL);
3562162536Sdavidxu}
3563162536Sdavidxu#endif
3564162536Sdavidxu
3565161678Sdavidxuvoid
3566161678Sdavidxuumtx_thread_init(struct thread *td)
3567161678Sdavidxu{
3568161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3569161678Sdavidxu	td->td_umtxq->uq_thread = td;
3570161678Sdavidxu}
3571161678Sdavidxu
/* Release the umtx queue node allocated by umtx_thread_init(). */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
3577161678Sdavidxu
/*
 * Called when a new thread is created, e.g. by fork(): reset the
 * inherited priority and assert that the (possibly recycled) umtx
 * queue node carries no stale state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
3594161678Sdavidxu
/*
 * exec() hook: discard the executing thread's umtx state, since the
 * old address space (and any userland locks in it) is going away.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
3604161678Sdavidxu
/*
 * thread_exit() hook: drop any umtx priority-inheritance state the
 * dying thread still holds.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
3613161678Sdavidxu
/*
 * Clean up a thread's umtx priority-inheritance state: disown every PI
 * mutex still charged to it and give back any lent priority.  Called on
 * thread exit and exec.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan every PI mutex this thread still owns. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	/*
	 * Reset the lent user priority under the thread lock; PRI_MAX
	 * is also the value umtx_thread_alloc() starts from.
	 */
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
3637