1139804Simp/*-
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: stable/10/sys/kern/kern_umtx.c 330678 2018-03-09 01:21:22Z brooks $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32233045Sdavide#include "opt_umtx_profiling.h"
33233045Sdavide
34112904Sjeff#include <sys/param.h>
35112904Sjeff#include <sys/kernel.h>
36131431Smarcel#include <sys/limits.h>
37112904Sjeff#include <sys/lock.h>
38115765Sjeff#include <sys/malloc.h>
39112904Sjeff#include <sys/mutex.h>
40164033Srwatson#include <sys/priv.h>
41112904Sjeff#include <sys/proc.h>
42248105Sattilio#include <sys/sbuf.h>
43161678Sdavidxu#include <sys/sched.h>
44165369Sdavidxu#include <sys/smp.h>
45161678Sdavidxu#include <sys/sysctl.h>
46112904Sjeff#include <sys/sysent.h>
47112904Sjeff#include <sys/systm.h>
48112904Sjeff#include <sys/sysproto.h>
49216641Sdavidxu#include <sys/syscallsubr.h>
50139013Sdavidxu#include <sys/eventhandler.h>
51112904Sjeff#include <sys/umtx.h>
52112904Sjeff
53139013Sdavidxu#include <vm/vm.h>
54139013Sdavidxu#include <vm/vm_param.h>
55139013Sdavidxu#include <vm/pmap.h>
56139013Sdavidxu#include <vm/vm_map.h>
57139013Sdavidxu#include <vm/vm_object.h>
58139013Sdavidxu
59165369Sdavidxu#include <machine/cpu.h>
60165369Sdavidxu
61205014Snwhitehorn#ifdef COMPAT_FREEBSD32
62162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
63162536Sdavidxu#endif
64162536Sdavidxu
65179970Sdavidxu#define _UMUTEX_TRY		1
66179970Sdavidxu#define _UMUTEX_WAIT		2
67179970Sdavidxu
68248105Sattilio#ifdef UMTX_PROFILING
69248105Sattilio#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
70248105Sattilio	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
71248105Sattilio#endif
72248105Sattilio
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of threads blocked on this PI mutex */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
93161678Sdavidxu
/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread this queue entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads may be done under either the
	 * chain lock or umtx_lock; writes must hold both the chain
	 * lock and umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};
131115765Sjeff
132161678SdavidxuTAILQ_HEAD(umtxq_head, umtx_q);
133161678Sdavidxu
/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;	/* Threads sleeping on this key. */
	struct umtx_key		key;	/* Key all queued threads share. */
	LIST_ENTRY(umtxq_queue)	link;	/* Linkage in the chain's queue list. */
	int			length;	/* Number of threads on 'head'. */
};
141201991Sdavidxu
142201991SdavidxuLIST_HEAD(umtxq_list, umtxq_queue);
143201991Sdavidxu
/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Spare umtxq_queue structures donated by sleeping threads. */
	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	/* Current and peak number of distinct keys queued on this chain. */
	u_int 			length;
	u_int			max_length;
#endif
};
170115765Sjeff
171161678Sdavidxu#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
172161678Sdavidxu
/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could simply create a PI-mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, which would boost A's priority through
 * priority propagation as well, and A's priority would never be
 * lowered even if it were using 100% CPU — unfair to other processes.
 */
181161678Sdavidxu
182163709Sjb#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
183163709Sjb			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
184163709Sjb			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
185161678Sdavidxu
186138224Sdavidxu#define	GOLDEN_RATIO_PRIME	2654404609U
187216678Sdavidxu#define	UMTX_CHAINS		512
188216678Sdavidxu#define	UMTX_SHIFTS		(__WORD_BIT - 9)
189115765Sjeff
190161678Sdavidxu#define	GET_SHARE(flags)	\
191161678Sdavidxu    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
192161678Sdavidxu
193177848Sdavidxu#define BUSY_SPINS		200
194177848Sdavidxu
/* Absolute-deadline timeout state used by the sleeping primitives. */
struct abs_timeout {
	int clockid;		/* Clock the deadline is measured against. */
	struct timespec cur;	/* Most recently sampled current time. */
	struct timespec end;	/* Absolute deadline. */
};
200233690Sdavidxu
201161678Sdavidxustatic uma_zone_t		umtx_pi_zone;
202179421Sdavidxustatic struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
203138224Sdavidxustatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
204161678Sdavidxustatic int			umtx_pi_allocated;
205115310Sjeff
206227309Sedstatic SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
207161678SdavidxuSYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
208161678Sdavidxu    &umtx_pi_allocated, 0, "Allocated umtx_pi");
209161678Sdavidxu
210233045Sdavide#ifdef UMTX_PROFILING
211233045Sdavidestatic long max_length;
212233045SdavideSYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
213233045Sdavidestatic SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
214233045Sdavide#endif
215233045Sdavide
216161678Sdavidxustatic void umtxq_sysinit(void *);
217161678Sdavidxustatic void umtxq_hash(struct umtx_key *key);
218161678Sdavidxustatic struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
219139013Sdavidxustatic void umtxq_lock(struct umtx_key *key);
220139013Sdavidxustatic void umtxq_unlock(struct umtx_key *key);
221139257Sdavidxustatic void umtxq_busy(struct umtx_key *key);
222139257Sdavidxustatic void umtxq_unbusy(struct umtx_key *key);
223177848Sdavidxustatic void umtxq_insert_queue(struct umtx_q *uq, int q);
224177848Sdavidxustatic void umtxq_remove_queue(struct umtx_q *uq, int q);
225233690Sdavidxustatic int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
226139257Sdavidxustatic int umtxq_count(struct umtx_key *key);
227163697Sdavidxustatic struct umtx_pi *umtx_pi_alloc(int);
228161678Sdavidxustatic void umtx_pi_free(struct umtx_pi *pi);
229161678Sdavidxustatic int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
230161678Sdavidxustatic void umtx_thread_cleanup(struct thread *td);
231161678Sdavidxustatic void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
232161678Sdavidxu	struct image_params *imgp __unused);
233161678SdavidxuSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
234115310Sjeff
235177848Sdavidxu#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
236177848Sdavidxu#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
237177848Sdavidxu#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
238177848Sdavidxu
239170300Sjeffstatic struct mtx umtx_lock;
240170300Sjeff
241233045Sdavide#ifdef UMTX_PROFILING
/*
 * Create one sysctl node per chain index under debug.umtx.chains,
 * exposing the peak queue length of both chain arrays.
 */
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}
260248105Sattilio
/*
 * Sysctl handler: for each of the two chain arrays, report the five
 * chains with the highest recorded max_length, expressed as a
 * percentage of the sum of all max_lengths in that array.
 */
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;	/* fractional parts of the top 5 */
	u_int si0, si1, si2, si3, si4;	/* chain indices of the top 5 */
	u_int sw0, sw1, sw2, sw3, sw4;	/* whole parts of the top 5 */

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		/* First pass: total peak length across the whole array. */
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			/*
			 * Second pass: keep a running top-5 by percentage.
			 * 'whole' and 'fract' are fixed-point: printing
			 * whole/tot "." fract/tot yields two-digit
			 * percentages without floating point.
			 */
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}
338248105Sattilio
339248105Sattiliostatic int
340248105Sattiliosysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
341248105Sattilio{
342248105Sattilio	struct umtxq_chain *uc;
343248105Sattilio	u_int i, j;
344248105Sattilio	int clear, error;
345248105Sattilio
346248105Sattilio	clear = 0;
347248105Sattilio	error = sysctl_handle_int(oidp, &clear, 0, req);
348248105Sattilio	if (error != 0 || req->newptr == NULL)
349248105Sattilio		return (error);
350248105Sattilio
351248105Sattilio	if (clear != 0) {
352248105Sattilio		for (i = 0; i < 2; ++i) {
353248105Sattilio			for (j = 0; j < UMTX_CHAINS; ++j) {
354248105Sattilio				uc = &umtxq_chains[i][j];
355248105Sattilio				mtx_lock(&uc->uc_lock);
356248105Sattilio				uc->length = 0;
357248105Sattilio				uc->max_length = 0;
358248105Sattilio				mtx_unlock(&uc->uc_lock);
359248105Sattilio			}
360248105Sattilio		}
361248105Sattilio	}
362248105Sattilio	return (0);
363248105Sattilio}
364248105Sattilio
365248105SattilioSYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
366248105Sattilio    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
367248105Sattilio    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
368248105SattilioSYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
369248105Sattilio    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
370248105Sattilio    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
371233045Sdavide#endif
372233045Sdavide
/*
 * One-time initialization: create the umtx_pi zone, set up every
 * wait-queue chain, and register the exec hook.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			/* MTX_DUPOK: more than one chain lock may be held. */
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}
403161678Sdavidxu
404143149Sdavidxustruct umtx_q *
405143149Sdavidxuumtxq_alloc(void)
406143149Sdavidxu{
407161678Sdavidxu	struct umtx_q *uq;
408161678Sdavidxu
409161678Sdavidxu	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
410201991Sdavidxu	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
411201991Sdavidxu	TAILQ_INIT(&uq->uq_spare_queue->head);
412161678Sdavidxu	TAILQ_INIT(&uq->uq_pi_contested);
413161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
414161678Sdavidxu	return (uq);
415143149Sdavidxu}
416143149Sdavidxu
/*
 * Free a umtx_q and its spare wait queue.  An idle entry always owns
 * exactly one spare queue (it is NULL only while the entry is queued).
 */
void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}
424143149Sdavidxu
425161678Sdavidxustatic inline void
426139013Sdavidxuumtxq_hash(struct umtx_key *key)
427138224Sdavidxu{
428161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
429161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
430138224Sdavidxu}
431138224Sdavidxu
432161678Sdavidxustatic inline struct umtxq_chain *
433161678Sdavidxuumtxq_getchain(struct umtx_key *key)
434139013Sdavidxu{
435201886Sdavidxu	if (key->type <= TYPE_SEM)
436179421Sdavidxu		return (&umtxq_chains[1][key->hash]);
437179421Sdavidxu	return (&umtxq_chains[0][key->hash]);
438139013Sdavidxu}
439139013Sdavidxu
440161678Sdavidxu/*
441177848Sdavidxu * Lock a chain.
442161678Sdavidxu */
443138224Sdavidxustatic inline void
444177848Sdavidxuumtxq_lock(struct umtx_key *key)
445139257Sdavidxu{
446161678Sdavidxu	struct umtxq_chain *uc;
447139257Sdavidxu
448161678Sdavidxu	uc = umtxq_getchain(key);
449177848Sdavidxu	mtx_lock(&uc->uc_lock);
450139257Sdavidxu}
451139257Sdavidxu
452161678Sdavidxu/*
453177848Sdavidxu * Unlock a chain.
454161678Sdavidxu */
455139257Sdavidxustatic inline void
456177848Sdavidxuumtxq_unlock(struct umtx_key *key)
457139257Sdavidxu{
458161678Sdavidxu	struct umtxq_chain *uc;
459139257Sdavidxu
460161678Sdavidxu	uc = umtxq_getchain(key);
461177848Sdavidxu	mtx_unlock(&uc->uc_lock);
462139257Sdavidxu}
463139257Sdavidxu
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		/*
		 * On SMP, spin briefly before sleeping: the current owner
		 * is likely running on another CPU and will clear uc_busy
		 * soon.  The chain lock is dropped while spinning so the
		 * owner can acquire it to unbusy the chain.
		 */
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Still busy: sleep until the owner wakes us. */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
495138224Sdavidxu
496161678Sdavidxu/*
497177848Sdavidxu * Unbusy a chain.
498161678Sdavidxu */
499138225Sdavidxustatic inline void
500177848Sdavidxuumtxq_unbusy(struct umtx_key *key)
501138224Sdavidxu{
502161678Sdavidxu	struct umtxq_chain *uc;
503161678Sdavidxu
504161678Sdavidxu	uc = umtxq_getchain(key);
505177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
506177848Sdavidxu	KASSERT(uc->uc_busy != 0, ("not busy"));
507177848Sdavidxu	uc->uc_busy = 0;
508177848Sdavidxu	if (uc->uc_waiters)
509177848Sdavidxu		wakeup_one(uc);
510138224Sdavidxu}
511138224Sdavidxu
/* Convenience wrapper: take the chain lock, unbusy, drop the lock. */
static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}
520274648Skib
521201991Sdavidxustatic struct umtxq_queue *
522201991Sdavidxuumtxq_queue_lookup(struct umtx_key *key, int q)
523201991Sdavidxu{
524201991Sdavidxu	struct umtxq_queue *uh;
525201991Sdavidxu	struct umtxq_chain *uc;
526201991Sdavidxu
527201991Sdavidxu	uc = umtxq_getchain(key);
528201991Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
529201991Sdavidxu	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
530201991Sdavidxu		if (umtx_key_match(&uh->key, key))
531201991Sdavidxu			return (uh);
532201991Sdavidxu	}
533201991Sdavidxu
534201991Sdavidxu	return (NULL);
535201991Sdavidxu}
536201991Sdavidxu
/*
 * Enqueue a thread on the wait queue for its key, creating the
 * per-key queue from the thread's spare queue when necessary.
 */
static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		/*
		 * A queue for this key already exists; donate our spare
		 * queue to the chain's spare list for later reuse.
		 */
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		/* First waiter on this key: our spare queue becomes it. */
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	/* Either way the spare queue has been given away. */
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}
570139013Sdavidxu
/*
 * Dequeue a thread from its wait queue, handing it back a spare
 * umtxq_queue (either the now-empty per-key queue or one from the
 * chain's spare list).
 */
static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			/* Last waiter: unlink the key's queue and take it. */
			LIST_REMOVE(uh, link);
		} else {
			/* Others remain: take a spare queue instead. */
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
600139013Sdavidxu
601161678Sdavidxu/*
602161678Sdavidxu * Check if there are multiple waiters
603161678Sdavidxu */
604139013Sdavidxustatic int
605139013Sdavidxuumtxq_count(struct umtx_key *key)
606139013Sdavidxu{
607161678Sdavidxu	struct umtxq_chain *uc;
608201991Sdavidxu	struct umtxq_queue *uh;
609115765Sjeff
610161678Sdavidxu	uc = umtxq_getchain(key);
611161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
612201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
613201991Sdavidxu	if (uh != NULL)
614201991Sdavidxu		return (uh->length);
615201991Sdavidxu	return (0);
616115765Sjeff}
617115765Sjeff
618161678Sdavidxu/*
619161678Sdavidxu * Check if there are multiple PI waiters and returns first
620161678Sdavidxu * waiter.
621161678Sdavidxu */
622139257Sdavidxustatic int
623161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
624161678Sdavidxu{
625161678Sdavidxu	struct umtxq_chain *uc;
626201991Sdavidxu	struct umtxq_queue *uh;
627161678Sdavidxu
628161678Sdavidxu	*first = NULL;
629161678Sdavidxu	uc = umtxq_getchain(key);
630161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
631201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
632201991Sdavidxu	if (uh != NULL) {
633201991Sdavidxu		*first = TAILQ_FIRST(&uh->head);
634201991Sdavidxu		return (uh->length);
635161678Sdavidxu	}
636201991Sdavidxu	return (0);
637161678Sdavidxu}
638161678Sdavidxu
/*
 * Check whether the thread should stop for suspension or single
 * threading; returns EINTR or ERESTART so callers can break out of
 * their retry loops, 0 when no stop is pending.
 */
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		/* EINTR when the process is exiting, ERESTART otherwise. */
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}
664251684Skib
665161678Sdavidxu/*
666161678Sdavidxu * Wake up threads waiting on an userland object.
667161678Sdavidxu */
668177848Sdavidxu
669161678Sdavidxustatic int
670177848Sdavidxuumtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
671115765Sjeff{
672161678Sdavidxu	struct umtxq_chain *uc;
673201991Sdavidxu	struct umtxq_queue *uh;
674201991Sdavidxu	struct umtx_q *uq;
675161678Sdavidxu	int ret;
676115765Sjeff
677139257Sdavidxu	ret = 0;
678161678Sdavidxu	uc = umtxq_getchain(key);
679161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
680201991Sdavidxu	uh = umtxq_queue_lookup(key, q);
681201991Sdavidxu	if (uh != NULL) {
682201991Sdavidxu		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
683177848Sdavidxu			umtxq_remove_queue(uq, q);
684161678Sdavidxu			wakeup(uq);
685139257Sdavidxu			if (++ret >= n_wake)
686201991Sdavidxu				return (ret);
687139013Sdavidxu		}
688139013Sdavidxu	}
689139257Sdavidxu	return (ret);
690138224Sdavidxu}
691138224Sdavidxu
692177848Sdavidxu
693161678Sdavidxu/*
694161678Sdavidxu * Wake up specified thread.
695161678Sdavidxu */
696161678Sdavidxustatic inline void
697161678Sdavidxuumtxq_signal_thread(struct umtx_q *uq)
698161678Sdavidxu{
699161678Sdavidxu	struct umtxq_chain *uc;
700161678Sdavidxu
701161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
702161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
703161678Sdavidxu	umtxq_remove(uq);
704161678Sdavidxu	wakeup(uq);
705161678Sdavidxu}
706161678Sdavidxu
/* Convert a timespec to a tick count by way of a timeval. */
static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}
715233690Sdavidxu
716233690Sdavidxustatic void
717233690Sdavidxuabs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
718233690Sdavidxu	const struct timespec *timeout)
719233690Sdavidxu{
720233690Sdavidxu
721233690Sdavidxu	timo->clockid = clockid;
722233690Sdavidxu	if (!absolute) {
723233690Sdavidxu		kern_clock_gettime(curthread, clockid, &timo->end);
724233690Sdavidxu		timo->cur = timo->end;
725233690Sdavidxu		timespecadd(&timo->end, timeout);
726233690Sdavidxu	} else {
727233690Sdavidxu		timo->end = *timeout;
728233690Sdavidxu		kern_clock_gettime(curthread, clockid, &timo->cur);
729233690Sdavidxu	}
730233690Sdavidxu}
731233690Sdavidxu
732233690Sdavidxustatic void
733233690Sdavidxuabs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
734233690Sdavidxu{
735233690Sdavidxu
736233690Sdavidxu	abs_timeout_init(timo, umtxtime->_clockid,
737233690Sdavidxu		(umtxtime->_flags & UMTX_ABSTIME) != 0,
738233690Sdavidxu		&umtxtime->_timeout);
739233690Sdavidxu}
740233690Sdavidxu
/* Refresh the timeout's cached current time from its clock. */
static inline void
abs_timeout_update(struct abs_timeout *timo)
{
	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}
746233690Sdavidxu
747233690Sdavidxustatic int
748233690Sdavidxuabs_timeout_gethz(struct abs_timeout *timo)
749233690Sdavidxu{
750233690Sdavidxu	struct timespec tts;
751233690Sdavidxu
752239202Sdavidxu	if (timespeccmp(&timo->end, &timo->cur, <=))
753239202Sdavidxu		return (-1);
754233690Sdavidxu	tts = timo->end;
755233690Sdavidxu	timespecsub(&tts, &timo->cur);
756233690Sdavidxu	return (tstohz(&tts));
757233690Sdavidxu}
758233690Sdavidxu
/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.  Returns 0 when already woken,
 * ETIMEDOUT on deadline expiry, or the msleep() error.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		/* Already dequeued by a waker: no need to sleep. */
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		/* PDROP: msleep() returns with the chain lock released. */
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		/* Tick timeout fired: refresh current time and re-check. */
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}
791138224Sdavidxu
792161678Sdavidxu/*
793161678Sdavidxu * Convert userspace address into unique logical address.
794161678Sdavidxu */
795218969Sjhbint
796161678Sdavidxuumtx_key_get(void *addr, int type, int share, struct umtx_key *key)
797139013Sdavidxu{
798161678Sdavidxu	struct thread *td = curthread;
799139013Sdavidxu	vm_map_t map;
800139013Sdavidxu	vm_map_entry_t entry;
801139013Sdavidxu	vm_pindex_t pindex;
802139013Sdavidxu	vm_prot_t prot;
803139013Sdavidxu	boolean_t wired;
804139013Sdavidxu
805161678Sdavidxu	key->type = type;
806161678Sdavidxu	if (share == THREAD_SHARE) {
807161678Sdavidxu		key->shared = 0;
808161678Sdavidxu		key->info.private.vs = td->td_proc->p_vmspace;
809161678Sdavidxu		key->info.private.addr = (uintptr_t)addr;
810163677Sdavidxu	} else {
811163677Sdavidxu		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
812161678Sdavidxu		map = &td->td_proc->p_vmspace->vm_map;
813161678Sdavidxu		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
814161678Sdavidxu		    &entry, &key->info.shared.object, &pindex, &prot,
815161678Sdavidxu		    &wired) != KERN_SUCCESS) {
816161678Sdavidxu			return EFAULT;
817161678Sdavidxu		}
818161678Sdavidxu
819161678Sdavidxu		if ((share == PROCESS_SHARE) ||
820161678Sdavidxu		    (share == AUTO_SHARE &&
821161678Sdavidxu		     VM_INHERIT_SHARE == entry->inheritance)) {
822161678Sdavidxu			key->shared = 1;
823161678Sdavidxu			key->info.shared.offset = entry->offset + entry->start -
824161678Sdavidxu				(vm_offset_t)addr;
825161678Sdavidxu			vm_object_reference(key->info.shared.object);
826161678Sdavidxu		} else {
827161678Sdavidxu			key->shared = 0;
828161678Sdavidxu			key->info.private.vs = td->td_proc->p_vmspace;
829161678Sdavidxu			key->info.private.addr = (uintptr_t)addr;
830161678Sdavidxu		}
831161678Sdavidxu		vm_map_lookup_done(map, entry);
832139013Sdavidxu	}
833139013Sdavidxu
834161678Sdavidxu	umtxq_hash(key);
835139013Sdavidxu	return (0);
836139013Sdavidxu}
837139013Sdavidxu
838161678Sdavidxu/*
839161678Sdavidxu * Release key.
840161678Sdavidxu */
841218969Sjhbvoid
842139013Sdavidxuumtx_key_release(struct umtx_key *key)
843139013Sdavidxu{
844161678Sdavidxu	if (key->shared)
845139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
846139013Sdavidxu}
847139013Sdavidxu
848161678Sdavidxu/*
849161678Sdavidxu * Lock a umtx object.
850161678Sdavidxu */
851139013Sdavidxustatic int
852233690Sdavidxudo_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
853233690Sdavidxu	const struct timespec *timeout)
854112904Sjeff{
855233690Sdavidxu	struct abs_timeout timo;
856143149Sdavidxu	struct umtx_q *uq;
857163449Sdavidxu	u_long owner;
858163449Sdavidxu	u_long old;
859138224Sdavidxu	int error = 0;
860112904Sjeff
861143149Sdavidxu	uq = td->td_umtxq;
862233690Sdavidxu	if (timeout != NULL)
863233690Sdavidxu		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
864161678Sdavidxu
865112904Sjeff	/*
866161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
867112904Sjeff	 * can fault on any access.
868112904Sjeff	 */
869112904Sjeff	for (;;) {
870112904Sjeff		/*
871112904Sjeff		 * Try the uncontested case.  This should be done in userland.
872112904Sjeff		 */
873163449Sdavidxu		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
874112904Sjeff
875138224Sdavidxu		/* The acquire succeeded. */
876138224Sdavidxu		if (owner == UMTX_UNOWNED)
877138224Sdavidxu			return (0);
878138224Sdavidxu
879115765Sjeff		/* The address was invalid. */
880115765Sjeff		if (owner == -1)
881115765Sjeff			return (EFAULT);
882115765Sjeff
883115765Sjeff		/* If no one owns it but it is contested try to acquire it. */
884115765Sjeff		if (owner == UMTX_CONTESTED) {
885163449Sdavidxu			owner = casuword(&umtx->u_owner,
886139013Sdavidxu			    UMTX_CONTESTED, id | UMTX_CONTESTED);
887115765Sjeff
888138224Sdavidxu			if (owner == UMTX_CONTESTED)
889138224Sdavidxu				return (0);
890138224Sdavidxu
891115765Sjeff			/* The address was invalid. */
892115765Sjeff			if (owner == -1)
893115765Sjeff				return (EFAULT);
894115765Sjeff
895251684Skib			error = umtxq_check_susp(td);
896251684Skib			if (error != 0)
897251684Skib				break;
898251684Skib
899115765Sjeff			/* If this failed the lock has changed, restart. */
900115765Sjeff			continue;
901112904Sjeff		}
902112904Sjeff
903138224Sdavidxu		/*
904138224Sdavidxu		 * If we caught a signal, we have retried and now
905138224Sdavidxu		 * exit immediately.
906138224Sdavidxu		 */
907161678Sdavidxu		if (error != 0)
908233690Sdavidxu			break;
909112904Sjeff
910161678Sdavidxu		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
911161678Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
912161678Sdavidxu			return (error);
913161678Sdavidxu
914161678Sdavidxu		umtxq_lock(&uq->uq_key);
915161678Sdavidxu		umtxq_busy(&uq->uq_key);
916161678Sdavidxu		umtxq_insert(uq);
917161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
918161678Sdavidxu		umtxq_unlock(&uq->uq_key);
919161678Sdavidxu
920112904Sjeff		/*
921112904Sjeff		 * Set the contested bit so that a release in user space
922112904Sjeff		 * knows to use the system call for unlock.  If this fails
923112904Sjeff		 * either some one else has acquired the lock or it has been
924112904Sjeff		 * released.
925112904Sjeff		 */
926163449Sdavidxu		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
927112904Sjeff
928112904Sjeff		/* The address was invalid. */
929112967Sjake		if (old == -1) {
930143149Sdavidxu			umtxq_lock(&uq->uq_key);
931143149Sdavidxu			umtxq_remove(uq);
932143149Sdavidxu			umtxq_unlock(&uq->uq_key);
933143149Sdavidxu			umtx_key_release(&uq->uq_key);
934115765Sjeff			return (EFAULT);
935112904Sjeff		}
936112904Sjeff
937112904Sjeff		/*
938115765Sjeff		 * We set the contested bit, sleep. Otherwise the lock changed
939117685Smtm		 * and we need to retry or we lost a race to the thread
940117685Smtm		 * unlocking the umtx.
941112904Sjeff		 */
942143149Sdavidxu		umtxq_lock(&uq->uq_key);
943161678Sdavidxu		if (old == owner)
944233690Sdavidxu			error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
945233690Sdavidxu			    &timo);
946143149Sdavidxu		umtxq_remove(uq);
947143149Sdavidxu		umtxq_unlock(&uq->uq_key);
948143149Sdavidxu		umtx_key_release(&uq->uq_key);
949251684Skib
950251684Skib		if (error == 0)
951251684Skib			error = umtxq_check_susp(td);
952112904Sjeff	}
953117743Smtm
954140245Sdavidxu	if (timeout == NULL) {
955162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
956162030Sdavidxu		if (error == EINTR)
957162030Sdavidxu			error = ERESTART;
958139013Sdavidxu	} else {
959162030Sdavidxu		/* Timed-locking is not restarted. */
960162030Sdavidxu		if (error == ERESTART)
961162030Sdavidxu			error = EINTR;
962139013Sdavidxu	}
963139013Sdavidxu	return (error);
964139013Sdavidxu}
965139013Sdavidxu
966161678Sdavidxu/*
967161678Sdavidxu * Unlock a umtx object.
968161678Sdavidxu */
969139013Sdavidxustatic int
970163449Sdavidxudo_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
971139013Sdavidxu{
972139013Sdavidxu	struct umtx_key key;
973163449Sdavidxu	u_long owner;
974163449Sdavidxu	u_long old;
975139257Sdavidxu	int error;
976139257Sdavidxu	int count;
977112904Sjeff
978112904Sjeff	/*
979112904Sjeff	 * Make sure we own this mtx.
980112904Sjeff	 */
981163449Sdavidxu	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
982161678Sdavidxu	if (owner == -1)
983115765Sjeff		return (EFAULT);
984115765Sjeff
985139013Sdavidxu	if ((owner & ~UMTX_CONTESTED) != id)
986115765Sjeff		return (EPERM);
987112904Sjeff
988161678Sdavidxu	/* This should be done in userland */
989161678Sdavidxu	if ((owner & UMTX_CONTESTED) == 0) {
990163449Sdavidxu		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
991161678Sdavidxu		if (old == -1)
992161678Sdavidxu			return (EFAULT);
993161678Sdavidxu		if (old == owner)
994161678Sdavidxu			return (0);
995161855Sdavidxu		owner = old;
996161678Sdavidxu	}
997161678Sdavidxu
998117685Smtm	/* We should only ever be in here for contested locks */
999161678Sdavidxu	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
1000161678Sdavidxu		&key)) != 0)
1001139257Sdavidxu		return (error);
1002139257Sdavidxu
1003139257Sdavidxu	umtxq_lock(&key);
1004139257Sdavidxu	umtxq_busy(&key);
1005139257Sdavidxu	count = umtxq_count(&key);
1006139257Sdavidxu	umtxq_unlock(&key);
1007139257Sdavidxu
1008117743Smtm	/*
1009117743Smtm	 * When unlocking the umtx, it must be marked as unowned if
1010117743Smtm	 * there is zero or one thread only waiting for it.
1011117743Smtm	 * Otherwise, it must be marked as contested.
1012117743Smtm	 */
1013163449Sdavidxu	old = casuword(&umtx->u_owner, owner,
1014163449Sdavidxu		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
1015139257Sdavidxu	umtxq_lock(&key);
1016161678Sdavidxu	umtxq_signal(&key,1);
1017139257Sdavidxu	umtxq_unbusy(&key);
1018139257Sdavidxu	umtxq_unlock(&key);
1019139257Sdavidxu	umtx_key_release(&key);
1020115765Sjeff	if (old == -1)
1021115765Sjeff		return (EFAULT);
1022138224Sdavidxu	if (old != owner)
1023138224Sdavidxu		return (EINVAL);
1024115765Sjeff	return (0);
1025112904Sjeff}
1026139013Sdavidxu
1027205014Snwhitehorn#ifdef COMPAT_FREEBSD32
1028162536Sdavidxu
1029161678Sdavidxu/*
1030162536Sdavidxu * Lock a umtx object.
1031162536Sdavidxu */
1032162536Sdavidxustatic int
1033233690Sdavidxudo_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
1034233690Sdavidxu	const struct timespec *timeout)
1035162536Sdavidxu{
1036233690Sdavidxu	struct abs_timeout timo;
1037162536Sdavidxu	struct umtx_q *uq;
1038162536Sdavidxu	uint32_t owner;
1039162536Sdavidxu	uint32_t old;
1040162536Sdavidxu	int error = 0;
1041162536Sdavidxu
1042162536Sdavidxu	uq = td->td_umtxq;
1043162536Sdavidxu
1044233690Sdavidxu	if (timeout != NULL)
1045233690Sdavidxu		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
1046233690Sdavidxu
1047162536Sdavidxu	/*
1048162536Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1049162536Sdavidxu	 * can fault on any access.
1050162536Sdavidxu	 */
1051162536Sdavidxu	for (;;) {
1052162536Sdavidxu		/*
1053162536Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1054162536Sdavidxu		 */
1055162536Sdavidxu		owner = casuword32(m, UMUTEX_UNOWNED, id);
1056162536Sdavidxu
1057162536Sdavidxu		/* The acquire succeeded. */
1058162536Sdavidxu		if (owner == UMUTEX_UNOWNED)
1059162536Sdavidxu			return (0);
1060162536Sdavidxu
1061162536Sdavidxu		/* The address was invalid. */
1062162536Sdavidxu		if (owner == -1)
1063162536Sdavidxu			return (EFAULT);
1064162536Sdavidxu
1065162536Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1066162536Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1067162536Sdavidxu			owner = casuword32(m,
1068162536Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1069162536Sdavidxu			if (owner == UMUTEX_CONTESTED)
1070162536Sdavidxu				return (0);
1071162536Sdavidxu
1072162536Sdavidxu			/* The address was invalid. */
1073162536Sdavidxu			if (owner == -1)
1074162536Sdavidxu				return (EFAULT);
1075162536Sdavidxu
1076251684Skib			error = umtxq_check_susp(td);
1077251684Skib			if (error != 0)
1078251684Skib				break;
1079251684Skib
1080162536Sdavidxu			/* If this failed the lock has changed, restart. */
1081162536Sdavidxu			continue;
1082162536Sdavidxu		}
1083162536Sdavidxu
1084162536Sdavidxu		/*
1085162536Sdavidxu		 * If we caught a signal, we have retried and now
1086162536Sdavidxu		 * exit immediately.
1087162536Sdavidxu		 */
1088162536Sdavidxu		if (error != 0)
1089162536Sdavidxu			return (error);
1090162536Sdavidxu
1091162536Sdavidxu		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
1092162536Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
1093162536Sdavidxu			return (error);
1094162536Sdavidxu
1095162536Sdavidxu		umtxq_lock(&uq->uq_key);
1096162536Sdavidxu		umtxq_busy(&uq->uq_key);
1097162536Sdavidxu		umtxq_insert(uq);
1098162536Sdavidxu		umtxq_unbusy(&uq->uq_key);
1099162536Sdavidxu		umtxq_unlock(&uq->uq_key);
1100162536Sdavidxu
1101162536Sdavidxu		/*
1102162536Sdavidxu		 * Set the contested bit so that a release in user space
1103162536Sdavidxu		 * knows to use the system call for unlock.  If this fails
1104162536Sdavidxu		 * either some one else has acquired the lock or it has been
1105162536Sdavidxu		 * released.
1106162536Sdavidxu		 */
1107162536Sdavidxu		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
1108162536Sdavidxu
1109162536Sdavidxu		/* The address was invalid. */
1110162536Sdavidxu		if (old == -1) {
1111162536Sdavidxu			umtxq_lock(&uq->uq_key);
1112162536Sdavidxu			umtxq_remove(uq);
1113162536Sdavidxu			umtxq_unlock(&uq->uq_key);
1114162536Sdavidxu			umtx_key_release(&uq->uq_key);
1115162536Sdavidxu			return (EFAULT);
1116162536Sdavidxu		}
1117162536Sdavidxu
1118162536Sdavidxu		/*
1119162536Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1120162536Sdavidxu		 * and we need to retry or we lost a race to the thread
1121162536Sdavidxu		 * unlocking the umtx.
1122162536Sdavidxu		 */
1123162536Sdavidxu		umtxq_lock(&uq->uq_key);
1124162536Sdavidxu		if (old == owner)
1125233690Sdavidxu			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
1126233693Sdavidxu			    NULL : &timo);
1127162536Sdavidxu		umtxq_remove(uq);
1128162536Sdavidxu		umtxq_unlock(&uq->uq_key);
1129162536Sdavidxu		umtx_key_release(&uq->uq_key);
1130251684Skib
1131251684Skib		if (error == 0)
1132251684Skib			error = umtxq_check_susp(td);
1133162536Sdavidxu	}
1134162536Sdavidxu
1135162536Sdavidxu	if (timeout == NULL) {
1136162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
1137162536Sdavidxu		if (error == EINTR)
1138162536Sdavidxu			error = ERESTART;
1139162536Sdavidxu	} else {
1140162536Sdavidxu		/* Timed-locking is not restarted. */
1141162536Sdavidxu		if (error == ERESTART)
1142162536Sdavidxu			error = EINTR;
1143162536Sdavidxu	}
1144162536Sdavidxu	return (error);
1145162536Sdavidxu}
1146162536Sdavidxu
1147162536Sdavidxu/*
1148162536Sdavidxu * Unlock a umtx object.
1149162536Sdavidxu */
1150162536Sdavidxustatic int
1151162536Sdavidxudo_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
1152162536Sdavidxu{
1153162536Sdavidxu	struct umtx_key key;
1154162536Sdavidxu	uint32_t owner;
1155162536Sdavidxu	uint32_t old;
1156162536Sdavidxu	int error;
1157162536Sdavidxu	int count;
1158162536Sdavidxu
1159162536Sdavidxu	/*
1160162536Sdavidxu	 * Make sure we own this mtx.
1161162536Sdavidxu	 */
1162162536Sdavidxu	owner = fuword32(m);
1163162536Sdavidxu	if (owner == -1)
1164162536Sdavidxu		return (EFAULT);
1165162536Sdavidxu
1166162536Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1167162536Sdavidxu		return (EPERM);
1168162536Sdavidxu
1169162536Sdavidxu	/* This should be done in userland */
1170162536Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1171162536Sdavidxu		old = casuword32(m, owner, UMUTEX_UNOWNED);
1172162536Sdavidxu		if (old == -1)
1173162536Sdavidxu			return (EFAULT);
1174162536Sdavidxu		if (old == owner)
1175162536Sdavidxu			return (0);
1176162536Sdavidxu		owner = old;
1177162536Sdavidxu	}
1178162536Sdavidxu
1179162536Sdavidxu	/* We should only ever be in here for contested locks */
1180162536Sdavidxu	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
1181162536Sdavidxu		&key)) != 0)
1182162536Sdavidxu		return (error);
1183162536Sdavidxu
1184162536Sdavidxu	umtxq_lock(&key);
1185162536Sdavidxu	umtxq_busy(&key);
1186162536Sdavidxu	count = umtxq_count(&key);
1187162536Sdavidxu	umtxq_unlock(&key);
1188162536Sdavidxu
1189162536Sdavidxu	/*
1190162536Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1191162536Sdavidxu	 * there is zero or one thread only waiting for it.
1192162536Sdavidxu	 * Otherwise, it must be marked as contested.
1193162536Sdavidxu	 */
1194162536Sdavidxu	old = casuword32(m, owner,
1195162536Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1196162536Sdavidxu	umtxq_lock(&key);
1197162536Sdavidxu	umtxq_signal(&key,1);
1198162536Sdavidxu	umtxq_unbusy(&key);
1199162536Sdavidxu	umtxq_unlock(&key);
1200162536Sdavidxu	umtx_key_release(&key);
1201162536Sdavidxu	if (old == -1)
1202162536Sdavidxu		return (EFAULT);
1203162536Sdavidxu	if (old != owner)
1204162536Sdavidxu		return (EINVAL);
1205162536Sdavidxu	return (0);
1206162536Sdavidxu}
1207162536Sdavidxu#endif
1208162536Sdavidxu
1209162536Sdavidxu/*
1210161678Sdavidxu * Fetch and compare value, sleep on the address if value is not changed.
1211161678Sdavidxu */
1212139013Sdavidxustatic int
1213163449Sdavidxudo_wait(struct thread *td, void *addr, u_long id,
1214232144Sdavidxu	struct _umtx_time *timeout, int compat32, int is_private)
1215139013Sdavidxu{
1216233690Sdavidxu	struct abs_timeout timo;
1217143149Sdavidxu	struct umtx_q *uq;
1218163449Sdavidxu	u_long tmp;
1219274648Skib	uint32_t tmp32;
1220140245Sdavidxu	int error = 0;
1221139013Sdavidxu
1222143149Sdavidxu	uq = td->td_umtxq;
1223178646Sdavidxu	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1224178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1225139013Sdavidxu		return (error);
1226161678Sdavidxu
1227233690Sdavidxu	if (timeout != NULL)
1228233690Sdavidxu		abs_timeout_init2(&timo, timeout);
1229233690Sdavidxu
1230161678Sdavidxu	umtxq_lock(&uq->uq_key);
1231161678Sdavidxu	umtxq_insert(uq);
1232161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1233274648Skib	if (compat32 == 0) {
1234274648Skib		error = fueword(addr, &tmp);
1235274648Skib		if (error != 0)
1236274648Skib			error = EFAULT;
1237274648Skib	} else {
1238274648Skib		error = fueword32(addr, &tmp32);
1239274648Skib		if (error == 0)
1240274648Skib			tmp = tmp32;
1241274648Skib		else
1242274648Skib			error = EFAULT;
1243274648Skib	}
1244233642Sdavidxu	umtxq_lock(&uq->uq_key);
1245274648Skib	if (error == 0) {
1246274648Skib		if (tmp == id)
1247274648Skib			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
1248274648Skib			    NULL : &timo);
1249274648Skib		if ((uq->uq_flags & UQF_UMTXQ) == 0)
1250274648Skib			error = 0;
1251274648Skib		else
1252274648Skib			umtxq_remove(uq);
1253274648Skib	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
1254143149Sdavidxu		umtxq_remove(uq);
1255274648Skib	}
1256233642Sdavidxu	umtxq_unlock(&uq->uq_key);
1257143149Sdavidxu	umtx_key_release(&uq->uq_key);
1258139257Sdavidxu	if (error == ERESTART)
1259139257Sdavidxu		error = EINTR;
1260139013Sdavidxu	return (error);
1261139013Sdavidxu}
1262139013Sdavidxu
1263161678Sdavidxu/*
1264161678Sdavidxu * Wake up threads sleeping on the specified address.
1265161678Sdavidxu */
1266151692Sdavidxuint
1267178646Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1268139013Sdavidxu{
1269139013Sdavidxu	struct umtx_key key;
1270139257Sdavidxu	int ret;
1271139013Sdavidxu
1272178646Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1273178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1274139257Sdavidxu		return (ret);
1275139258Sdavidxu	umtxq_lock(&key);
1276288494Svangyzen	umtxq_signal(&key, n_wake);
1277139258Sdavidxu	umtxq_unlock(&key);
1278139257Sdavidxu	umtx_key_release(&key);
1279139013Sdavidxu	return (0);
1280139013Sdavidxu}
1281139013Sdavidxu
1282161678Sdavidxu/*
1283161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1284161678Sdavidxu */
1285161678Sdavidxustatic int
1286233690Sdavidxudo_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
1287233690Sdavidxu	struct _umtx_time *timeout, int mode)
1288161678Sdavidxu{
1289233690Sdavidxu	struct abs_timeout timo;
1290161678Sdavidxu	struct umtx_q *uq;
1291161678Sdavidxu	uint32_t owner, old, id;
1292274648Skib	int error, rv;
1293161678Sdavidxu
1294161678Sdavidxu	id = td->td_tid;
1295161678Sdavidxu	uq = td->td_umtxq;
1296274648Skib	error = 0;
1297233690Sdavidxu	if (timeout != NULL)
1298233690Sdavidxu		abs_timeout_init2(&timo, timeout);
1299233690Sdavidxu
1300161678Sdavidxu	/*
1301161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1302161678Sdavidxu	 * can fault on any access.
1303161678Sdavidxu	 */
1304161678Sdavidxu	for (;;) {
1305274648Skib		rv = fueword32(&m->m_owner, &owner);
1306274648Skib		if (rv == -1)
1307274648Skib			return (EFAULT);
1308179970Sdavidxu		if (mode == _UMUTEX_WAIT) {
1309179970Sdavidxu			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1310179970Sdavidxu				return (0);
1311179970Sdavidxu		} else {
1312179970Sdavidxu			/*
1313179970Sdavidxu			 * Try the uncontested case.  This should be done in userland.
1314179970Sdavidxu			 */
1315274648Skib			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
1316274648Skib			    &owner, id);
1317274648Skib			/* The address was invalid. */
1318274648Skib			if (rv == -1)
1319274648Skib				return (EFAULT);
1320161678Sdavidxu
1321179970Sdavidxu			/* The acquire succeeded. */
1322179970Sdavidxu			if (owner == UMUTEX_UNOWNED)
1323161678Sdavidxu				return (0);
1324161678Sdavidxu
1325179970Sdavidxu			/* If no one owns it but it is contested try to acquire it. */
1326179970Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1327274648Skib				rv = casueword32(&m->m_owner,
1328274648Skib				    UMUTEX_CONTESTED, &owner,
1329274648Skib				    id | UMUTEX_CONTESTED);
1330274648Skib				/* The address was invalid. */
1331274648Skib				if (rv == -1)
1332274648Skib					return (EFAULT);
1333179970Sdavidxu
1334179970Sdavidxu				if (owner == UMUTEX_CONTESTED)
1335179970Sdavidxu					return (0);
1336179970Sdavidxu
1337274648Skib				rv = umtxq_check_susp(td);
1338274648Skib				if (rv != 0)
1339274648Skib					return (rv);
1340179970Sdavidxu
1341179970Sdavidxu				/* If this failed the lock has changed, restart. */
1342179970Sdavidxu				continue;
1343179970Sdavidxu			}
1344161678Sdavidxu		}
1345161678Sdavidxu
1346161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1347161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id)
1348161678Sdavidxu			return (EDEADLK);
1349161678Sdavidxu
1350179970Sdavidxu		if (mode == _UMUTEX_TRY)
1351161678Sdavidxu			return (EBUSY);
1352161678Sdavidxu
1353161678Sdavidxu		/*
1354161678Sdavidxu		 * If we caught a signal, we have retried and now
1355161678Sdavidxu		 * exit immediately.
1356161678Sdavidxu		 */
1357233691Sdavidxu		if (error != 0)
1358161678Sdavidxu			return (error);
1359161678Sdavidxu
1360161678Sdavidxu		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1361161678Sdavidxu		    GET_SHARE(flags), &uq->uq_key)) != 0)
1362161678Sdavidxu			return (error);
1363161678Sdavidxu
1364161678Sdavidxu		umtxq_lock(&uq->uq_key);
1365161678Sdavidxu		umtxq_busy(&uq->uq_key);
1366161678Sdavidxu		umtxq_insert(uq);
1367161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1368161678Sdavidxu
1369161678Sdavidxu		/*
1370161678Sdavidxu		 * Set the contested bit so that a release in user space
1371161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1372161678Sdavidxu		 * either some one else has acquired the lock or it has been
1373161678Sdavidxu		 * released.
1374161678Sdavidxu		 */
1375274648Skib		rv = casueword32(&m->m_owner, owner, &old,
1376274648Skib		    owner | UMUTEX_CONTESTED);
1377161678Sdavidxu
1378161678Sdavidxu		/* The address was invalid. */
1379274648Skib		if (rv == -1) {
1380161678Sdavidxu			umtxq_lock(&uq->uq_key);
1381161678Sdavidxu			umtxq_remove(uq);
1382179970Sdavidxu			umtxq_unbusy(&uq->uq_key);
1383161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1384161678Sdavidxu			umtx_key_release(&uq->uq_key);
1385161678Sdavidxu			return (EFAULT);
1386161678Sdavidxu		}
1387161678Sdavidxu
1388161678Sdavidxu		/*
1389161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1390161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1391161678Sdavidxu		 * unlocking the umtx.
1392161678Sdavidxu		 */
1393161678Sdavidxu		umtxq_lock(&uq->uq_key);
1394179970Sdavidxu		umtxq_unbusy(&uq->uq_key);
1395161678Sdavidxu		if (old == owner)
1396233690Sdavidxu			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
1397233690Sdavidxu			    NULL : &timo);
1398161678Sdavidxu		umtxq_remove(uq);
1399161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1400161678Sdavidxu		umtx_key_release(&uq->uq_key);
1401251684Skib
1402251684Skib		if (error == 0)
1403251684Skib			error = umtxq_check_susp(td);
1404161678Sdavidxu	}
1405161678Sdavidxu
1406161678Sdavidxu	return (0);
1407161678Sdavidxu}
1408161678Sdavidxu
1409161678Sdavidxu/*
1410161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1411161678Sdavidxu */
1412161678Sdavidxustatic int
1413161678Sdavidxudo_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1414161678Sdavidxu{
1415161678Sdavidxu	struct umtx_key key;
1416161678Sdavidxu	uint32_t owner, old, id;
1417161678Sdavidxu	int error;
1418161678Sdavidxu	int count;
1419161678Sdavidxu
1420161678Sdavidxu	id = td->td_tid;
1421161678Sdavidxu	/*
1422161678Sdavidxu	 * Make sure we own this mtx.
1423161678Sdavidxu	 */
1424274648Skib	error = fueword32(&m->m_owner, &owner);
1425274648Skib	if (error == -1)
1426161678Sdavidxu		return (EFAULT);
1427161678Sdavidxu
1428161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1429161678Sdavidxu		return (EPERM);
1430161678Sdavidxu
1431161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1432274648Skib		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
1433274648Skib		if (error == -1)
1434161678Sdavidxu			return (EFAULT);
1435161678Sdavidxu		if (old == owner)
1436161678Sdavidxu			return (0);
1437161855Sdavidxu		owner = old;
1438161678Sdavidxu	}
1439161678Sdavidxu
1440161678Sdavidxu	/* We should only ever be in here for contested locks */
1441161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1442161678Sdavidxu	    &key)) != 0)
1443161678Sdavidxu		return (error);
1444161678Sdavidxu
1445161678Sdavidxu	umtxq_lock(&key);
1446161678Sdavidxu	umtxq_busy(&key);
1447161678Sdavidxu	count = umtxq_count(&key);
1448161678Sdavidxu	umtxq_unlock(&key);
1449161678Sdavidxu
1450161678Sdavidxu	/*
1451161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1452161678Sdavidxu	 * there is zero or one thread only waiting for it.
1453161678Sdavidxu	 * Otherwise, it must be marked as contested.
1454161678Sdavidxu	 */
1455274648Skib	error = casueword32(&m->m_owner, owner, &old,
1456274648Skib	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1457161678Sdavidxu	umtxq_lock(&key);
1458161678Sdavidxu	umtxq_signal(&key,1);
1459161678Sdavidxu	umtxq_unbusy(&key);
1460161678Sdavidxu	umtxq_unlock(&key);
1461161678Sdavidxu	umtx_key_release(&key);
1462274648Skib	if (error == -1)
1463161678Sdavidxu		return (EFAULT);
1464161678Sdavidxu	if (old != owner)
1465161678Sdavidxu		return (EINVAL);
1466161678Sdavidxu	return (0);
1467161678Sdavidxu}
1468161678Sdavidxu
1469179970Sdavidxu/*
1470179970Sdavidxu * Check if the mutex is available and wake up a waiter,
1471179970Sdavidxu * only for simple mutex.
1472179970Sdavidxu */
1473179970Sdavidxustatic int
1474179970Sdavidxudo_wake_umutex(struct thread *td, struct umutex *m)
1475179970Sdavidxu{
1476179970Sdavidxu	struct umtx_key key;
1477179970Sdavidxu	uint32_t owner;
1478179970Sdavidxu	uint32_t flags;
1479179970Sdavidxu	int error;
1480179970Sdavidxu	int count;
1481179970Sdavidxu
1482274648Skib	error = fueword32(&m->m_owner, &owner);
1483274648Skib	if (error == -1)
1484179970Sdavidxu		return (EFAULT);
1485179970Sdavidxu
1486179970Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != 0)
1487179970Sdavidxu		return (0);
1488179970Sdavidxu
1489274648Skib	error = fueword32(&m->m_flags, &flags);
1490274648Skib	if (error == -1)
1491274648Skib		return (EFAULT);
1492179970Sdavidxu
1493179970Sdavidxu	/* We should only ever be in here for contested locks */
1494179970Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1495179970Sdavidxu	    &key)) != 0)
1496179970Sdavidxu		return (error);
1497179970Sdavidxu
1498179970Sdavidxu	umtxq_lock(&key);
1499179970Sdavidxu	umtxq_busy(&key);
1500179970Sdavidxu	count = umtxq_count(&key);
1501179970Sdavidxu	umtxq_unlock(&key);
1502179970Sdavidxu
1503274648Skib	if (count <= 1) {
1504274648Skib		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
1505274648Skib		    UMUTEX_UNOWNED);
1506274648Skib		if (error == -1)
1507274648Skib			error = EFAULT;
1508274648Skib	}
1509179970Sdavidxu
1510179970Sdavidxu	umtxq_lock(&key);
1511274648Skib	if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1512179970Sdavidxu		umtxq_signal(&key, 1);
1513179970Sdavidxu	umtxq_unbusy(&key);
1514179970Sdavidxu	umtxq_unlock(&key);
1515179970Sdavidxu	umtx_key_release(&key);
1516274648Skib	return (error);
1517179970Sdavidxu}
1518179970Sdavidxu
1519233912Sdavidxu/*
1520233912Sdavidxu * Check if the mutex has waiters and tries to fix contention bit.
1521233912Sdavidxu */
1522233912Sdavidxustatic int
1523233912Sdavidxudo_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1524233912Sdavidxu{
1525233912Sdavidxu	struct umtx_key key;
1526233912Sdavidxu	uint32_t owner, old;
1527233912Sdavidxu	int type;
1528233912Sdavidxu	int error;
1529233912Sdavidxu	int count;
1530233912Sdavidxu
1531233912Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1532233912Sdavidxu	case 0:
1533233912Sdavidxu		type = TYPE_NORMAL_UMUTEX;
1534233912Sdavidxu		break;
1535233912Sdavidxu	case UMUTEX_PRIO_INHERIT:
1536233912Sdavidxu		type = TYPE_PI_UMUTEX;
1537233912Sdavidxu		break;
1538233912Sdavidxu	case UMUTEX_PRIO_PROTECT:
1539233912Sdavidxu		type = TYPE_PP_UMUTEX;
1540233912Sdavidxu		break;
1541233912Sdavidxu	default:
1542233912Sdavidxu		return (EINVAL);
1543233912Sdavidxu	}
1544233912Sdavidxu	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
1545233912Sdavidxu	    &key)) != 0)
1546233912Sdavidxu		return (error);
1547233912Sdavidxu
1548233912Sdavidxu	owner = 0;
1549233912Sdavidxu	umtxq_lock(&key);
1550233912Sdavidxu	umtxq_busy(&key);
1551233912Sdavidxu	count = umtxq_count(&key);
1552233912Sdavidxu	umtxq_unlock(&key);
1553233912Sdavidxu	/*
1554233912Sdavidxu	 * Only repair contention bit if there is a waiter, this means the mutex
1555233912Sdavidxu	 * is still being referenced by userland code, otherwise don't update
1556233912Sdavidxu	 * any memory.
1557233912Sdavidxu	 */
1558233912Sdavidxu	if (count > 1) {
1559274648Skib		error = fueword32(&m->m_owner, &owner);
1560274648Skib		if (error == -1)
1561274648Skib			error = EFAULT;
1562274648Skib		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
1563274648Skib			error = casueword32(&m->m_owner, owner, &old,
1564274648Skib			    owner | UMUTEX_CONTESTED);
1565274648Skib			if (error == -1) {
1566274648Skib				error = EFAULT;
1567274648Skib				break;
1568274648Skib			}
1569233912Sdavidxu			if (old == owner)
1570233912Sdavidxu				break;
1571233912Sdavidxu			owner = old;
1572251684Skib			error = umtxq_check_susp(td);
1573251684Skib			if (error != 0)
1574251684Skib				break;
1575233912Sdavidxu		}
1576233912Sdavidxu	} else if (count == 1) {
1577274648Skib		error = fueword32(&m->m_owner, &owner);
1578274648Skib		if (error == -1)
1579274648Skib			error = EFAULT;
1580274648Skib		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
1581233912Sdavidxu		       (owner & UMUTEX_CONTESTED) == 0) {
1582274648Skib			error = casueword32(&m->m_owner, owner, &old,
1583274648Skib			    owner | UMUTEX_CONTESTED);
1584274648Skib			if (error == -1) {
1585274648Skib				error = EFAULT;
1586274648Skib				break;
1587274648Skib			}
1588233912Sdavidxu			if (old == owner)
1589233912Sdavidxu				break;
1590233912Sdavidxu			owner = old;
1591251684Skib			error = umtxq_check_susp(td);
1592251684Skib			if (error != 0)
1593251684Skib				break;
1594233912Sdavidxu		}
1595233912Sdavidxu	}
1596233912Sdavidxu	umtxq_lock(&key);
1597274648Skib	if (error == EFAULT) {
1598233912Sdavidxu		umtxq_signal(&key, INT_MAX);
1599274648Skib	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1600233912Sdavidxu		umtxq_signal(&key, 1);
1601233912Sdavidxu	umtxq_unbusy(&key);
1602233912Sdavidxu	umtxq_unlock(&key);
1603233912Sdavidxu	umtx_key_release(&key);
1604233912Sdavidxu	return (error);
1605233912Sdavidxu}
1606233912Sdavidxu
1607161678Sdavidxustatic inline struct umtx_pi *
1608163697Sdavidxuumtx_pi_alloc(int flags)
1609161678Sdavidxu{
1610161678Sdavidxu	struct umtx_pi *pi;
1611161678Sdavidxu
1612163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1613161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1614161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1615161678Sdavidxu	return (pi);
1616161678Sdavidxu}
1617161678Sdavidxu
1618161678Sdavidxustatic inline void
1619161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1620161678Sdavidxu{
1621161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1622161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1623161678Sdavidxu}
1624161678Sdavidxu
1625161678Sdavidxu/*
1626161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1627161678Sdavidxu * changed.
1628161678Sdavidxu */
1629161678Sdavidxustatic int
1630161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1631161678Sdavidxu{
1632161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1633161678Sdavidxu	struct thread *td1;
1634161678Sdavidxu
1635170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1636161678Sdavidxu	if (pi == NULL)
1637161678Sdavidxu		return (0);
1638161678Sdavidxu
1639161678Sdavidxu	uq = td->td_umtxq;
1640161678Sdavidxu
1641161678Sdavidxu	/*
1642161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1643161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1644161678Sdavidxu	 * the previous thread or higher than the next thread.
1645161678Sdavidxu	 */
1646161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1647161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1648161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1649161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1650161678Sdavidxu		/*
1651161678Sdavidxu		 * Remove thread from blocked chain and determine where
1652161678Sdavidxu		 * it should be moved to.
1653161678Sdavidxu		 */
1654161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1655161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1656161678Sdavidxu			td1 = uq1->uq_thread;
1657161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
1658161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1659161678Sdavidxu				break;
1660161678Sdavidxu		}
1661161678Sdavidxu
1662161678Sdavidxu		if (uq1 == NULL)
1663161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1664161678Sdavidxu		else
1665161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1666161678Sdavidxu	}
1667161678Sdavidxu	return (1);
1668161678Sdavidxu}
1669161678Sdavidxu
1670278345Skibstatic struct umtx_pi *
1671278345Skibumtx_pi_next(struct umtx_pi *pi)
1672278345Skib{
1673278345Skib	struct umtx_q *uq_owner;
1674278345Skib
1675278345Skib	if (pi->pi_owner == NULL)
1676278345Skib		return (NULL);
1677278345Skib	uq_owner = pi->pi_owner->td_umtxq;
1678278345Skib	if (uq_owner == NULL)
1679278345Skib		return (NULL);
1680278345Skib	return (uq_owner->uq_pi_blocked);
1681278345Skib}
1682278345Skib
1683161678Sdavidxu/*
1684278345Skib * Floyd's Cycle-Finding Algorithm.
1685278345Skib */
1686278345Skibstatic bool
1687278345Skibumtx_pi_check_loop(struct umtx_pi *pi)
1688278345Skib{
1689278345Skib	struct umtx_pi *pi1;	/* fast iterator */
1690278345Skib
1691278345Skib	mtx_assert(&umtx_lock, MA_OWNED);
1692278345Skib	if (pi == NULL)
1693278345Skib		return (false);
1694278345Skib	pi1 = pi;
1695278345Skib	for (;;) {
1696278345Skib		pi = umtx_pi_next(pi);
1697278345Skib		if (pi == NULL)
1698278345Skib			break;
1699278345Skib		pi1 = umtx_pi_next(pi1);
1700278345Skib		if (pi1 == NULL)
1701278345Skib			break;
1702278345Skib		pi1 = umtx_pi_next(pi1);
1703278345Skib		if (pi1 == NULL)
1704278345Skib			break;
1705278345Skib		if (pi == pi1)
1706278345Skib			return (true);
1707278345Skib	}
1708278345Skib	return (false);
1709278345Skib}
1710278345Skib
1711278345Skib/*
1712161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1713161678Sdavidxu * PI mutex.
1714161678Sdavidxu */
1715161678Sdavidxustatic void
1716161678Sdavidxuumtx_propagate_priority(struct thread *td)
1717161678Sdavidxu{
1718161678Sdavidxu	struct umtx_q *uq;
1719161678Sdavidxu	struct umtx_pi *pi;
1720161678Sdavidxu	int pri;
1721161678Sdavidxu
1722170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1723161678Sdavidxu	pri = UPRI(td);
1724161678Sdavidxu	uq = td->td_umtxq;
1725161678Sdavidxu	pi = uq->uq_pi_blocked;
1726161678Sdavidxu	if (pi == NULL)
1727161678Sdavidxu		return;
1728278345Skib	if (umtx_pi_check_loop(pi))
1729278345Skib		return;
1730161678Sdavidxu
1731161678Sdavidxu	for (;;) {
1732161678Sdavidxu		td = pi->pi_owner;
1733216313Sdavidxu		if (td == NULL || td == curthread)
1734161678Sdavidxu			return;
1735161678Sdavidxu
1736161678Sdavidxu		MPASS(td->td_proc != NULL);
1737161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1738161678Sdavidxu
1739170300Sjeff		thread_lock(td);
1740216313Sdavidxu		if (td->td_lend_user_pri > pri)
1741216313Sdavidxu			sched_lend_user_prio(td, pri);
1742216313Sdavidxu		else {
1743216313Sdavidxu			thread_unlock(td);
1744216313Sdavidxu			break;
1745216313Sdavidxu		}
1746170300Sjeff		thread_unlock(td);
1747161678Sdavidxu
1748161678Sdavidxu		/*
1749161678Sdavidxu		 * Pick up the lock that td is blocked on.
1750161678Sdavidxu		 */
1751161678Sdavidxu		uq = td->td_umtxq;
1752161678Sdavidxu		pi = uq->uq_pi_blocked;
1753216791Sdavidxu		if (pi == NULL)
1754216791Sdavidxu			break;
1755161678Sdavidxu		/* Resort td on the list if needed. */
1756216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1757161678Sdavidxu	}
1758161678Sdavidxu}
1759161678Sdavidxu
1760161678Sdavidxu/*
1761161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1762161678Sdavidxu * it is interrupted by signal or resumed by others.
1763161678Sdavidxu */
1764161678Sdavidxustatic void
1765216791Sdavidxuumtx_repropagate_priority(struct umtx_pi *pi)
1766161678Sdavidxu{
1767161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1768161678Sdavidxu	struct umtx_pi *pi2;
1769216791Sdavidxu	int pri;
1770161678Sdavidxu
1771170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1772161678Sdavidxu
1773278345Skib	if (umtx_pi_check_loop(pi))
1774278345Skib		return;
1775161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1776161678Sdavidxu		pri = PRI_MAX;
1777161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1778161678Sdavidxu
1779161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1780161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1781161678Sdavidxu			if (uq != NULL) {
1782161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1783161678Sdavidxu					pri = UPRI(uq->uq_thread);
1784161678Sdavidxu			}
1785161678Sdavidxu		}
1786161678Sdavidxu
1787161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1788161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1789170300Sjeff		thread_lock(pi->pi_owner);
1790216791Sdavidxu		sched_lend_user_prio(pi->pi_owner, pri);
1791170300Sjeff		thread_unlock(pi->pi_owner);
1792216791Sdavidxu		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1793216791Sdavidxu			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1794161678Sdavidxu	}
1795161678Sdavidxu}
1796161678Sdavidxu
1797161678Sdavidxu/*
1798161678Sdavidxu * Insert a PI mutex into owned list.
1799161678Sdavidxu */
1800161678Sdavidxustatic void
1801161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1802161678Sdavidxu{
1803161678Sdavidxu	struct umtx_q *uq_owner;
1804161678Sdavidxu
1805161678Sdavidxu	uq_owner = owner->td_umtxq;
1806170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1807161678Sdavidxu	if (pi->pi_owner != NULL)
1808288494Svangyzen		panic("pi_owner != NULL");
1809161678Sdavidxu	pi->pi_owner = owner;
1810161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1811161678Sdavidxu}
1812161678Sdavidxu
1813279583Skib
1814161678Sdavidxu/*
1815279583Skib * Disown a PI mutex, and remove it from the owned list.
1816279583Skib */
1817279583Skibstatic void
1818279583Skibumtx_pi_disown(struct umtx_pi *pi)
1819279583Skib{
1820279583Skib
1821279583Skib	mtx_assert(&umtx_lock, MA_OWNED);
1822279583Skib	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
1823279583Skib	pi->pi_owner = NULL;
1824279583Skib}
1825279583Skib
1826279583Skib/*
1827161678Sdavidxu * Claim ownership of a PI mutex.
1828161678Sdavidxu */
1829161678Sdavidxustatic int
1830161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1831161678Sdavidxu{
1832288494Svangyzen	struct umtx_q *uq;
1833161678Sdavidxu
1834280309Skib	mtx_lock(&umtx_lock);
1835161678Sdavidxu	if (pi->pi_owner == owner) {
1836280309Skib		mtx_unlock(&umtx_lock);
1837161678Sdavidxu		return (0);
1838161678Sdavidxu	}
1839161678Sdavidxu
1840161678Sdavidxu	if (pi->pi_owner != NULL) {
1841161678Sdavidxu		/*
1842161678Sdavidxu		 * userland may have already messed the mutex, sigh.
1843161678Sdavidxu		 */
1844280309Skib		mtx_unlock(&umtx_lock);
1845161678Sdavidxu		return (EPERM);
1846161678Sdavidxu	}
1847161678Sdavidxu	umtx_pi_setowner(pi, owner);
1848161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1849161678Sdavidxu	if (uq != NULL) {
1850161678Sdavidxu		int pri;
1851161678Sdavidxu
1852161678Sdavidxu		pri = UPRI(uq->uq_thread);
1853170300Sjeff		thread_lock(owner);
1854161678Sdavidxu		if (pri < UPRI(owner))
1855161678Sdavidxu			sched_lend_user_prio(owner, pri);
1856170300Sjeff		thread_unlock(owner);
1857161678Sdavidxu	}
1858280309Skib	mtx_unlock(&umtx_lock);
1859161678Sdavidxu	return (0);
1860161678Sdavidxu}
1861161678Sdavidxu
1862161678Sdavidxu/*
1863174701Sdavidxu * Adjust a thread's order position in its blocked PI mutex,
1864174701Sdavidxu * this may result new priority propagating process.
1865174701Sdavidxu */
1866174701Sdavidxuvoid
1867174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1868174701Sdavidxu{
1869174707Sdavidxu	struct umtx_q *uq;
1870174707Sdavidxu	struct umtx_pi *pi;
1871174707Sdavidxu
1872174707Sdavidxu	uq = td->td_umtxq;
1873280309Skib	mtx_lock(&umtx_lock);
1874174707Sdavidxu	/*
1875174707Sdavidxu	 * Pick up the lock that td is blocked on.
1876174707Sdavidxu	 */
1877174707Sdavidxu	pi = uq->uq_pi_blocked;
1878216791Sdavidxu	if (pi != NULL) {
1879216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1880216791Sdavidxu		umtx_repropagate_priority(pi);
1881216791Sdavidxu	}
1882280309Skib	mtx_unlock(&umtx_lock);
1883174701Sdavidxu}
1884174701Sdavidxu
1885174701Sdavidxu/*
1886161678Sdavidxu * Sleep on a PI mutex.
1887161678Sdavidxu */
1888161678Sdavidxustatic int
1889161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1890233690Sdavidxu	uint32_t owner, const char *wmesg, struct abs_timeout *timo)
1891161678Sdavidxu{
1892161678Sdavidxu	struct umtxq_chain *uc;
1893161678Sdavidxu	struct thread *td, *td1;
1894161678Sdavidxu	struct umtx_q *uq1;
1895161678Sdavidxu	int pri;
1896161678Sdavidxu	int error = 0;
1897161678Sdavidxu
1898161678Sdavidxu	td = uq->uq_thread;
1899161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1900161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1901161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1902274748Skib	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
1903161678Sdavidxu	umtxq_insert(uq);
1904280309Skib	mtx_lock(&umtx_lock);
1905161678Sdavidxu	if (pi->pi_owner == NULL) {
1906280309Skib		mtx_unlock(&umtx_lock);
1907213642Sdavidxu		/* XXX Only look up thread in current process. */
1908213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1909280309Skib		mtx_lock(&umtx_lock);
1910215336Sdavidxu		if (td1 != NULL) {
1911215336Sdavidxu			if (pi->pi_owner == NULL)
1912215336Sdavidxu				umtx_pi_setowner(pi, td1);
1913215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1914161678Sdavidxu		}
1915161678Sdavidxu	}
1916161678Sdavidxu
1917161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1918161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1919161678Sdavidxu		if (pri > UPRI(td))
1920161678Sdavidxu			break;
1921161678Sdavidxu	}
1922161678Sdavidxu
1923161678Sdavidxu	if (uq1 != NULL)
1924161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1925161678Sdavidxu	else
1926161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1927161678Sdavidxu
1928161678Sdavidxu	uq->uq_pi_blocked = pi;
1929174701Sdavidxu	thread_lock(td);
1930161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1931174701Sdavidxu	thread_unlock(td);
1932161678Sdavidxu	umtx_propagate_priority(td);
1933280309Skib	mtx_unlock(&umtx_lock);
1934189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1935161678Sdavidxu
1936233690Sdavidxu	error = umtxq_sleep(uq, wmesg, timo);
1937233690Sdavidxu	umtxq_remove(uq);
1938233690Sdavidxu
1939280309Skib	mtx_lock(&umtx_lock);
1940161678Sdavidxu	uq->uq_pi_blocked = NULL;
1941174701Sdavidxu	thread_lock(td);
1942161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1943174701Sdavidxu	thread_unlock(td);
1944161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1945216791Sdavidxu	umtx_repropagate_priority(pi);
1946280309Skib	mtx_unlock(&umtx_lock);
1947189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1948161678Sdavidxu
1949161678Sdavidxu	return (error);
1950161678Sdavidxu}
1951161678Sdavidxu
1952161678Sdavidxu/*
1953161678Sdavidxu * Add reference count for a PI mutex.
1954161678Sdavidxu */
1955161678Sdavidxustatic void
1956161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1957161678Sdavidxu{
1958161678Sdavidxu	struct umtxq_chain *uc;
1959161678Sdavidxu
1960161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1961161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1962161678Sdavidxu	pi->pi_refcount++;
1963161678Sdavidxu}
1964161678Sdavidxu
1965161678Sdavidxu/*
1966161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1967161678Sdavidxu * is decreased to zero, its memory space is freed.
1968161678Sdavidxu */
1969161678Sdavidxustatic void
1970161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1971161678Sdavidxu{
1972161678Sdavidxu	struct umtxq_chain *uc;
1973161678Sdavidxu
1974161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1975161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1976161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1977161678Sdavidxu	if (--pi->pi_refcount == 0) {
1978280309Skib		mtx_lock(&umtx_lock);
1979288494Svangyzen		if (pi->pi_owner != NULL)
1980288494Svangyzen			umtx_pi_disown(pi);
1981161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1982161678Sdavidxu			("blocked queue not empty"));
1983280309Skib		mtx_unlock(&umtx_lock);
1984161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1985189756Sdavidxu		umtx_pi_free(pi);
1986161678Sdavidxu	}
1987161678Sdavidxu}
1988161678Sdavidxu
1989161678Sdavidxu/*
1990161678Sdavidxu * Find a PI mutex in hash table.
1991161678Sdavidxu */
1992161678Sdavidxustatic struct umtx_pi *
1993161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1994161678Sdavidxu{
1995161678Sdavidxu	struct umtxq_chain *uc;
1996161678Sdavidxu	struct umtx_pi *pi;
1997161678Sdavidxu
1998161678Sdavidxu	uc = umtxq_getchain(key);
1999161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
2000161678Sdavidxu
2001161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
2002161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
2003161678Sdavidxu			return (pi);
2004161678Sdavidxu		}
2005161678Sdavidxu	}
2006161678Sdavidxu	return (NULL);
2007161678Sdavidxu}
2008161678Sdavidxu
2009161678Sdavidxu/*
2010161678Sdavidxu * Insert a PI mutex into hash table.
2011161678Sdavidxu */
2012161678Sdavidxustatic inline void
2013161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
2014161678Sdavidxu{
2015161678Sdavidxu	struct umtxq_chain *uc;
2016161678Sdavidxu
2017161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
2018161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
2019161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
2020161678Sdavidxu}
2021161678Sdavidxu
2022161678Sdavidxu/*
2023161678Sdavidxu * Lock a PI mutex.
2024161678Sdavidxu */
2025161678Sdavidxustatic int
2026233690Sdavidxudo_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
2027233690Sdavidxu    struct _umtx_time *timeout, int try)
2028161678Sdavidxu{
2029233690Sdavidxu	struct abs_timeout timo;
2030161678Sdavidxu	struct umtx_q *uq;
2031161678Sdavidxu	struct umtx_pi *pi, *new_pi;
2032161678Sdavidxu	uint32_t id, owner, old;
2033274648Skib	int error, rv;
2034161678Sdavidxu
2035161678Sdavidxu	id = td->td_tid;
2036161678Sdavidxu	uq = td->td_umtxq;
2037161678Sdavidxu
2038161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
2039161678Sdavidxu	    &uq->uq_key)) != 0)
2040161678Sdavidxu		return (error);
2041233690Sdavidxu
2042233690Sdavidxu	if (timeout != NULL)
2043233690Sdavidxu		abs_timeout_init2(&timo, timeout);
2044233690Sdavidxu
2045163697Sdavidxu	umtxq_lock(&uq->uq_key);
2046163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
2047163697Sdavidxu	if (pi == NULL) {
2048163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
2049163697Sdavidxu		if (new_pi == NULL) {
2050161678Sdavidxu			umtxq_unlock(&uq->uq_key);
2051163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
2052161678Sdavidxu			umtxq_lock(&uq->uq_key);
2053161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
2054163697Sdavidxu			if (pi != NULL) {
2055161678Sdavidxu				umtx_pi_free(new_pi);
2056163697Sdavidxu				new_pi = NULL;
2057161678Sdavidxu			}
2058161678Sdavidxu		}
2059163697Sdavidxu		if (new_pi != NULL) {
2060163697Sdavidxu			new_pi->pi_key = uq->uq_key;
2061163697Sdavidxu			umtx_pi_insert(new_pi);
2062163697Sdavidxu			pi = new_pi;
2063163697Sdavidxu		}
2064163697Sdavidxu	}
2065163697Sdavidxu	umtx_pi_ref(pi);
2066163697Sdavidxu	umtxq_unlock(&uq->uq_key);
2067161678Sdavidxu
2068163697Sdavidxu	/*
2069163697Sdavidxu	 * Care must be exercised when dealing with umtx structure.  It
2070163697Sdavidxu	 * can fault on any access.
2071163697Sdavidxu	 */
2072163697Sdavidxu	for (;;) {
2073161678Sdavidxu		/*
2074161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
2075161678Sdavidxu		 */
2076274648Skib		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
2077274648Skib		/* The address was invalid. */
2078274648Skib		if (rv == -1) {
2079274648Skib			error = EFAULT;
2080274648Skib			break;
2081274648Skib		}
2082161678Sdavidxu
2083161678Sdavidxu		/* The acquire succeeded. */
2084161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
2085161678Sdavidxu			error = 0;
2086161678Sdavidxu			break;
2087161678Sdavidxu		}
2088161678Sdavidxu
2089161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
2090161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2091274648Skib			rv = casueword32(&m->m_owner,
2092274648Skib			    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
2093274648Skib			/* The address was invalid. */
2094274648Skib			if (rv == -1) {
2095274648Skib				error = EFAULT;
2096274648Skib				break;
2097274648Skib			}
2098161678Sdavidxu
2099161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
2100161678Sdavidxu				umtxq_lock(&uq->uq_key);
2101189756Sdavidxu				umtxq_busy(&uq->uq_key);
2102161678Sdavidxu				error = umtx_pi_claim(pi, td);
2103189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
2104161678Sdavidxu				umtxq_unlock(&uq->uq_key);
2105279585Skib				if (error != 0) {
2106279585Skib					/*
2107279585Skib					 * Since we're going to return an
2108279585Skib					 * error, restore the m_owner to its
2109279585Skib					 * previous, unowned state to avoid
2110279585Skib					 * compounding the problem.
2111279585Skib					 */
2112279585Skib					(void)casuword32(&m->m_owner,
2113279585Skib					    id | UMUTEX_CONTESTED,
2114279585Skib					    UMUTEX_CONTESTED);
2115279585Skib				}
2116161678Sdavidxu				break;
2117161678Sdavidxu			}
2118161678Sdavidxu
2119251684Skib			error = umtxq_check_susp(td);
2120251684Skib			if (error != 0)
2121251684Skib				break;
2122251684Skib
2123161678Sdavidxu			/* If this failed the lock has changed, restart. */
2124161678Sdavidxu			continue;
2125161678Sdavidxu		}
2126161678Sdavidxu
2127278345Skib		if ((owner & ~UMUTEX_CONTESTED) == id) {
2128161678Sdavidxu			error = EDEADLK;
2129161678Sdavidxu			break;
2130161678Sdavidxu		}
2131161678Sdavidxu
2132161678Sdavidxu		if (try != 0) {
2133161678Sdavidxu			error = EBUSY;
2134161678Sdavidxu			break;
2135161678Sdavidxu		}
2136161678Sdavidxu
2137161678Sdavidxu		/*
2138161678Sdavidxu		 * If we caught a signal, we have retried and now
2139161678Sdavidxu		 * exit immediately.
2140161678Sdavidxu		 */
2141161678Sdavidxu		if (error != 0)
2142161678Sdavidxu			break;
2143161678Sdavidxu
2144161678Sdavidxu		umtxq_lock(&uq->uq_key);
2145161678Sdavidxu		umtxq_busy(&uq->uq_key);
2146161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2147161678Sdavidxu
2148161678Sdavidxu		/*
2149161678Sdavidxu		 * Set the contested bit so that a release in user space
2150161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
2151161678Sdavidxu		 * either some one else has acquired the lock or it has been
2152161678Sdavidxu		 * released.
2153161678Sdavidxu		 */
2154274648Skib		rv = casueword32(&m->m_owner, owner, &old,
2155274648Skib		    owner | UMUTEX_CONTESTED);
2156161678Sdavidxu
2157161678Sdavidxu		/* The address was invalid. */
2158274648Skib		if (rv == -1) {
2159274648Skib			umtxq_unbusy_unlocked(&uq->uq_key);
2160161678Sdavidxu			error = EFAULT;
2161161678Sdavidxu			break;
2162161678Sdavidxu		}
2163161678Sdavidxu
2164161678Sdavidxu		umtxq_lock(&uq->uq_key);
2165161678Sdavidxu		/*
2166161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
2167161678Sdavidxu		 * and we need to retry or we lost a race to the thread
2168161678Sdavidxu		 * unlocking the umtx.
2169161678Sdavidxu		 */
2170270789Skib		if (old == owner) {
2171161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
2172233690Sdavidxu			    "umtxpi", timeout == NULL ? NULL : &timo);
2173270789Skib			if (error != 0)
2174270789Skib				continue;
2175270789Skib		} else {
2176189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
2177189756Sdavidxu			umtxq_unlock(&uq->uq_key);
2178189756Sdavidxu		}
2179251684Skib
2180251684Skib		error = umtxq_check_susp(td);
2181251684Skib		if (error != 0)
2182251684Skib			break;
2183161678Sdavidxu	}
2184161678Sdavidxu
2185163697Sdavidxu	umtxq_lock(&uq->uq_key);
2186163697Sdavidxu	umtx_pi_unref(pi);
2187163697Sdavidxu	umtxq_unlock(&uq->uq_key);
2188161678Sdavidxu
2189161678Sdavidxu	umtx_key_release(&uq->uq_key);
2190161678Sdavidxu	return (error);
2191161678Sdavidxu}
2192161678Sdavidxu
2193161678Sdavidxu/*
2194161678Sdavidxu * Unlock a PI mutex.
2195161678Sdavidxu */
2196161678Sdavidxustatic int
2197161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
2198161678Sdavidxu{
2199161678Sdavidxu	struct umtx_key key;
2200161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
2201161678Sdavidxu	struct umtx_pi *pi, *pi2;
2202161678Sdavidxu	uint32_t owner, old, id;
2203161678Sdavidxu	int error;
2204161678Sdavidxu	int count;
2205161678Sdavidxu	int pri;
2206161678Sdavidxu
2207161678Sdavidxu	id = td->td_tid;
2208161678Sdavidxu	/*
2209161678Sdavidxu	 * Make sure we own this mtx.
2210161678Sdavidxu	 */
2211274648Skib	error = fueword32(&m->m_owner, &owner);
2212274648Skib	if (error == -1)
2213161678Sdavidxu		return (EFAULT);
2214161678Sdavidxu
2215161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2216161678Sdavidxu		return (EPERM);
2217161678Sdavidxu
2218161678Sdavidxu	/* This should be done in userland */
2219161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
2220274648Skib		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
2221274648Skib		if (error == -1)
2222161678Sdavidxu			return (EFAULT);
2223161678Sdavidxu		if (old == owner)
2224161678Sdavidxu			return (0);
2225161855Sdavidxu		owner = old;
2226161678Sdavidxu	}
2227161678Sdavidxu
2228161678Sdavidxu	/* We should only ever be in here for contested locks */
2229161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
2230161678Sdavidxu	    &key)) != 0)
2231161678Sdavidxu		return (error);
2232161678Sdavidxu
2233161678Sdavidxu	umtxq_lock(&key);
2234161678Sdavidxu	umtxq_busy(&key);
2235161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
2236161678Sdavidxu	if (uq_first != NULL) {
2237280309Skib		mtx_lock(&umtx_lock);
2238161678Sdavidxu		pi = uq_first->uq_pi_blocked;
2239189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
2240288494Svangyzen		if (pi->pi_owner != td) {
2241280309Skib			mtx_unlock(&umtx_lock);
2242161678Sdavidxu			umtxq_unbusy(&key);
2243161678Sdavidxu			umtxq_unlock(&key);
2244189756Sdavidxu			umtx_key_release(&key);
2245161678Sdavidxu			/* userland messed the mutex */
2246161678Sdavidxu			return (EPERM);
2247161678Sdavidxu		}
2248288494Svangyzen		uq_me = td->td_umtxq;
2249279583Skib		umtx_pi_disown(pi);
2250189756Sdavidxu		/* get highest priority thread which is still sleeping. */
2251161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
2252189756Sdavidxu		while (uq_first != NULL &&
2253189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2254189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2255189756Sdavidxu		}
2256161678Sdavidxu		pri = PRI_MAX;
2257161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2258161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2259161678Sdavidxu			if (uq_first2 != NULL) {
2260161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
2261161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
2262161678Sdavidxu			}
2263161678Sdavidxu		}
2264288494Svangyzen		thread_lock(td);
2265288494Svangyzen		sched_lend_user_prio(td, pri);
2266288494Svangyzen		thread_unlock(td);
2267280309Skib		mtx_unlock(&umtx_lock);
2268189756Sdavidxu		if (uq_first)
2269189756Sdavidxu			umtxq_signal_thread(uq_first);
2270279583Skib	} else {
2271279583Skib		pi = umtx_pi_lookup(&key);
2272279583Skib		/*
2273279583Skib		 * A umtx_pi can exist if a signal or timeout removed the
2274279583Skib		 * last waiter from the umtxq, but there is still
2275279583Skib		 * a thread in do_lock_pi() holding the umtx_pi.
2276279583Skib		 */
2277279583Skib		if (pi != NULL) {
2278279583Skib			/*
2279279583Skib			 * The umtx_pi can be unowned, such as when a thread
2280279583Skib			 * has just entered do_lock_pi(), allocated the
2281279583Skib			 * umtx_pi, and unlocked the umtxq.
2282279583Skib			 * If the current thread owns it, it must disown it.
2283279583Skib			 */
2284280309Skib			mtx_lock(&umtx_lock);
2285279583Skib			if (pi->pi_owner == td)
2286279583Skib				umtx_pi_disown(pi);
2287280309Skib			mtx_unlock(&umtx_lock);
2288279583Skib		}
2289161678Sdavidxu	}
2290161678Sdavidxu	umtxq_unlock(&key);
2291161678Sdavidxu
2292161678Sdavidxu	/*
2293161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
2294161678Sdavidxu	 * there is zero or one thread only waiting for it.
2295161678Sdavidxu	 * Otherwise, it must be marked as contested.
2296161678Sdavidxu	 */
2297274648Skib	error = casueword32(&m->m_owner, owner, &old,
2298274648Skib	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
2299161678Sdavidxu
2300274648Skib	umtxq_unbusy_unlocked(&key);
2301161678Sdavidxu	umtx_key_release(&key);
2302274648Skib	if (error == -1)
2303161678Sdavidxu		return (EFAULT);
2304161678Sdavidxu	if (old != owner)
2305161678Sdavidxu		return (EINVAL);
2306161678Sdavidxu	return (0);
2307161678Sdavidxu}
2308161678Sdavidxu
2309161678Sdavidxu/*
2310161678Sdavidxu * Lock a PP mutex.
2311161678Sdavidxu */
2312161678Sdavidxustatic int
2313233690Sdavidxudo_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2314233690Sdavidxu    struct _umtx_time *timeout, int try)
2315161678Sdavidxu{
2316233690Sdavidxu	struct abs_timeout timo;
2317161678Sdavidxu	struct umtx_q *uq, *uq2;
2318161678Sdavidxu	struct umtx_pi *pi;
2319161678Sdavidxu	uint32_t ceiling;
2320161678Sdavidxu	uint32_t owner, id;
2321274648Skib	int error, pri, old_inherited_pri, su, rv;
2322161678Sdavidxu
2323161678Sdavidxu	id = td->td_tid;
2324161678Sdavidxu	uq = td->td_umtxq;
2325161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2326161678Sdavidxu	    &uq->uq_key)) != 0)
2327161678Sdavidxu		return (error);
2328233690Sdavidxu
2329233690Sdavidxu	if (timeout != NULL)
2330233690Sdavidxu		abs_timeout_init2(&timo, timeout);
2331233690Sdavidxu
2332164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2333161678Sdavidxu	for (;;) {
2334161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
2335161678Sdavidxu		umtxq_lock(&uq->uq_key);
2336161678Sdavidxu		umtxq_busy(&uq->uq_key);
2337161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2338161678Sdavidxu
2339274648Skib		rv = fueword32(&m->m_ceilings[0], &ceiling);
2340274648Skib		if (rv == -1) {
2341274648Skib			error = EFAULT;
2342274648Skib			goto out;
2343274648Skib		}
2344274648Skib		ceiling = RTP_PRIO_MAX - ceiling;
2345161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
2346161678Sdavidxu			error = EINVAL;
2347161678Sdavidxu			goto out;
2348161678Sdavidxu		}
2349161678Sdavidxu
2350280309Skib		mtx_lock(&umtx_lock);
2351161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2352280309Skib			mtx_unlock(&umtx_lock);
2353161678Sdavidxu			error = EINVAL;
2354161678Sdavidxu			goto out;
2355161678Sdavidxu		}
2356161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2357161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2358170300Sjeff			thread_lock(td);
2359161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
2360161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
2361170300Sjeff			thread_unlock(td);
2362161678Sdavidxu		}
2363280309Skib		mtx_unlock(&umtx_lock);
2364161678Sdavidxu
2365274648Skib		rv = casueword32(&m->m_owner,
2366274648Skib		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
2367274648Skib		/* The address was invalid. */
2368274648Skib		if (rv == -1) {
2369274648Skib			error = EFAULT;
2370274648Skib			break;
2371274648Skib		}
2372161678Sdavidxu
2373161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2374161678Sdavidxu			error = 0;
2375161678Sdavidxu			break;
2376161678Sdavidxu		}
2377161678Sdavidxu
2378161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2379161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2380161678Sdavidxu			error = EDEADLK;
2381161678Sdavidxu			break;
2382161678Sdavidxu		}
2383161678Sdavidxu
2384161678Sdavidxu		if (try != 0) {
2385161678Sdavidxu			error = EBUSY;
2386161678Sdavidxu			break;
2387161678Sdavidxu		}
2388161678Sdavidxu
2389161678Sdavidxu		/*
2390161678Sdavidxu		 * If we caught a signal, we have retried and now
2391161678Sdavidxu		 * exit immediately.
2392161678Sdavidxu		 */
2393161678Sdavidxu		if (error != 0)
2394161678Sdavidxu			break;
2395161678Sdavidxu
2396161678Sdavidxu		umtxq_lock(&uq->uq_key);
2397161678Sdavidxu		umtxq_insert(uq);
2398161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2399233690Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2400233690Sdavidxu		    NULL : &timo);
2401161678Sdavidxu		umtxq_remove(uq);
2402161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2403161678Sdavidxu
2404280309Skib		mtx_lock(&umtx_lock);
2405161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2406161678Sdavidxu		pri = PRI_MAX;
2407161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2408161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2409161678Sdavidxu			if (uq2 != NULL) {
2410161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2411161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2412161678Sdavidxu			}
2413161678Sdavidxu		}
2414161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2415161678Sdavidxu			pri = uq->uq_inherited_pri;
2416170300Sjeff		thread_lock(td);
2417216791Sdavidxu		sched_lend_user_prio(td, pri);
2418170300Sjeff		thread_unlock(td);
2419280309Skib		mtx_unlock(&umtx_lock);
2420161678Sdavidxu	}
2421161678Sdavidxu
2422161678Sdavidxu	if (error != 0) {
2423280309Skib		mtx_lock(&umtx_lock);
2424161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2425161678Sdavidxu		pri = PRI_MAX;
2426161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2427161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2428161678Sdavidxu			if (uq2 != NULL) {
2429161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2430161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2431161678Sdavidxu			}
2432161678Sdavidxu		}
2433161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2434161678Sdavidxu			pri = uq->uq_inherited_pri;
2435170300Sjeff		thread_lock(td);
2436216791Sdavidxu		sched_lend_user_prio(td, pri);
2437170300Sjeff		thread_unlock(td);
2438280309Skib		mtx_unlock(&umtx_lock);
2439161678Sdavidxu	}
2440161678Sdavidxu
2441161678Sdavidxuout:
2442274648Skib	umtxq_unbusy_unlocked(&uq->uq_key);
2443161678Sdavidxu	umtx_key_release(&uq->uq_key);
2444161678Sdavidxu	return (error);
2445161678Sdavidxu}
2446161678Sdavidxu
/*
 * Unlock a PP (priority-protected) mutex.
 *
 * Verifies the calling thread owns the mutex, stores UMUTEX_CONTESTED as
 * the unlocked state, wakes one waiter, and then drops the ceiling
 * priority that was lent to the thread while it held the mutex.
 *
 * Returns 0 on success, EFAULT on userspace access failure, EPERM if the
 * caller does not own the mutex, or EINVAL for a bad relative ceiling.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* Only privileged threads may actually raise scheduling priority. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to restore after unlocking. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		/* Convert the userland rtprio value to a kernel priority. */
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	/* Busy the queue chain so waiters cannot race with the store below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		/*
		 * Recompute our effective priority: the highest priority
		 * among threads still blocked on PI mutexes we own, capped
		 * by the remaining inherited (ceiling) priority.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2531161678Sdavidxu
2532161678Sdavidxustatic int
2533161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2534161678Sdavidxu	uint32_t *old_ceiling)
2535161678Sdavidxu{
2536161678Sdavidxu	struct umtx_q *uq;
2537161678Sdavidxu	uint32_t save_ceiling;
2538161678Sdavidxu	uint32_t owner, id;
2539161678Sdavidxu	uint32_t flags;
2540274648Skib	int error, rv;
2541161678Sdavidxu
2542274648Skib	error = fueword32(&m->m_flags, &flags);
2543274648Skib	if (error == -1)
2544274648Skib		return (EFAULT);
2545161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2546161678Sdavidxu		return (EINVAL);
2547161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2548161678Sdavidxu		return (EINVAL);
2549161678Sdavidxu	id = td->td_tid;
2550161678Sdavidxu	uq = td->td_umtxq;
2551161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2552161678Sdavidxu	   &uq->uq_key)) != 0)
2553161678Sdavidxu		return (error);
2554161678Sdavidxu	for (;;) {
2555161678Sdavidxu		umtxq_lock(&uq->uq_key);
2556161678Sdavidxu		umtxq_busy(&uq->uq_key);
2557161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2558161678Sdavidxu
2559274648Skib		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2560274648Skib		if (rv == -1) {
2561274648Skib			error = EFAULT;
2562274648Skib			break;
2563274648Skib		}
2564161678Sdavidxu
2565274648Skib		rv = casueword32(&m->m_owner,
2566274648Skib		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
2567274648Skib		if (rv == -1) {
2568274648Skib			error = EFAULT;
2569274648Skib			break;
2570274648Skib		}
2571161678Sdavidxu
2572161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2573161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2574274648Skib			suword32(&m->m_owner, UMUTEX_CONTESTED);
2575161678Sdavidxu			error = 0;
2576161678Sdavidxu			break;
2577161678Sdavidxu		}
2578161678Sdavidxu
2579161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2580161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2581161678Sdavidxu			error = 0;
2582161678Sdavidxu			break;
2583161678Sdavidxu		}
2584161678Sdavidxu
2585161678Sdavidxu		/*
2586161678Sdavidxu		 * If we caught a signal, we have retried and now
2587161678Sdavidxu		 * exit immediately.
2588161678Sdavidxu		 */
2589161678Sdavidxu		if (error != 0)
2590161678Sdavidxu			break;
2591161678Sdavidxu
2592161678Sdavidxu		/*
2593161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
2594161678Sdavidxu		 * and we need to retry or we lost a race to the thread
2595161678Sdavidxu		 * unlocking the umtx.
2596161678Sdavidxu		 */
2597161678Sdavidxu		umtxq_lock(&uq->uq_key);
2598161678Sdavidxu		umtxq_insert(uq);
2599161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2600233690Sdavidxu		error = umtxq_sleep(uq, "umtxpp", NULL);
2601161678Sdavidxu		umtxq_remove(uq);
2602161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2603161678Sdavidxu	}
2604161678Sdavidxu	umtxq_lock(&uq->uq_key);
2605161678Sdavidxu	if (error == 0)
2606161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2607161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2608161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2609161678Sdavidxu	umtx_key_release(&uq->uq_key);
2610161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2611161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2612161678Sdavidxu	return (error);
2613161678Sdavidxu}
2614161678Sdavidxu
2615161678Sdavidxu/*
2616161678Sdavidxu * Lock a userland POSIX mutex.
2617161678Sdavidxu */
2618161678Sdavidxustatic int
2619162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2620233690Sdavidxu    struct _umtx_time *timeout, int mode)
2621161678Sdavidxu{
2622161678Sdavidxu	uint32_t flags;
2623162030Sdavidxu	int error;
2624161678Sdavidxu
2625274648Skib	error = fueword32(&m->m_flags, &flags);
2626274648Skib	if (error == -1)
2627161678Sdavidxu		return (EFAULT);
2628161678Sdavidxu
2629233690Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2630233690Sdavidxu	case 0:
2631233690Sdavidxu		error = do_lock_normal(td, m, flags, timeout, mode);
2632233690Sdavidxu		break;
2633233690Sdavidxu	case UMUTEX_PRIO_INHERIT:
2634233690Sdavidxu		error = do_lock_pi(td, m, flags, timeout, mode);
2635233690Sdavidxu		break;
2636233690Sdavidxu	case UMUTEX_PRIO_PROTECT:
2637233690Sdavidxu		error = do_lock_pp(td, m, flags, timeout, mode);
2638233690Sdavidxu		break;
2639233690Sdavidxu	default:
2640233690Sdavidxu		return (EINVAL);
2641233690Sdavidxu	}
2642162030Sdavidxu	if (timeout == NULL) {
2643179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2644162030Sdavidxu			error = ERESTART;
2645162030Sdavidxu	} else {
2646162030Sdavidxu		/* Timed-locking is not restarted. */
2647162030Sdavidxu		if (error == ERESTART)
2648162030Sdavidxu			error = EINTR;
2649161742Sdavidxu	}
2650162030Sdavidxu	return (error);
2651161678Sdavidxu}
2652161678Sdavidxu
2653161678Sdavidxu/*
2654161678Sdavidxu * Unlock a userland POSIX mutex.
2655161678Sdavidxu */
2656161678Sdavidxustatic int
2657161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2658161678Sdavidxu{
2659161678Sdavidxu	uint32_t flags;
2660274648Skib	int error;
2661161678Sdavidxu
2662274648Skib	error = fueword32(&m->m_flags, &flags);
2663274648Skib	if (error == -1)
2664161678Sdavidxu		return (EFAULT);
2665161678Sdavidxu
2666161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2667161855Sdavidxu	case 0:
2668161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2669161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2670161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2671161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2672161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2673161855Sdavidxu	}
2674161678Sdavidxu
2675161855Sdavidxu	return (EINVAL);
2676161678Sdavidxu}
2677161678Sdavidxu
/*
 * Wait on a userland condition variable.
 *
 * Queues the thread on the condvar's sleep queue, releases the associated
 * user mutex, then sleeps until signalled, interrupted, or timed out.
 * 'wflags' may select an alternate clock (CVWAIT_CLOCKID) and absolute
 * timeout semantics (CVWAIT_ABSTIME); the mutex is NOT re-acquired here,
 * userland relocks it after the wait returns.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Enqueue ourselves before releasing the mutex to avoid lost wakeups. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	/* Atomically (w.r.t. signallers) drop the user mutex before sleeping. */
	error = do_unlock_umutex(td, m);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
			timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/* Off the queue means a signaller woke us; report success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interrupt by signal, or a
		 * spurious wakeup; clear the c_has_waiters flag when
		 * necessary (i.e. when we were the last queued waiter).
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				/* Drop the queue lock across the user access. */
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2765164839Sdavidxu
/*
 * Signal a userland condition variable.
 *
 * Wakes at most one waiter; if that empties the queue, also clears the
 * userland c_has_waiters hint so future signallers can skip the kernel.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * No waiters remain; drop the queue lock around the user
		 * access (the chain stays busied, keeping us serialized).
		 */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2797164839Sdavidxu
/*
 * Broadcast a userland condition variable: wake all waiters and clear
 * the userland c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Queue chain is still busied here, serializing the user store. */
	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
2825164839Sdavidxu
/*
 * Read-lock a userland rwlock.
 *
 * Fast path: CAS-increment the reader count in rw_state while no writer
 * (or, unless reader-preferring, no write waiter) bit is set.  Slow path:
 * busy the monitor, set URWLOCK_READ_WAITERS, bump rw_blocked_readers and
 * sleep until the blocking bits clear, a timeout fires, or a signal
 * arrives.  The last blocked reader clears URWLOCK_READ_WAITERS on the
 * way out.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Unless the lock prefers readers, pending writers also block new
	 * readers (writer-preference avoids writer starvation).
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			/* CAS succeeded iff the word still held 'state'. */
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader; clear the bit. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				/* Preserve any earlier error (e.g. timeout). */
				error1 = umtxq_check_susp(td);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
3001177848Sdavidxu
/*
 * Write-lock a userland rwlock.
 *
 * Fast path: CAS URWLOCK_WRITE_OWNER into rw_state while there is no
 * owner and no readers.  Slow path: busy the monitor, set
 * URWLOCK_WRITE_WAITERS, bump rw_blocked_writers and sleep on the
 * exclusive queue.  The last blocked writer clears URWLOCK_WRITE_WAITERS;
 * if a failing writer leaves blocked readers with no other writers, it
 * wakes the readers so they are not stranded.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		/* Try to take ownership while the lock is free. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			/* CAS succeeded iff the word still held 'state'. */
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Failing with readers blocked (from a previous
			 * iteration) and no writer activity left: wake the
			 * readers so they can retry.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* Set the write-contention bit while the lock is held by others. */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* Lock became free while we set flags; restart the fast path. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		/* Contention bit is set; record ourselves as a blocked writer. */
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* We were the last blocked writer; clear the bit. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
3187177848Sdavidxu
/*
 * Unlock a userland rwlock.
 *
 * CAS-clears the write owner bit or decrements the reader count,
 * whichever the caller held, then wakes waiters according to the lock's
 * preference policy: one writer by default, or all readers when the lock
 * prefers readers (or no writer is waiting).
 *
 * Returns 0, EPERM if the caller does not hold the lock, or EFAULT on a
 * userspace access failure.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Release write ownership, retrying CAS until it sticks. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				/* Owner bit vanished under us: not our lock. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one read hold, retrying CAS until it sticks. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				/* Reader count vanished under us: not our lock. */
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Pick whom to wake: writer-preferring locks wake one writer
	 * first; reader-preferring locks wake all readers first.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
3285177848Sdavidxu
/*
 * Wait on a userland semaphore (struct _usem): advertise a waiter in the
 * shared word, then sleep until woken, interrupted, or timed out.
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	/* Key the sleep queue on the semaphore address and share mode. */
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Queue ourselves with the chain busied, so a concurrent wake
	 * cannot slip in between the user-memory checks below and the
	 * actual sleep.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	/*
	 * Set _has_waiters (count1 receives the old value), then re-read
	 * the count; a nonzero count means the semaphore was posted and
	 * we must not sleep.  A -1 from either access is a user-memory
	 * fault.
	 */
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;	/* Already woken by do_sem_wake(). */
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
3338201472Sdavidxu
/*
 * Signal a userland semaphore.
 */
/*
 * Wake one waiter sleeping on the userland semaphore, clearing the
 * _has_waiters flag when the last waiter is being released.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			/* Drop the chain lock across the user-memory store. */
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3377201472Sdavidxu
3378139013Sdavidxuint
3379225617Skmacysys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
3380139013Sdavidxu    /* struct umtx *umtx */
3381139013Sdavidxu{
3382233690Sdavidxu	return do_lock_umtx(td, uap->umtx, td->td_tid, 0);
3383139013Sdavidxu}
3384139013Sdavidxu
3385139013Sdavidxuint
3386225617Skmacysys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
3387139013Sdavidxu    /* struct umtx *umtx */
3388139013Sdavidxu{
3389162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
3390139013Sdavidxu}
3391139013Sdavidxu
3392228219Sphoinline int
3393228219Sphoumtx_copyin_timeout(const void *addr, struct timespec *tsp)
3394228219Spho{
3395228219Spho	int error;
3396228219Spho
3397228219Spho	error = copyin(addr, tsp, sizeof(struct timespec));
3398228219Spho	if (error == 0) {
3399228219Spho		if (tsp->tv_sec < 0 ||
3400228219Spho		    tsp->tv_nsec >= 1000000000 ||
3401228219Spho		    tsp->tv_nsec < 0)
3402228219Spho			error = EINVAL;
3403228219Spho	}
3404228219Spho	return (error);
3405228219Spho}
3406228219Spho
3407232144Sdavidxustatic inline int
3408232144Sdavidxuumtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
3409232144Sdavidxu{
3410232144Sdavidxu	int error;
3411232144Sdavidxu
3412232286Sdavidxu	if (size <= sizeof(struct timespec)) {
3413232286Sdavidxu		tp->_clockid = CLOCK_REALTIME;
3414232286Sdavidxu		tp->_flags = 0;
3415232144Sdavidxu		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
3416232286Sdavidxu	} else
3417232144Sdavidxu		error = copyin(addr, tp, sizeof(struct _umtx_time));
3418232144Sdavidxu	if (error != 0)
3419232144Sdavidxu		return (error);
3420232144Sdavidxu	if (tp->_timeout.tv_sec < 0 ||
3421232144Sdavidxu	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
3422232144Sdavidxu		return (EINVAL);
3423232144Sdavidxu	return (0);
3424232144Sdavidxu}
3425232144Sdavidxu
3426162536Sdavidxustatic int
3427162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
3428139013Sdavidxu{
3429162536Sdavidxu	struct timespec *ts, timeout;
3430139013Sdavidxu	int error;
3431139013Sdavidxu
3432162536Sdavidxu	/* Allow a null timespec (wait forever). */
3433162536Sdavidxu	if (uap->uaddr2 == NULL)
3434162536Sdavidxu		ts = NULL;
3435162536Sdavidxu	else {
3436228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3437162536Sdavidxu		if (error != 0)
3438162536Sdavidxu			return (error);
3439162536Sdavidxu		ts = &timeout;
3440162536Sdavidxu	}
3441162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
3442162536Sdavidxu}
3443162536Sdavidxu
3444162536Sdavidxustatic int
3445162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
3446162536Sdavidxu{
3447162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
3448162536Sdavidxu}
3449162536Sdavidxu
3450162536Sdavidxustatic int
3451162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3452162536Sdavidxu{
3453232144Sdavidxu	struct _umtx_time timeout, *tm_p;
3454162536Sdavidxu	int error;
3455162536Sdavidxu
3456162536Sdavidxu	if (uap->uaddr2 == NULL)
3457232144Sdavidxu		tm_p = NULL;
3458162536Sdavidxu	else {
3459232144Sdavidxu		error = umtx_copyin_umtx_time(
3460232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3461162536Sdavidxu		if (error != 0)
3462162536Sdavidxu			return (error);
3463232144Sdavidxu		tm_p = &timeout;
3464162536Sdavidxu	}
3465232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
3466162536Sdavidxu}
3467162536Sdavidxu
3468162536Sdavidxustatic int
3469173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3470173800Sdavidxu{
3471232144Sdavidxu	struct _umtx_time timeout, *tm_p;
3472173800Sdavidxu	int error;
3473173800Sdavidxu
3474173800Sdavidxu	if (uap->uaddr2 == NULL)
3475232144Sdavidxu		tm_p = NULL;
3476173800Sdavidxu	else {
3477232144Sdavidxu		error = umtx_copyin_umtx_time(
3478232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3479173800Sdavidxu		if (error != 0)
3480173800Sdavidxu			return (error);
3481232144Sdavidxu		tm_p = &timeout;
3482173800Sdavidxu	}
3483232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3484173800Sdavidxu}
3485173800Sdavidxu
3486173800Sdavidxustatic int
3487178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3488178646Sdavidxu{
3489232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3490178646Sdavidxu	int error;
3491178646Sdavidxu
3492178646Sdavidxu	if (uap->uaddr2 == NULL)
3493232144Sdavidxu		tm_p = NULL;
3494178646Sdavidxu	else {
3495232144Sdavidxu		error = umtx_copyin_umtx_time(
3496232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3497178646Sdavidxu		if (error != 0)
3498178646Sdavidxu			return (error);
3499232144Sdavidxu		tm_p = &timeout;
3500178646Sdavidxu	}
3501232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3502178646Sdavidxu}
3503178646Sdavidxu
3504178646Sdavidxustatic int
3505162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3506162536Sdavidxu{
3507178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3508162536Sdavidxu}
3509162536Sdavidxu
3510216641Sdavidxu#define BATCH_SIZE	128
3511162536Sdavidxustatic int
3512216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3513216641Sdavidxu{
3514216641Sdavidxu	int count = uap->val;
3515216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3516216641Sdavidxu	char **upp = (char **)uap->obj;
3517216641Sdavidxu	int tocopy;
3518216641Sdavidxu	int error = 0;
3519216641Sdavidxu	int i, pos = 0;
3520216641Sdavidxu
3521216641Sdavidxu	while (count > 0) {
3522216641Sdavidxu		tocopy = count;
3523216641Sdavidxu		if (tocopy > BATCH_SIZE)
3524216641Sdavidxu			tocopy = BATCH_SIZE;
3525216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3526216641Sdavidxu		if (error != 0)
3527216641Sdavidxu			break;
3528216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3529216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3530216641Sdavidxu		count -= tocopy;
3531216641Sdavidxu		pos += tocopy;
3532216641Sdavidxu	}
3533216641Sdavidxu	return (error);
3534216641Sdavidxu}
3535216641Sdavidxu
3536216641Sdavidxustatic int
3537178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3538178646Sdavidxu{
3539178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3540178646Sdavidxu}
3541178646Sdavidxu
3542178646Sdavidxustatic int
3543162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3544162536Sdavidxu{
3545232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3546162536Sdavidxu	int error;
3547162536Sdavidxu
3548162536Sdavidxu	/* Allow a null timespec (wait forever). */
3549162536Sdavidxu	if (uap->uaddr2 == NULL)
3550232144Sdavidxu		tm_p = NULL;
3551162536Sdavidxu	else {
3552232144Sdavidxu		error = umtx_copyin_umtx_time(
3553232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3554162536Sdavidxu		if (error != 0)
3555162536Sdavidxu			return (error);
3556232144Sdavidxu		tm_p = &timeout;
3557139013Sdavidxu	}
3558232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3559162536Sdavidxu}
3560162536Sdavidxu
3561162536Sdavidxustatic int
3562162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3563162536Sdavidxu{
3564179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3565162536Sdavidxu}
3566162536Sdavidxu
3567162536Sdavidxustatic int
3568179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3569179970Sdavidxu{
3570232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3571179970Sdavidxu	int error;
3572179970Sdavidxu
3573179970Sdavidxu	/* Allow a null timespec (wait forever). */
3574179970Sdavidxu	if (uap->uaddr2 == NULL)
3575232144Sdavidxu		tm_p = NULL;
3576179970Sdavidxu	else {
3577232144Sdavidxu		error = umtx_copyin_umtx_time(
3578232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3579179970Sdavidxu		if (error != 0)
3580179970Sdavidxu			return (error);
3581232144Sdavidxu		tm_p = &timeout;
3582179970Sdavidxu	}
3583232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3584179970Sdavidxu}
3585179970Sdavidxu
3586179970Sdavidxustatic int
3587179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3588179970Sdavidxu{
3589179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3590179970Sdavidxu}
3591179970Sdavidxu
3592179970Sdavidxustatic int
3593162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3594162536Sdavidxu{
3595162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3596162536Sdavidxu}
3597162536Sdavidxu
3598162536Sdavidxustatic int
3599162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3600162536Sdavidxu{
3601162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3602162536Sdavidxu}
3603162536Sdavidxu
3604164839Sdavidxustatic int
3605164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3606164839Sdavidxu{
3607164839Sdavidxu	struct timespec *ts, timeout;
3608164839Sdavidxu	int error;
3609164839Sdavidxu
3610164839Sdavidxu	/* Allow a null timespec (wait forever). */
3611164839Sdavidxu	if (uap->uaddr2 == NULL)
3612164839Sdavidxu		ts = NULL;
3613164839Sdavidxu	else {
3614228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3615164839Sdavidxu		if (error != 0)
3616164839Sdavidxu			return (error);
3617164839Sdavidxu		ts = &timeout;
3618164839Sdavidxu	}
3619164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3620164839Sdavidxu}
3621164839Sdavidxu
3622164839Sdavidxustatic int
3623164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3624164839Sdavidxu{
3625164839Sdavidxu	return do_cv_signal(td, uap->obj);
3626164839Sdavidxu}
3627164839Sdavidxu
3628164839Sdavidxustatic int
3629164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3630164839Sdavidxu{
3631164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3632164839Sdavidxu}
3633164839Sdavidxu
3634177848Sdavidxustatic int
3635177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3636177848Sdavidxu{
3637232209Sdavidxu	struct _umtx_time timeout;
3638177848Sdavidxu	int error;
3639177848Sdavidxu
3640177848Sdavidxu	/* Allow a null timespec (wait forever). */
3641177848Sdavidxu	if (uap->uaddr2 == NULL) {
3642177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3643177848Sdavidxu	} else {
3644232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3645232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3646177848Sdavidxu		if (error != 0)
3647177848Sdavidxu			return (error);
3648233690Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3649177848Sdavidxu	}
3650177848Sdavidxu	return (error);
3651177848Sdavidxu}
3652177848Sdavidxu
3653177848Sdavidxustatic int
3654177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3655177848Sdavidxu{
3656232209Sdavidxu	struct _umtx_time timeout;
3657177848Sdavidxu	int error;
3658177848Sdavidxu
3659177848Sdavidxu	/* Allow a null timespec (wait forever). */
3660177848Sdavidxu	if (uap->uaddr2 == NULL) {
3661177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3662177848Sdavidxu	} else {
3663232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3664232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3665177848Sdavidxu		if (error != 0)
3666177848Sdavidxu			return (error);
3667177848Sdavidxu
3668233690Sdavidxu		error = do_rw_wrlock(td, uap->obj, &timeout);
3669177848Sdavidxu	}
3670177848Sdavidxu	return (error);
3671177848Sdavidxu}
3672177848Sdavidxu
3673177848Sdavidxustatic int
3674177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3675177848Sdavidxu{
3676177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3677177848Sdavidxu}
3678177848Sdavidxu
3679201472Sdavidxustatic int
3680201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3681201472Sdavidxu{
3682232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3683201472Sdavidxu	int error;
3684201472Sdavidxu
3685201472Sdavidxu	/* Allow a null timespec (wait forever). */
3686201472Sdavidxu	if (uap->uaddr2 == NULL)
3687232144Sdavidxu		tm_p = NULL;
3688201472Sdavidxu	else {
3689232144Sdavidxu		error = umtx_copyin_umtx_time(
3690232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3691201472Sdavidxu		if (error != 0)
3692201472Sdavidxu			return (error);
3693232144Sdavidxu		tm_p = &timeout;
3694201472Sdavidxu	}
3695232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3696201472Sdavidxu}
3697201472Sdavidxu
3698201472Sdavidxustatic int
3699201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3700201472Sdavidxu{
3701201472Sdavidxu	return do_sem_wake(td, uap->obj);
3702201472Sdavidxu}
3703201472Sdavidxu
3704233912Sdavidxustatic int
3705233912Sdavidxu__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
3706233912Sdavidxu{
3707233912Sdavidxu	return do_wake2_umutex(td, uap->obj, uap->val);
3708233912Sdavidxu}
3709233912Sdavidxu
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(2), indexed by the UMTX_OP_* operation
 * code; entry order must match the constant values exactly.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};
3737162536Sdavidxu
3738162536Sdavidxuint
3739225617Skmacysys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3740162536Sdavidxu{
3741163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3742162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3743162536Sdavidxu	return (EINVAL);
3744162536Sdavidxu}
3745162536Sdavidxu
3746205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3747163046Sdavidxuint
3748163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3749163046Sdavidxu    /* struct umtx *umtx */
3750163046Sdavidxu{
3751163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3752163046Sdavidxu}
3753163046Sdavidxu
3754163046Sdavidxuint
3755163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3756163046Sdavidxu    /* struct umtx *umtx */
3757163046Sdavidxu{
3758163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3759163046Sdavidxu}
3760163046Sdavidxu
/* 32-bit layout of struct timespec used by COMPAT_FREEBSD32 callers. */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

/* 32-bit layout of struct _umtx_time used by COMPAT_FREEBSD32 callers. */
struct umtx_time32 {
	struct	timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};
3771232144Sdavidxu
3772162536Sdavidxustatic inline int
3773228218Sphoumtx_copyin_timeout32(void *addr, struct timespec *tsp)
3774162536Sdavidxu{
3775162536Sdavidxu	struct timespec32 ts32;
3776162536Sdavidxu	int error;
3777162536Sdavidxu
3778162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3779162536Sdavidxu	if (error == 0) {
3780228218Spho		if (ts32.tv_sec < 0 ||
3781228218Spho		    ts32.tv_nsec >= 1000000000 ||
3782228218Spho		    ts32.tv_nsec < 0)
3783228218Spho			error = EINVAL;
3784228218Spho		else {
3785228218Spho			tsp->tv_sec = ts32.tv_sec;
3786228218Spho			tsp->tv_nsec = ts32.tv_nsec;
3787228218Spho		}
3788162536Sdavidxu	}
3789140421Sdavidxu	return (error);
3790139013Sdavidxu}
3791161678Sdavidxu
3792232144Sdavidxustatic inline int
3793232144Sdavidxuumtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
3794232144Sdavidxu{
3795232144Sdavidxu	struct umtx_time32 t32;
3796232144Sdavidxu	int error;
3797232144Sdavidxu
3798232144Sdavidxu	t32.clockid = CLOCK_REALTIME;
3799232144Sdavidxu	t32.flags   = 0;
3800232144Sdavidxu	if (size <= sizeof(struct timespec32))
3801232144Sdavidxu		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
3802232144Sdavidxu	else
3803232144Sdavidxu		error = copyin(addr, &t32, sizeof(struct umtx_time32));
3804232144Sdavidxu	if (error != 0)
3805232144Sdavidxu		return (error);
3806232144Sdavidxu	if (t32.timeout.tv_sec < 0 ||
3807232144Sdavidxu	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
3808232144Sdavidxu		return (EINVAL);
3809232144Sdavidxu	tp->_timeout.tv_sec = t32.timeout.tv_sec;
3810232144Sdavidxu	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
3811232144Sdavidxu	tp->_flags = t32.flags;
3812232144Sdavidxu	tp->_clockid = t32.clockid;
3813232144Sdavidxu	return (0);
3814232144Sdavidxu}
3815232144Sdavidxu
3816162536Sdavidxustatic int
3817162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3818162536Sdavidxu{
3819162536Sdavidxu	struct timespec *ts, timeout;
3820162536Sdavidxu	int error;
3821162536Sdavidxu
3822162536Sdavidxu	/* Allow a null timespec (wait forever). */
3823162536Sdavidxu	if (uap->uaddr2 == NULL)
3824162536Sdavidxu		ts = NULL;
3825162536Sdavidxu	else {
3826228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3827162536Sdavidxu		if (error != 0)
3828162536Sdavidxu			return (error);
3829162536Sdavidxu		ts = &timeout;
3830162536Sdavidxu	}
3831162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3832162536Sdavidxu}
3833162536Sdavidxu
3834162536Sdavidxustatic int
3835162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3836162536Sdavidxu{
3837162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3838162536Sdavidxu}
3839162536Sdavidxu
3840162536Sdavidxustatic int
3841162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3842162536Sdavidxu{
3843232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3844162536Sdavidxu	int error;
3845162536Sdavidxu
3846162536Sdavidxu	if (uap->uaddr2 == NULL)
3847232144Sdavidxu		tm_p = NULL;
3848162536Sdavidxu	else {
3849232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3850232144Sdavidxu			(size_t)uap->uaddr1, &timeout);
3851162536Sdavidxu		if (error != 0)
3852162536Sdavidxu			return (error);
3853232144Sdavidxu		tm_p = &timeout;
3854162536Sdavidxu	}
3855232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3856162536Sdavidxu}
3857162536Sdavidxu
3858162536Sdavidxustatic int
3859162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3860162536Sdavidxu{
3861232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3862162536Sdavidxu	int error;
3863162536Sdavidxu
3864162536Sdavidxu	/* Allow a null timespec (wait forever). */
3865162536Sdavidxu	if (uap->uaddr2 == NULL)
3866232144Sdavidxu		tm_p = NULL;
3867162536Sdavidxu	else {
3868330678Sbrooks		error = umtx_copyin_umtx_time32(uap->uaddr2,
3869232144Sdavidxu			    (size_t)uap->uaddr1, &timeout);
3870162536Sdavidxu		if (error != 0)
3871162536Sdavidxu			return (error);
3872232144Sdavidxu		tm_p = &timeout;
3873162536Sdavidxu	}
3874232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3875162536Sdavidxu}
3876162536Sdavidxu
3877164839Sdavidxustatic int
3878179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3879179970Sdavidxu{
3880232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3881179970Sdavidxu	int error;
3882179970Sdavidxu
3883179970Sdavidxu	/* Allow a null timespec (wait forever). */
3884179970Sdavidxu	if (uap->uaddr2 == NULL)
3885232144Sdavidxu		tm_p = NULL;
3886179970Sdavidxu	else {
3887232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3888232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3889179970Sdavidxu		if (error != 0)
3890179970Sdavidxu			return (error);
3891232144Sdavidxu		tm_p = &timeout;
3892179970Sdavidxu	}
3893232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3894179970Sdavidxu}
3895179970Sdavidxu
3896179970Sdavidxustatic int
3897164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3898164839Sdavidxu{
3899164839Sdavidxu	struct timespec *ts, timeout;
3900164839Sdavidxu	int error;
3901164839Sdavidxu
3902164839Sdavidxu	/* Allow a null timespec (wait forever). */
3903164839Sdavidxu	if (uap->uaddr2 == NULL)
3904164839Sdavidxu		ts = NULL;
3905164839Sdavidxu	else {
3906228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3907164839Sdavidxu		if (error != 0)
3908164839Sdavidxu			return (error);
3909164839Sdavidxu		ts = &timeout;
3910164839Sdavidxu	}
3911164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3912164839Sdavidxu}
3913164839Sdavidxu
3914177848Sdavidxustatic int
3915177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3916177848Sdavidxu{
3917232209Sdavidxu	struct _umtx_time timeout;
3918177848Sdavidxu	int error;
3919177848Sdavidxu
3920177848Sdavidxu	/* Allow a null timespec (wait forever). */
3921177848Sdavidxu	if (uap->uaddr2 == NULL) {
3922177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3923177848Sdavidxu	} else {
3924232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3925232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3926177848Sdavidxu		if (error != 0)
3927177848Sdavidxu			return (error);
3928233693Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3929177848Sdavidxu	}
3930177848Sdavidxu	return (error);
3931177848Sdavidxu}
3932177848Sdavidxu
3933177848Sdavidxustatic int
3934177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3935177848Sdavidxu{
3936232209Sdavidxu	struct _umtx_time timeout;
3937177848Sdavidxu	int error;
3938177848Sdavidxu
3939177848Sdavidxu	/* Allow a null timespec (wait forever). */
3940177848Sdavidxu	if (uap->uaddr2 == NULL) {
3941177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3942177848Sdavidxu	} else {
3943232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3944232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3945177848Sdavidxu		if (error != 0)
3946177848Sdavidxu			return (error);
3947233693Sdavidxu		error = do_rw_wrlock(td, uap->obj, &timeout);
3948177848Sdavidxu	}
3949177848Sdavidxu	return (error);
3950177848Sdavidxu}
3951177848Sdavidxu
3952178646Sdavidxustatic int
3953178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3954178646Sdavidxu{
3955232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3956178646Sdavidxu	int error;
3957178646Sdavidxu
3958178646Sdavidxu	if (uap->uaddr2 == NULL)
3959232144Sdavidxu		tm_p = NULL;
3960178646Sdavidxu	else {
3961232144Sdavidxu		error = umtx_copyin_umtx_time32(
3962232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1,&timeout);
3963178646Sdavidxu		if (error != 0)
3964178646Sdavidxu			return (error);
3965232144Sdavidxu		tm_p = &timeout;
3966178646Sdavidxu	}
3967232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3968178646Sdavidxu}
3969178646Sdavidxu
3970201472Sdavidxustatic int
3971201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3972201472Sdavidxu{
3973232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3974201472Sdavidxu	int error;
3975201472Sdavidxu
3976201472Sdavidxu	/* Allow a null timespec (wait forever). */
3977201472Sdavidxu	if (uap->uaddr2 == NULL)
3978232144Sdavidxu		tm_p = NULL;
3979201472Sdavidxu	else {
3980232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3981232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3982201472Sdavidxu		if (error != 0)
3983201472Sdavidxu			return (error);
3984232144Sdavidxu		tm_p = &timeout;
3985201472Sdavidxu	}
3986232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3987201472Sdavidxu}
3988201472Sdavidxu
3989216641Sdavidxustatic int
3990216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3991216641Sdavidxu{
3992216641Sdavidxu	int count = uap->val;
3993216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3994216641Sdavidxu	uint32_t **upp = (uint32_t **)uap->obj;
3995216641Sdavidxu	int tocopy;
3996216641Sdavidxu	int error = 0;
3997216641Sdavidxu	int i, pos = 0;
3998216641Sdavidxu
3999216641Sdavidxu	while (count > 0) {
4000216641Sdavidxu		tocopy = count;
4001216641Sdavidxu		if (tocopy > BATCH_SIZE)
4002216641Sdavidxu			tocopy = BATCH_SIZE;
4003216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
4004216641Sdavidxu		if (error != 0)
4005216641Sdavidxu			break;
4006216641Sdavidxu		for (i = 0; i < tocopy; ++i)
4007216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
4008216641Sdavidxu				INT_MAX, 1);
4009216641Sdavidxu		count -= tocopy;
4010216641Sdavidxu		pos += tocopy;
4011216641Sdavidxu	}
4012216641Sdavidxu	return (error);
4013216641Sdavidxu}
4014216641Sdavidxu
/*
 * COMPAT_FREEBSD32 dispatch table; entry order must match the native
 * op_table (indexed by UMTX_OP_* values).
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};
4040162536Sdavidxu
4041162536Sdavidxuint
4042162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
4043162536Sdavidxu{
4044163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
4045162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
4046162536Sdavidxu			(struct _umtx_op_args *)uap);
4047162536Sdavidxu	return (EINVAL);
4048162536Sdavidxu}
4049162536Sdavidxu#endif
4050162536Sdavidxu
4051161678Sdavidxuvoid
4052161678Sdavidxuumtx_thread_init(struct thread *td)
4053161678Sdavidxu{
4054161678Sdavidxu	td->td_umtxq = umtxq_alloc();
4055161678Sdavidxu	td->td_umtxq->uq_thread = td;
4056161678Sdavidxu}
4057161678Sdavidxu
4058161678Sdavidxuvoid
4059161678Sdavidxuumtx_thread_fini(struct thread *td)
4060161678Sdavidxu{
4061161678Sdavidxu	umtxq_free(td->td_umtxq);
4062161678Sdavidxu}
4063161678Sdavidxu
4064161678Sdavidxu/*
4065161678Sdavidxu * It will be called when new thread is created, e.g fork().
4066161678Sdavidxu */
4067161678Sdavidxuvoid
4068161678Sdavidxuumtx_thread_alloc(struct thread *td)
4069161678Sdavidxu{
4070161678Sdavidxu	struct umtx_q *uq;
4071161678Sdavidxu
4072161678Sdavidxu	uq = td->td_umtxq;
4073161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
4074161678Sdavidxu
4075161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4076161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4077161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4078161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4079161678Sdavidxu}
4080161678Sdavidxu
/*
 * exec() hook.
 *
 * Registered as an image-activation handler; drops any umtx state
 * (inherited priority, contested PI mutexes) held by the exec'ing thread,
 * since user addresses from the old image are meaningless afterwards.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
4090161678Sdavidxu
/*
 * thread_exit() hook.
 *
 * Releases the exiting thread's umtx state so it cannot leave a dangling
 * priority lend or orphaned PI mutex references behind.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
4099161678Sdavidxu
/*
 * clean up umtx data.
 *
 * Disassociates the thread from all priority-inheritance (PI) mutexes it
 * still owns and withdraws any priority that was lent to it.  Called on
 * thread exit and exec; safe to call on a thread with no umtx state.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	/* Nothing to do if the thread never had a umtx queue attached. */
	if ((uq = td->td_umtxq) == NULL)
		return;

	/* umtx_lock protects uq_inherited_pri and the contested-PI list. */
	mtx_lock(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		/* Orphan the PI mutex; a new owner will claim it later. */
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock(&umtx_lock);
	/*
	 * Drop any lent user priority back to PRI_MAX under the thread
	 * lock, after umtx_lock is released (lock-order requirement).
	 */
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
4123