/* kern_umtx.c revision 161678 */
1227569Sphilip/*-
2284555Sarybchik * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3284555Sarybchik * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4227569Sphilip * All rights reserved.
5227569Sphilip *
6284555Sarybchik * Redistribution and use in source and binary forms, with or without
7227569Sphilip * modification, are permitted provided that the following conditions
8284555Sarybchik * are met:
9284555Sarybchik * 1. Redistributions of source code must retain the above copyright
10284555Sarybchik *    notice unmodified, this list of conditions, and the following
11284555Sarybchik *    disclaimer.
12284555Sarybchik * 2. Redistributions in binary form must reproduce the above copyright
13227569Sphilip *    notice, this list of conditions and the following disclaimer in the
14284555Sarybchik *    documentation and/or other materials provided with the distribution.
15284555Sarybchik *
16284555Sarybchik * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17284555Sarybchik * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18284555Sarybchik * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19284555Sarybchik * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20284555Sarybchik * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21284555Sarybchik * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22284555Sarybchik * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23284555Sarybchik * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24284555Sarybchik * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25284555Sarybchik * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26284555Sarybchik */
27284555Sarybchik
28284555Sarybchik#include <sys/cdefs.h>
29284555Sarybchik__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 161678 2006-08-28 04:24:51Z davidxu $");
30227569Sphilip
31228078Sphilip#include <sys/param.h>
32228078Sphilip#include <sys/kernel.h>
33227569Sphilip#include <sys/limits.h>
34227569Sphilip#include <sys/lock.h>
35227569Sphilip#include <sys/malloc.h>
36227569Sphilip#include <sys/mutex.h>
37227569Sphilip#include <sys/proc.h>
38227569Sphilip#include <sys/sched.h>
39227569Sphilip#include <sys/sysctl.h>
40227569Sphilip#include <sys/sysent.h>
41227569Sphilip#include <sys/systm.h>
42227569Sphilip#include <sys/sysproto.h>
43227569Sphilip#include <sys/eventhandler.h>
44227569Sphilip#include <sys/umtx.h>
45227569Sphilip
46227569Sphilip#include <vm/vm.h>
47227569Sphilip#include <vm/vm_param.h>
48227569Sphilip#include <vm/pmap.h>
49227569Sphilip#include <vm/vm_map.h>
50227569Sphilip#include <vm/vm_object.h>
51227569Sphilip
52227569Sphilip#define TYPE_SIMPLE_LOCK	0
53227569Sphilip#define TYPE_SIMPLE_WAIT	1
54227569Sphilip#define TYPE_NORMAL_UMUTEX	2
55227569Sphilip#define TYPE_PI_UMUTEX		3
56227569Sphilip#define TYPE_PP_UMUTEX		4
57227569Sphilip#define TYPE_CV			5
58227569Sphilip
59227569Sphilip/* Key to represent a unique userland synchronous object */
/* Key to represent a unique userland synchronous object */
struct umtx_key {
	int	hash;		/* bucket index into umtxq_chains[] */
	int	type;		/* TYPE_* class of the userland object */
	int	shared;		/* non-zero: holds a vm_object reference */
	union {
		/* Process-shared objects: VM object + offset within it. */
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		/* Process-private objects: vmspace + virtual address. */
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		/*
		 * Type-punned view of either variant above; used by
		 * umtxq_hash() and umtx_key_match() so both variants
		 * hash and compare uniformly.
		 */
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};
79227569Sphilip
80227569Sphilip/* Priority inheritance mutex info. */
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash chain (umtxq_chain.uc_pi_list) */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of waiters blocked on this PI mutex */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
100227569Sphilip
101227569Sphilip/* A userland synchronous object user. */
/* A userland synchronous object user (one per thread). */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key identifying the object this thread waits on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* on a umtxq_chain sleep queue */

	/* The thread this umtx_q belongs to. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or sched_lock, write must have both chain lock and
	 * sched_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list (umtx_pi.pi_blocked) */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes this thread owns that have waiters */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};
132227569Sphilip
133227569SphilipTAILQ_HEAD(umtxq_head, umtx_q);
134227569Sphilip
135227569Sphilip/* Userland lock object's wait-queue chain */
/* Userland lock object's wait-queue chain (one hash bucket). */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag: set while an operation that may block is in flight */
	char			uc_busy;

	/* Number of threads sleeping in umtxq_busy() for this chain */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
152227569Sphilip
153227569Sphilip#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
154227569Sphilip
155227569Sphilip/*
156227569Sphilip * Don't propagate time-sharing priority, there is a security reason,
157227569Sphilip * a user can simply introduce PI-mutex, let thread A lock the mutex,
158227569Sphilip * and let another thread B block on the mutex, because B is
159227569Sphilip * sleeping, its priority will be boosted, this causes A's priority to
160227569Sphilip * be boosted via priority propagating too and will never be lowered even
161227569Sphilip * if it is using 100%CPU, this is unfair to other processes.
162227569Sphilip */
163227569Sphilip
164227569Sphilip#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
165227569Sphilip			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
166227569Sphilip			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)
167227569Sphilip
168227569Sphilip#define	GOLDEN_RATIO_PRIME	2654404609U
169227569Sphilip#define	UMTX_CHAINS		128
170227569Sphilip#define	UMTX_SHIFTS		(__WORD_BIT - 7)
171227569Sphilip
172227569Sphilip#define THREAD_SHARE		0
173227569Sphilip#define PROCESS_SHARE		1
174227569Sphilip#define AUTO_SHARE		2
175227569Sphilip
176227569Sphilip#define	GET_SHARE(flags)	\
177227569Sphilip    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
178227569Sphilip
179227569Sphilipstatic uma_zone_t		umtx_pi_zone;
180227569Sphilipstatic struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
181227569Sphilipstatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
182227569Sphilipstatic int			umtx_pi_allocated;
183227569Sphilip
184227569SphilipSYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
185227569SphilipSYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
186227569Sphilip    &umtx_pi_allocated, 0, "Allocated umtx_pi");
187227569Sphilip
188227569Sphilipstatic void umtxq_sysinit(void *);
189227569Sphilipstatic void umtxq_hash(struct umtx_key *key);
190227569Sphilipstatic struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
191227569Sphilipstatic void umtxq_lock(struct umtx_key *key);
192227569Sphilipstatic void umtxq_unlock(struct umtx_key *key);
193227569Sphilipstatic void umtxq_busy(struct umtx_key *key);
194227569Sphilipstatic void umtxq_unbusy(struct umtx_key *key);
195227569Sphilipstatic void umtxq_insert(struct umtx_q *uq);
196227569Sphilipstatic void umtxq_remove(struct umtx_q *uq);
197227569Sphilipstatic int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
198227569Sphilipstatic int umtxq_count(struct umtx_key *key);
199227569Sphilipstatic int umtxq_signal(struct umtx_key *key, int nr_wakeup);
200227569Sphilipstatic int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
201227569Sphilipstatic int umtx_key_get(void *addr, int type, int share,
202227569Sphilip	struct umtx_key *key);
203227569Sphilipstatic void umtx_key_release(struct umtx_key *key);
204227569Sphilipstatic struct umtx_pi *umtx_pi_alloc(void);
205227569Sphilipstatic void umtx_pi_free(struct umtx_pi *pi);
206227569Sphilipstatic int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
207227569Sphilipstatic void umtx_thread_cleanup(struct thread *td);
208227569Sphilipstatic void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
209227569Sphilip	struct image_params *imgp __unused);
210227569SphilipSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
211227569Sphilip
212227569Sphilipstatic void
213227569Sphilipumtxq_sysinit(void *arg __unused)
214227569Sphilip{
215227569Sphilip	int i;
216227569Sphilip
217227569Sphilip	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
218227569Sphilip		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
219227569Sphilip	for (i = 0; i < UMTX_CHAINS; ++i) {
220227569Sphilip		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
221227569Sphilip			 MTX_DEF | MTX_DUPOK);
222227569Sphilip		TAILQ_INIT(&umtxq_chains[i].uc_queue);
223227569Sphilip		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
224227569Sphilip		umtxq_chains[i].uc_busy = 0;
225227569Sphilip		umtxq_chains[i].uc_waiters = 0;
226227569Sphilip	}
227227569Sphilip	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
228227569Sphilip	    EVENTHANDLER_PRI_ANY);
229227569Sphilip}
230227569Sphilip
231227569Sphilipstruct umtx_q *
232227569Sphilipumtxq_alloc(void)
233227569Sphilip{
234227569Sphilip	struct umtx_q *uq;
235227569Sphilip
236284555Sarybchik	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
237284555Sarybchik	TAILQ_INIT(&uq->uq_pi_contested);
238284555Sarybchik	uq->uq_inherited_pri = PRI_MAX;
239227569Sphilip	return (uq);
240227569Sphilip}
241227569Sphilip
/* Release a umtx_q previously obtained from umtxq_alloc(). */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
247227569Sphilip
248227569Sphilipstatic inline void
249227569Sphilipumtxq_hash(struct umtx_key *key)
250227569Sphilip{
251227569Sphilip	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
252227569Sphilip	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
253227569Sphilip}
254227569Sphilip
255227569Sphilipstatic inline int
256227569Sphilipumtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
257227569Sphilip{
258227569Sphilip	return (k1->type == k2->type &&
259227569Sphilip		k1->info.both.a == k2->info.both.a &&
260227569Sphilip	        k1->info.both.b == k2->info.both.b);
261227569Sphilip}
262227569Sphilip
/* Map a hashed key to its wait-queue chain (hash bucket). */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}
268227569Sphilip
269227569Sphilip/*
270227569Sphilip * Set chain to busy state when following operation
271227569Sphilip * may be blocked (kernel mutex can not be used).
272227569Sphilip */
273227569Sphilipstatic inline void
274227569Sphilipumtxq_busy(struct umtx_key *key)
275227569Sphilip{
276227569Sphilip	struct umtxq_chain *uc;
277227569Sphilip
278227569Sphilip	uc = umtxq_getchain(key);
279227569Sphilip	mtx_assert(&uc->uc_lock, MA_OWNED);
280227569Sphilip	while (uc->uc_busy != 0) {
281227569Sphilip		uc->uc_waiters++;
282227569Sphilip		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
283227569Sphilip		uc->uc_waiters--;
284227569Sphilip	}
285227569Sphilip	uc->uc_busy = 1;
286227569Sphilip}
287227569Sphilip
/*
 * Unbusy a chain.  Chain lock must be held; wakes one thread
 * sleeping in umtxq_busy(), if any.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
303227569Sphilip
/*
 * Lock a chain (acquire the bucket mutex for this key's chain).
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}
315227569Sphilip
/*
 * Unlock a chain (release the bucket mutex for this key's chain).
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}
327227569Sphilip
328227569Sphilip/*
329227569Sphilip * Insert a thread onto the umtx queue.
330227569Sphilip */
331227569Sphilipstatic inline void
332227569Sphilipumtxq_insert(struct umtx_q *uq)
333227569Sphilip{
334227569Sphilip	struct umtxq_chain *uc;
335227569Sphilip
336227569Sphilip	uc = umtxq_getchain(&uq->uq_key);
337227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
338227569Sphilip	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
339227569Sphilip	uq->uq_flags |= UQF_UMTXQ;
340227569Sphilip}
341227569Sphilip
342227569Sphilip/*
343227569Sphilip * Remove thread from the umtx queue.
344227569Sphilip */
345227569Sphilipstatic inline void
346227569Sphilipumtxq_remove(struct umtx_q *uq)
347227569Sphilip{
348227569Sphilip	struct umtxq_chain *uc;
349227569Sphilip
350227569Sphilip	uc = umtxq_getchain(&uq->uq_key);
351227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
352227569Sphilip	if (uq->uq_flags & UQF_UMTXQ) {
353227569Sphilip		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
354227569Sphilip		uq->uq_flags &= ~UQF_UMTXQ;
355227569Sphilip	}
356227569Sphilip}
357227569Sphilip
358227569Sphilip/*
359227569Sphilip * Check if there are multiple waiters
360227569Sphilip */
361227569Sphilipstatic int
362227569Sphilipumtxq_count(struct umtx_key *key)
363227569Sphilip{
364227569Sphilip	struct umtxq_chain *uc;
365227569Sphilip	struct umtx_q *uq;
366227569Sphilip	int count = 0;
367227569Sphilip
368227569Sphilip	uc = umtxq_getchain(key);
369227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
370227569Sphilip	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
371227569Sphilip		if (umtx_key_match(&uq->uq_key, key)) {
372227569Sphilip			if (++count > 1)
373227569Sphilip				break;
374227569Sphilip		}
375227569Sphilip	}
376227569Sphilip	return (count);
377227569Sphilip}
378227569Sphilip
379227569Sphilip/*
380227569Sphilip * Check if there are multiple PI waiters and returns first
381227569Sphilip * waiter.
382227569Sphilip */
383227569Sphilipstatic int
384227569Sphilipumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
385227569Sphilip{
386227569Sphilip	struct umtxq_chain *uc;
387227569Sphilip	struct umtx_q *uq;
388227569Sphilip	int count = 0;
389227569Sphilip
390227569Sphilip	*first = NULL;
391227569Sphilip	uc = umtxq_getchain(key);
392227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
393227569Sphilip	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
394227569Sphilip		if (umtx_key_match(&uq->uq_key, key)) {
395227569Sphilip			if (++count > 1)
396227569Sphilip				break;
397227569Sphilip			*first = uq;
398227569Sphilip		}
399227569Sphilip	}
400227569Sphilip	return (count);
401227569Sphilip}
402227569Sphilip
403227569Sphilip/*
404227569Sphilip * Wake up threads waiting on an userland object.
405227569Sphilip */
406227569Sphilipstatic int
407227569Sphilipumtxq_signal(struct umtx_key *key, int n_wake)
408227569Sphilip{
409227569Sphilip	struct umtxq_chain *uc;
410227569Sphilip	struct umtx_q *uq, *next;
411227569Sphilip	int ret;
412227569Sphilip
413227569Sphilip	ret = 0;
414227569Sphilip	uc = umtxq_getchain(key);
415227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
416227569Sphilip	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
417227569Sphilip		if (umtx_key_match(&uq->uq_key, key)) {
418227569Sphilip			umtxq_remove(uq);
419227569Sphilip			wakeup(uq);
420227569Sphilip			if (++ret >= n_wake)
421227569Sphilip				break;
422227569Sphilip		}
423227569Sphilip	}
424227569Sphilip	return (ret);
425227569Sphilip}
426227569Sphilip
/*
 * Wake up a specific thread: dequeue it and issue the wakeup.
 * Chain lock must be held.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}
440227569Sphilip
/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.  Returns 0 immediately if a
 * wakeup already dequeued us.  Sleeps interruptibly (PCATCH) with
 * an optional timeout in ticks; msleep's EWOULDBLOCK is translated
 * to ETIMEDOUT for callers.  Chain lock must be held.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}
460227569Sphilip
/*
 * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys use (vmspace, address).  PROCESS_SHARE and
 * AUTO_SHARE keys look the address up in the VM map; AUTO_SHARE
 * falls back to a private key unless the backing entry is
 * inherit-shared.  A shared key takes a reference on the VM
 * object, released later by umtx_key_release().
 * Returns 0 on success or EFAULT if the address is unmapped.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else if (share == PROCESS_SHARE || share == AUTO_SHARE) {
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Object offset is stable across all sharing maps. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
505227569Sphilip
/*
 * Release key: drop the VM object reference taken by umtx_key_get()
 * for shared keys; private keys hold no resources.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
515227569Sphilip
/*
 * Lock a umtx object.
 *
 * Implements the kernel slow path of the userland umtx lock: retry
 * a compare-and-swap on the owner word, and when the lock is held
 * by someone else, publish the contested bit and sleep on the
 * object's wait queue until woken or timed out (timo ticks, 0 for
 * no timeout).  Returns 0 on acquisition, EFAULT on a bad address,
 * or the sleep error (EINTR/ERESTART/ETIMEDOUT).
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, uintptr_t id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves before touching the owner word again so
		 * an unlocking thread cannot miss us.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* NOTREACHED: the loop only exits via return. */
	return (0);
}
613227569Sphilip
/*
 * Lock a umtx object.
 *
 * Wrapper around _do_lock() that implements an absolute deadline:
 * the relative timeout is added to the uptime clock and the lock
 * attempt is retried with the remaining time after each ETIMEDOUT
 * tick-granular sleep, until the deadline truly passes.  EINTR is
 * mapped to ERESTART so userland can back off its critical region
 * and the syscall is transparently restarted.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, uintptr_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		/* ts holds the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Recompute the remaining time for the next try. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == EINTR)
		error = ERESTART;
	return (error);
}
652227569Sphilip
/*
 * Unlock a umtx object.
 *
 * Verifies the caller owns the lock, handles the uncontested fast
 * path with a single CAS, and otherwise hands the lock off:
 * the owner word is rewritten to UNOWNED (or CONTESTED when more
 * than one waiter remains) and one waiter is woken.  Returns 0,
 * EFAULT on a bad address, EPERM if not owner, or EINVAL if the
 * owner word changed underneath us.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, uintptr_t id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	owner = fuword(&umtx->u_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
			UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Contested bit appeared concurrently; take the slow path. */
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the count stays meaningful while we CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
716227569Sphilip
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * Queues the thread BEFORE reading the word so a concurrent wake
 * between the read and the sleep cannot be lost.  With a timeout,
 * sleeps in tick-granular chunks against an absolute uptime
 * deadline (same pattern as do_lock()).  ERESTART is mapped to
 * EINTR because a restarted wait could block forever after a
 * wakeup was already consumed.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, uintptr_t id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uintptr_t tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		/* Value already changed: dequeue and return success. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "ucond", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* ts holds the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;	/* Woken up. */
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			/* Recompute the remaining time and sleep again. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
778227569Sphilip
779227569Sphilip/*
780227569Sphilip * Wake up threads sleeping on the specified address.
781227569Sphilip */
782227569Sphilipint
783227569Sphilipkern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
784227569Sphilip{
785227569Sphilip	struct umtx_key key;
786227569Sphilip	int ret;
787227569Sphilip
788227569Sphilip	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
789227569Sphilip	   &key)) != 0)
790227569Sphilip		return (ret);
791227569Sphilip	umtxq_lock(&key);
792227569Sphilip	ret = umtxq_signal(&key, n_wake);
793227569Sphilip	umtxq_unlock(&key);
794227569Sphilip	umtx_key_release(&key);
795227569Sphilip	return (0);
796227569Sphilip}
797227569Sphilip
798227569Sphilip/*
799227569Sphilip * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
800227569Sphilip */
/*
 * Acquire a PTHREAD_PRIO_NONE umutex, sleeping for at most 'timo'
 * ticks when contested.  Ownership is taken by compare-and-swapping
 * the calling thread's tid into the userland owner word, so every
 * access to *m may fault.  With 'try' non-zero, returns EBUSY instead
 * of sleeping.  Returns 0 on success, EFAULT on a bad address,
 * EDEADLK for an error-checking mutex the caller already owns, or
 * the error from umtxq_sleep (e.g. ETIMEDOUT, signal errors).
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves before touching the owner word again so
		 * a concurrent unlock cannot miss us; busy/unbusy brackets
		 * the insert against concurrent wakers.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
902227569Sphilip
903227569Sphilip/*
904227569Sphilip * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
905227569Sphilip */
906227569Sphilipstatic int
907227569Sphilipdo_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
908227569Sphilip	struct timespec *timeout, int try)
909227569Sphilip{
910227569Sphilip	struct timespec ts, ts2, ts3;
911227569Sphilip	struct timeval tv;
912227569Sphilip	int error;
913227569Sphilip
914227569Sphilip	if (timeout == NULL) {
915227569Sphilip		error = _do_lock_normal(td, m, flags, 0, try);
916227569Sphilip	} else {
917227569Sphilip		getnanouptime(&ts);
918227569Sphilip		timespecadd(&ts, timeout);
919227569Sphilip		TIMESPEC_TO_TIMEVAL(&tv, timeout);
920227569Sphilip		for (;;) {
921227569Sphilip			error = _do_lock_normal(td, m, flags, tvtohz(&tv), try);
922227569Sphilip			if (error != ETIMEDOUT)
923227569Sphilip				break;
924227569Sphilip			getnanouptime(&ts2);
925227569Sphilip			if (timespeccmp(&ts2, &ts, >=)) {
926227569Sphilip				error = ETIMEDOUT;
927227569Sphilip				break;
928227569Sphilip			}
929227569Sphilip			ts3 = ts;
930227569Sphilip			timespecsub(&ts3, &ts2);
931227569Sphilip			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
932227569Sphilip		}
933227569Sphilip	}
934227569Sphilip	/*
935227569Sphilip	 * This lets userland back off critical region if needed.
936227569Sphilip	 */
937227569Sphilip	if (error == EINTR)
938227569Sphilip		error = ERESTART;
939227569Sphilip	return (error);
940227569Sphilip}
941227569Sphilip
942227569Sphilip/*
943227569Sphilip * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
944227569Sphilip */
/*
 * Release a PTHREAD_PRIO_NONE umutex owned by the calling thread.
 * Returns 0 on success, EFAULT if the userland word cannot be
 * accessed, EPERM when the caller does not own the mutex, or EINVAL
 * if the owner word changed underneath us (userland raced with us).
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays valid across the CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	/* Wake one waiter unconditionally; the CAS result is checked below. */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1001227569Sphilip
1002227569Sphilipstatic inline struct umtx_pi *
1003227569Sphilipumtx_pi_alloc(void)
1004227569Sphilip{
1005227569Sphilip	struct umtx_pi *pi;
1006227569Sphilip
1007227569Sphilip	pi = uma_zalloc(umtx_pi_zone, M_ZERO | M_WAITOK);
1008227569Sphilip	TAILQ_INIT(&pi->pi_blocked);
1009227569Sphilip	atomic_add_int(&umtx_pi_allocated, 1);
1010227569Sphilip	return (pi);
1011227569Sphilip}
1012227569Sphilip
1013227569Sphilipstatic inline void
1014227569Sphilipumtx_pi_free(struct umtx_pi *pi)
1015227569Sphilip{
1016227569Sphilip	uma_zfree(umtx_pi_zone, pi);
1017227569Sphilip	atomic_add_int(&umtx_pi_allocated, -1);
1018227569Sphilip}
1019227569Sphilip
1020227569Sphilip/*
1021227569Sphilip * Adjust the thread's position on a pi_state after its priority has been
1022227569Sphilip * changed.
1023227569Sphilip */
/*
 * Re-sort 'td' within pi->pi_blocked, which is kept ordered by user
 * priority, after td's priority has changed.  Caller holds sched_lock.
 * Returns 0 when pi is NULL (nothing to adjust), 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Insert before the first lower-priority waiter. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1064227569Sphilip
1065227569Sphilip/*
1066227569Sphilip * Propagate priority when a thread is blocked on POSIX
1067227569Sphilip * PI mutex.
1068227569Sphilip */
/*
 * Walk the chain of PI mutexes starting at the one 'td' is blocked on,
 * lending td's priority to each successive owner until an owner
 * already runs at that priority or better, the chain ends, or an
 * owner is not itself blocked on a PI mutex.  Caller holds sched_lock.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		/* 'td' now walks up the ownership chain. */
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Stop once the owner is already at least this urgent. */
		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
1106227569Sphilip
1107227569Sphilip/*
1108227569Sphilip * Unpropagate priority for a PI mutex when a thread blocked on
1109227569Sphilip * it is interrupted by signal or resumed by others.
1110227569Sphilip */
/*
 * Recompute the lent priority of pi's owner as the best priority among
 * the head waiters of every PI mutex the owner still holds (capped by
 * the owner's own inherited priority), then continue up the chain if
 * the owner is itself blocked on another PI mutex.  Called when a
 * waiter leaves, e.g. on signal or wakeup.  Caller holds sched_lock.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Best waiter across every PI mutex the owner holds. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		pi = uq_owner->uq_pi_blocked;
	}
}
1138227569Sphilip
1139227569Sphilip/*
1140227569Sphilip * Insert a PI mutex into owned list.
1141227569Sphilip */
1142227569Sphilipstatic void
1143227569Sphilipumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1144227569Sphilip{
1145227569Sphilip	struct umtx_q *uq_owner;
1146227569Sphilip
1147227569Sphilip	uq_owner = owner->td_umtxq;
1148227569Sphilip	mtx_assert(&sched_lock, MA_OWNED);
1149227569Sphilip	if (pi->pi_owner != NULL)
1150227569Sphilip		panic("pi_ower != NULL");
1151227569Sphilip	pi->pi_owner = owner;
1152227569Sphilip	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1153227569Sphilip}
1154227569Sphilip
1155227569Sphilip/*
1156227569Sphilip * Claim ownership of a PI mutex.
1157227569Sphilip */
1158227569Sphilipstatic int
1159227569Sphilipumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1160227569Sphilip{
1161227569Sphilip	struct umtx_q *uq, *uq_owner;
1162227569Sphilip
1163227569Sphilip	uq_owner = owner->td_umtxq;
1164227569Sphilip	mtx_lock_spin(&sched_lock);
1165227569Sphilip	if (pi->pi_owner == owner) {
1166227569Sphilip		mtx_unlock_spin(&sched_lock);
1167227569Sphilip		return (0);
1168227569Sphilip	}
1169227569Sphilip
1170227569Sphilip	if (pi->pi_owner != NULL) {
1171227569Sphilip		/*
1172227569Sphilip		 * userland may have already messed the mutex, sigh.
1173227569Sphilip		 */
1174227569Sphilip		mtx_unlock_spin(&sched_lock);
1175227569Sphilip		return (EPERM);
1176227569Sphilip	}
1177227569Sphilip	umtx_pi_setowner(pi, owner);
1178227569Sphilip	uq = TAILQ_FIRST(&pi->pi_blocked);
1179227569Sphilip	if (uq != NULL) {
1180227569Sphilip		int pri;
1181227569Sphilip
1182227569Sphilip		pri = UPRI(uq->uq_thread);
1183227569Sphilip		if (pri < UPRI(owner))
1184227569Sphilip			sched_lend_user_prio(owner, pri);
1185227569Sphilip	}
1186227569Sphilip	mtx_unlock_spin(&sched_lock);
1187227569Sphilip	return (0);
1188227569Sphilip}
1189227569Sphilip
1190227569Sphilip/*
1191227569Sphilip * Adjust a thread's order position in its blocked PI mutex,
1192227569Sphilip * this may result new priority propagating process.
1193227569Sphilip */
/*
 * Called from the scheduler, with sched_lock held, after td's user
 * priority changed from 'oldpri' while blocked on a PI umutex.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;

	mtx_assert(&sched_lock, MA_OWNED);
	MPASS(TD_ON_UPILOCK(td));

	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
1222227569Sphilip
1223227569Sphilip/*
1224227569Sphilip * Sleep on a PI mutex.
1225227569Sphilip */
/*
 * Block the current thread on PI mutex 'pi', whose userland owner word
 * named tid 'owner', sleeping under 'wmesg' for at most 'timo' ticks.
 * Entered and exited with the umtxq chain lock held.  Returns 0 or the
 * msleep error (ETIMEDOUT on timeout, signal errors via PCATCH).
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Current, We only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&sched_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			/*
			 * NOTE(review): this store to uq1 is overwritten by
			 * the TAILQ_FOREACH below before any use — appears
			 * to be a leftover dead store.
			 */
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&sched_lock);
	}

	/* Insert into pi_blocked, which is kept sorted by user priority. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	td->td_flags |= TDF_UPIBLOCKED;
	mtx_unlock_spin(&sched_lock);
	umtxq_unlock(&uq->uq_key);

	/* Lend our priority down the ownership chain before sleeping. */
	mtx_lock_spin(&sched_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			/* Still queued: timeout/signal path, dequeue self. */
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	/* Undo blocked state and give back any priority we propagated. */
	mtx_lock_spin(&sched_lock);
	uq->uq_pi_blocked = NULL;
	td->td_flags &= ~TDF_UPIBLOCKED;
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}
1307227569Sphilip
1308227569Sphilip/*
1309227569Sphilip * Add reference count for a PI mutex.
1310227569Sphilip */
1311227569Sphilipstatic void
1312227569Sphilipumtx_pi_ref(struct umtx_pi *pi)
1313227569Sphilip{
1314227569Sphilip	struct umtxq_chain *uc;
1315227569Sphilip
1316227569Sphilip	uc = umtxq_getchain(&pi->pi_key);
1317227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
1318227569Sphilip	pi->pi_refcount++;
1319227569Sphilip}
1320227569Sphilip
1321227569Sphilip/*
1322227569Sphilip * Decrease reference count for a PI mutex, if the counter
1323227569Sphilip * is decreased to zero, its memory space is freed.
1324227569Sphilip */
1325227569Sphilipstatic void
1326227569Sphilipumtx_pi_unref(struct umtx_pi *pi)
1327227569Sphilip{
1328227569Sphilip	struct umtxq_chain *uc;
1329227569Sphilip	int free = 0;
1330227569Sphilip
1331227569Sphilip	uc = umtxq_getchain(&pi->pi_key);
1332227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
1333227569Sphilip	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1334227569Sphilip	if (--pi->pi_refcount == 0) {
1335227569Sphilip		mtx_lock_spin(&sched_lock);
1336227569Sphilip		if (pi->pi_owner != NULL) {
1337227569Sphilip			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1338227569Sphilip				pi, pi_link);
1339227569Sphilip			pi->pi_owner = NULL;
1340227569Sphilip		}
1341227569Sphilip		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1342227569Sphilip			("blocked queue not empty"));
1343227569Sphilip		mtx_unlock_spin(&sched_lock);
1344227569Sphilip		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1345227569Sphilip		free = 1;
1346227569Sphilip	}
1347227569Sphilip	if (free)
1348227569Sphilip		umtx_pi_free(pi);
1349227569Sphilip}
1350227569Sphilip
1351227569Sphilip/*
1352227569Sphilip * Find a PI mutex in hash table.
1353227569Sphilip */
1354227569Sphilipstatic struct umtx_pi *
1355227569Sphilipumtx_pi_lookup(struct umtx_key *key)
1356227569Sphilip{
1357227569Sphilip	struct umtxq_chain *uc;
1358227569Sphilip	struct umtx_pi *pi;
1359227569Sphilip
1360227569Sphilip	uc = umtxq_getchain(key);
1361227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
1362227569Sphilip
1363227569Sphilip	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1364227569Sphilip		if (umtx_key_match(&pi->pi_key, key)) {
1365227569Sphilip			return (pi);
1366227569Sphilip		}
1367227569Sphilip	}
1368227569Sphilip	return (NULL);
1369227569Sphilip}
1370227569Sphilip
1371227569Sphilip/*
1372227569Sphilip * Insert a PI mutex into hash table.
1373227569Sphilip */
1374227569Sphilipstatic inline void
1375227569Sphilipumtx_pi_insert(struct umtx_pi *pi)
1376227569Sphilip{
1377227569Sphilip	struct umtxq_chain *uc;
1378227569Sphilip
1379227569Sphilip	uc = umtxq_getchain(&pi->pi_key);
1380227569Sphilip	UMTXQ_LOCKED_ASSERT(uc);
1381227569Sphilip	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1382284555Sarybchik}
1383284555Sarybchik
1384284555Sarybchik/*
1385284555Sarybchik * Lock a PI mutex.
1386284555Sarybchik */
/*
 * Acquire a priority-inheriting umutex, sleeping for at most 'timo'
 * ticks when contested.  Mirrors _do_lock_normal, but also maintains
 * the kernel-side umtx_pi state so the owner inherits the priority of
 * its best waiter.  With 'try' non-zero, returns EBUSY instead of
 * sleeping.  A reference on the umtx_pi is held across every faulting
 * access to *m and dropped before each retry or return.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		pi = NULL;
		umtxq_lock(&uq->uq_key);
		pi = umtx_pi_lookup(&uq->uq_key);
		if (pi == NULL) {
			/*
			 * Allocate outside the chain lock (the allocation
			 * may sleep), then re-check for a racing insert.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc();
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL)
				umtx_pi_free(new_pi);
			else {
				umtx_pi_insert(new_pi);
				pi = new_pi;
			}
		}

		/* Keep pi alive while we touch userland memory. */
		umtx_pi_ref(pi);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Care must be exercised when dealing with umtx structure.  It
		 * can fault on any access.
		 */

		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* Record ourselves as kernel-side owner. */
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			umtxq_lock(&uq->uq_key);
			umtx_pi_unref(pi);
			umtxq_unlock(&uq->uq_key);
			pi = NULL;
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
		pi = NULL;
	}

	/* Drop the reference still held by the 'break' exits above. */
	if (pi != NULL) {
		umtxq_lock(&uq->uq_key);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
1534227569Sphilip
1535227569Sphilipstatic int
1536227569Sphilipdo_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
1537227569Sphilip	struct timespec *timeout, int try)
1538227569Sphilip{
1539227569Sphilip	struct timespec ts, ts2, ts3;
1540227569Sphilip	struct timeval tv;
1541227569Sphilip	int error;
1542227569Sphilip
1543227569Sphilip	if (timeout == NULL) {
1544227569Sphilip		error = _do_lock_pi(td, m, flags, 0, try);
1545227569Sphilip	} else {
1546227569Sphilip		getnanouptime(&ts);
1547227569Sphilip		timespecadd(&ts, timeout);
1548227569Sphilip		TIMESPEC_TO_TIMEVAL(&tv, timeout);
1549227569Sphilip		for (;;) {
1550227569Sphilip			error = _do_lock_pi(td, m, flags, tvtohz(&tv), try);
1551227569Sphilip			if (error != ETIMEDOUT)
1552227569Sphilip				break;
1553227569Sphilip			getnanouptime(&ts2);
1554227569Sphilip			if (timespeccmp(&ts2, &ts, >=)) {
1555227569Sphilip				error = ETIMEDOUT;
1556227569Sphilip				break;
1557227569Sphilip			}
1558227569Sphilip			ts3 = ts;
1559227569Sphilip			timespecsub(&ts3, &ts2);
1560227569Sphilip			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1561227569Sphilip		}
1562227569Sphilip	}
1563227569Sphilip	/*
1564227569Sphilip	 * This lets userland back off critical region if needed.
1565227569Sphilip	 */
1566227569Sphilip	if (error == EINTR)
1567227569Sphilip		error = ERESTART;
1568227569Sphilip	return (error);
1569227569Sphilip}
1570227569Sphilip
1571227569Sphilip/*
1572227569Sphilip * Unlock a PI mutex.
1573227569Sphilip */
/*
 * Release a priority-inheriting umutex owned by the calling thread.
 * Hands kernel-side ownership of the umtx_pi to the first waiter,
 * recomputes our own lent priority, updates the userland owner word,
 * and wakes that waiter.  Returns 0, EFAULT on a bad address, EPERM
 * when the caller is not the owner (or the kernel PI state disagrees),
 * or EINVAL if the owner word changed underneath us.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		/*
		 * Disown the PI mutex and recompute our priority from the
		 * best waiter of every PI mutex we still hold.
		 */
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&sched_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	/* Wake the chosen first waiter, if any; queue is still busied. */
	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1659
1660/*
1661 * Lock a PP mutex.
1662 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* Only privileged threads may boost themselves to the ceiling. */
	su = (suser(td) == 0);
	for (;;) {
		/* Saved so failure paths can undo this iteration's boost. */
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Translate the user-visible ceiling into the kernel's
		 * inverted priority scale.
		 * NOTE(review): a fuword32() fault (-1) wraps around the
		 * unsigned subtraction and is reported as EINVAL here, not
		 * EFAULT — confirm whether that is intended.
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&sched_lock);
		/*
		 * A thread whose priority is already above the ceiling
		 * (numerically smaller) may not take a PP mutex.
		 */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&sched_lock);
			error = EINVAL;
			goto out;
		}
		/* Raise our lent priority to the ceiling if that is a boost. */
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
		}
		mtx_unlock_spin(&sched_lock);

		/*
		 * A free PP mutex is stored as UMUTEX_CONTESTED, so an
		 * acquire is a CONTESTED -> id|CONTESTED compare-and-swap.
		 */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* Recursion on an error-checking mutex is reported as deadlock. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * This attempt failed or was interrupted: drop back to the
		 * pre-iteration priority, but never below what threads still
		 * blocked on PI mutexes we own require.
		 */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

	/* Same priority roll-back for errors that broke out of the loop. */
	if (error != 0) {
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
1788
1789/*
1790 * Lock a PP mutex.
1791 */
1792static int
1793do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
1794	struct timespec *timeout, int try)
1795{
1796	struct timespec ts, ts2, ts3;
1797	struct timeval tv;
1798	int error;
1799
1800	if (timeout == NULL) {
1801		error = _do_lock_pp(td, m, flags, 0, try);
1802	} else {
1803		getnanouptime(&ts);
1804		timespecadd(&ts, timeout);
1805		TIMESPEC_TO_TIMEVAL(&tv, timeout);
1806		for (;;) {
1807			error = _do_lock_pp(td, m, flags, tvtohz(&tv), try);
1808			if (error != ETIMEDOUT)
1809				break;
1810			getnanouptime(&ts2);
1811			if (timespeccmp(&ts2, &ts, >=)) {
1812				error = ETIMEDOUT;
1813				break;
1814			}
1815			ts3 = ts;
1816			timespecsub(&ts3, &ts2);
1817			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1818		}
1819	}
1820	/*
1821	 * This lets userland back off critical region if needed.
1822	 */
1823	if (error == EINTR)
1824		error = ERESTART;
1825	return (error);
1826}
1827
1828/*
1829 * Unlock a PP mutex.
1830 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the ceiling to fall back to after the unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		/* No remaining PP boost for this thread. */
		new_inherited_pri = PRI_MAX;
	else {
		/* Translate the user ceiling into kernel priority space. */
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		/* Wake exactly one waiter to take the mutex. */
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Recompute our lent priority: the new PP ceiling, further
		 * raised if a higher-priority thread is still blocked on a
		 * PI mutex we own.
		 */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}
1908
1909static int
1910do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
1911	uint32_t *old_ceiling)
1912{
1913	struct umtx_q *uq;
1914	uint32_t save_ceiling;
1915	uint32_t owner, id;
1916	uint32_t flags;
1917	int error;
1918
1919	flags = fuword32(&m->m_flags);
1920	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
1921		return (EINVAL);
1922	if (ceiling > RTP_PRIO_MAX)
1923		return (EINVAL);
1924	id = td->td_tid;
1925	uq = td->td_umtxq;
1926	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1927	   &uq->uq_key)) != 0)
1928		return (error);
1929	for (;;) {
1930		umtxq_lock(&uq->uq_key);
1931		umtxq_busy(&uq->uq_key);
1932		umtxq_unlock(&uq->uq_key);
1933
1934		save_ceiling = fuword32(&m->m_ceilings[0]);
1935
1936		owner = casuword32(&m->m_owner,
1937		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1938
1939		if (owner == UMUTEX_CONTESTED) {
1940			suword32(&m->m_ceilings[0], ceiling);
1941			suword32(&m->m_owner, UMUTEX_CONTESTED);
1942			error = 0;
1943			break;
1944		}
1945
1946		/* The address was invalid. */
1947		if (owner == -1) {
1948			error = EFAULT;
1949			break;
1950		}
1951
1952		if ((owner & ~UMUTEX_CONTESTED) == id) {
1953			suword32(&m->m_ceilings[0], ceiling);
1954			error = 0;
1955			break;
1956		}
1957
1958		/*
1959		 * If we caught a signal, we have retried and now
1960		 * exit immediately.
1961		 */
1962		if (error != 0)
1963			break;
1964
1965		/*
1966		 * We set the contested bit, sleep. Otherwise the lock changed
1967		 * and we need to retry or we lost a race to the thread
1968		 * unlocking the umtx.
1969		 */
1970		umtxq_lock(&uq->uq_key);
1971		umtxq_insert(uq);
1972		umtxq_unbusy(&uq->uq_key);
1973		error = umtxq_sleep(uq, "umtxpp", 0);
1974		umtxq_remove(uq);
1975		umtxq_unlock(&uq->uq_key);
1976	}
1977	umtxq_lock(&uq->uq_key);
1978	if (error == 0)
1979		umtxq_signal(&uq->uq_key, INT_MAX);
1980	umtxq_unbusy(&uq->uq_key);
1981	umtxq_unlock(&uq->uq_key);
1982	umtx_key_release(&uq->uq_key);
1983	if (error == 0 && old_ceiling != NULL)
1984		suword32(old_ceiling, save_ceiling);
1985	return (error);
1986}
1987
1988/*
1989 * Lock a userland POSIX mutex.
1990 */
1991static int
1992do_lock_umutex(struct thread *td, struct umutex *m, struct timespec *ts,
1993	int try)
1994{
1995	uint32_t flags;
1996	int ret;
1997
1998	flags = fuword32(&m->m_flags);
1999	if (flags == -1)
2000		return (EFAULT);
2001
2002	if ((flags & UMUTEX_PRIO_INHERIT) != 0)
2003		ret = do_lock_pi(td, m, flags, ts, try);
2004	else if ((flags & UMUTEX_PRIO_PROTECT) != 0)
2005		ret = do_lock_pp(td, m, flags, ts, try);
2006	else
2007		ret = do_lock_normal(td, m, flags, ts, try);
2008
2009	return (ret);
2010}
2011
2012/*
2013 * Unlock a userland POSIX mutex.
2014 */
2015static int
2016do_unlock_umutex(struct thread *td, struct umutex *m)
2017{
2018	uint32_t flags;
2019	int ret;
2020
2021	flags = fuword32(&m->m_flags);
2022	if (flags == -1)
2023		return (EFAULT);
2024
2025	if ((flags & UMUTEX_PRIO_INHERIT) != 0)
2026		ret = do_unlock_pi(td, m, flags);
2027	else if ((flags & UMUTEX_PRIO_PROTECT) != 0)
2028		ret = do_unlock_pp(td, m, flags);
2029	else
2030		ret = do_unlock_normal(td, m, flags);
2031
2032	return (ret);
2033}
2034
2035int
2036_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2037    /* struct umtx *umtx */
2038{
2039	return _do_lock(td, uap->umtx, td->td_tid, 0);
2040}
2041
2042int
2043_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2044    /* struct umtx *umtx */
2045{
2046	return do_unlock(td, uap->umtx, td->td_tid);
2047}
2048
2049int
2050_umtx_op(struct thread *td, struct _umtx_op_args *uap)
2051{
2052	struct timespec timeout;
2053	struct timespec *ts;
2054	int error;
2055
2056	switch(uap->op) {
2057	case UMTX_OP_MUTEX_LOCK:
2058		/* Allow a null timespec (wait forever). */
2059		if (uap->uaddr2 == NULL)
2060			ts = NULL;
2061		else {
2062			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2063			if (error != 0)
2064				break;
2065			if (timeout.tv_nsec >= 1000000000 ||
2066			    timeout.tv_nsec < 0) {
2067				error = EINVAL;
2068				break;
2069			}
2070			ts = &timeout;
2071		}
2072		error = do_lock_umutex(td, uap->obj, ts, 0);
2073		break;
2074	case UMTX_OP_MUTEX_UNLOCK:
2075		error = do_unlock_umutex(td, uap->obj);
2076		break;
2077	case UMTX_OP_MUTEX_TRYLOCK:
2078		error = do_lock_umutex(td, uap->obj, NULL, 1);
2079		break;
2080	case UMTX_OP_SET_CEILING:
2081		error = do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2082		break;
2083	case UMTX_OP_LOCK:
2084		/* Allow a null timespec (wait forever). */
2085		if (uap->uaddr2 == NULL)
2086			ts = NULL;
2087		else {
2088			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2089			if (error != 0)
2090				break;
2091			if (timeout.tv_nsec >= 1000000000 ||
2092			    timeout.tv_nsec < 0) {
2093				error = EINVAL;
2094				break;
2095			}
2096			ts = &timeout;
2097		}
2098		error = do_lock(td, uap->obj, uap->val, ts);
2099		break;
2100	case UMTX_OP_UNLOCK:
2101		error = do_unlock(td, uap->obj, uap->val);
2102		break;
2103	case UMTX_OP_WAIT:
2104		/* Allow a null timespec (wait forever). */
2105		if (uap->uaddr2 == NULL)
2106			ts = NULL;
2107		else {
2108			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2109			if (error != 0)
2110				break;
2111			if (timeout.tv_nsec >= 1000000000 ||
2112			    timeout.tv_nsec < 0) {
2113				error = EINVAL;
2114				break;
2115			}
2116			ts = &timeout;
2117		}
2118		error = do_wait(td, uap->obj, uap->val, ts);
2119		break;
2120	case UMTX_OP_WAKE:
2121		error = kern_umtx_wake(td, uap->obj, uap->val);
2122		break;
2123	default:
2124		error = EINVAL;
2125		break;
2126	}
2127	return (error);
2128}
2129
2130void
2131umtx_thread_init(struct thread *td)
2132{
2133	td->td_umtxq = umtxq_alloc();
2134	td->td_umtxq->uq_thread = td;
2135}
2136
/*
 * Free the per-thread umtx queue entry allocated by umtx_thread_init().
 */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
2142
/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* A new thread must not start with a leftover priority boost. */
	uq->uq_inherited_pri = PRI_MAX;

	/* Sanity-check that the queue entry is in a pristine state. */
	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
2159
2160/*
2161 * exec() hook.
2162 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	/* On exec(), drop the current thread's umtx PI/PP state. */
	umtx_thread_cleanup(curthread);
}
2169
2170/*
2171 * thread_exit() hook.
2172 */
void
umtx_thread_exit(struct thread *td)
{
	/* Drop the exiting thread's umtx PI/PP state. */
	umtx_thread_cleanup(td);
}
2178
2179/*
2180 * clean up umtx data.
2181 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&sched_lock);
	/* Forget any priority lent to us through PP/PI mutexes. */
	uq->uq_inherited_pri = PRI_MAX;
	/* Disown every PI mutex this thread still holds. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}
2200