kern_umtx.c revision 163697
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 163697 2006-10-26 09:33:34Z davidxu $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32112904Sjeff#include <sys/param.h>
33112904Sjeff#include <sys/kernel.h>
34131431Smarcel#include <sys/limits.h>
35112904Sjeff#include <sys/lock.h>
36115765Sjeff#include <sys/malloc.h>
37112904Sjeff#include <sys/mutex.h>
38112904Sjeff#include <sys/proc.h>
39161678Sdavidxu#include <sys/sched.h>
40161678Sdavidxu#include <sys/sysctl.h>
41112904Sjeff#include <sys/sysent.h>
42112904Sjeff#include <sys/systm.h>
43112904Sjeff#include <sys/sysproto.h>
44139013Sdavidxu#include <sys/eventhandler.h>
45112904Sjeff#include <sys/umtx.h>
46112904Sjeff
47139013Sdavidxu#include <vm/vm.h>
48139013Sdavidxu#include <vm/vm_param.h>
49139013Sdavidxu#include <vm/pmap.h>
50139013Sdavidxu#include <vm/vm_map.h>
51139013Sdavidxu#include <vm/vm_object.h>
52139013Sdavidxu
53162536Sdavidxu#ifdef COMPAT_IA32
54162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
55162536Sdavidxu#endif
56162536Sdavidxu
/* Classes of userland synchronization object a umtx_key can name. */
#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5
63139013Sdavidxu
/*
 * Key to represent a unique userland synchronous object.
 * A process-shared object is named by (vm_object, offset); a private
 * one by (vmspace, address).  info.both overlays either pair so that
 * hashing and comparison can treat them uniformly.
 */
struct umtx_key {
	int	hash;		/* chain index, computed by umtxq_hash() */
	int	type;		/* TYPE_* object class */
	int	shared;		/* non-zero if process-shared */
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;	/* overlays object/vs */
			uintptr_t	b;	/* overlays offset/addr */
		} both;
	} info;
};
84139013Sdavidxu
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by an owner thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
105161678Sdavidxu
/* A userland synchronous object user (one per thread). */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* thread is queued on a chain */

	/* The thread waiting on this entry. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or sched_lock, write must have both chain lock and
	 * sched_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};
137115765Sjeff
TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain (hash bucket). */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag; serializes operations that may block. */
	char			uc_busy;

	/* Number of threads sleeping for the busy flag to clear. */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
157115765Sjeff
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

/* Clamp time-sharing user priority to PRI_MAX_TIMESHARE (see above). */
#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)

/* Hash parameters mapping a umtx key onto one of UMTX_CHAINS chains. */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes accepted by umtx_key_get(). */
#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2	/* decide from VM map inheritance */

/* Derive a sharing mode from userland USYNC_* flags. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
183161678Sdavidxu
static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Statistics: number of umtx_pi structures currently allocated. */
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

/* Forward declarations. */
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
216115310Sjeff
/*
 * Module initialization: create the umtx_pi UMA zone, initialize every
 * wait-queue chain and register an exec hook for cleaning up umtx state.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		/* MTX_DUPOK: two different chain locks may be held at once. */
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
			 MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}
235161678Sdavidxu
/*
 * Allocate a per-thread umtx_q; may sleep (M_WAITOK).
 * The queue entry starts with no PP-inherited priority (PRI_MAX).
 */
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}
246143149Sdavidxu
/* Release a umtx_q previously obtained from umtxq_alloc(). */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
252143149Sdavidxu
253161678Sdavidxustatic inline void
254139013Sdavidxuumtxq_hash(struct umtx_key *key)
255138224Sdavidxu{
256161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
257161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
258138224Sdavidxu}
259138224Sdavidxu
260139013Sdavidxustatic inline int
261139013Sdavidxuumtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
262139013Sdavidxu{
263139013Sdavidxu	return (k1->type == k2->type &&
264161678Sdavidxu		k1->info.both.a == k2->info.both.a &&
265161678Sdavidxu	        k1->info.both.b == k2->info.both.b);
266139013Sdavidxu}
267139013Sdavidxu
268161678Sdavidxustatic inline struct umtxq_chain *
269161678Sdavidxuumtxq_getchain(struct umtx_key *key)
270139013Sdavidxu{
271161678Sdavidxu	return (&umtxq_chains[key->hash]);
272139013Sdavidxu}
273139013Sdavidxu
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 * Caller must hold the chain lock; sleeps until the busy flag clears.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	/* Wait for the current holder; msleep drops uc_lock while asleep. */
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}
292139257Sdavidxu
/*
 * Unbusy a chain.
 * Caller must hold the chain lock; wakes one waiter if any are blocked
 * in umtxq_busy().
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
308139257Sdavidxu
309161678Sdavidxu/*
310161678Sdavidxu * Lock a chain.
311161678Sdavidxu */
312139257Sdavidxustatic inline void
313139013Sdavidxuumtxq_lock(struct umtx_key *key)
314138224Sdavidxu{
315161678Sdavidxu	struct umtxq_chain *uc;
316161678Sdavidxu
317161678Sdavidxu	uc = umtxq_getchain(key);
318161678Sdavidxu	mtx_lock(&uc->uc_lock);
319138224Sdavidxu}
320138224Sdavidxu
321161678Sdavidxu/*
322161678Sdavidxu * Unlock a chain.
323161678Sdavidxu */
324138225Sdavidxustatic inline void
325139013Sdavidxuumtxq_unlock(struct umtx_key *key)
326138224Sdavidxu{
327161678Sdavidxu	struct umtxq_chain *uc;
328161678Sdavidxu
329161678Sdavidxu	uc = umtxq_getchain(key);
330161678Sdavidxu	mtx_unlock(&uc->uc_lock);
331138224Sdavidxu}
332138224Sdavidxu
/*
 * Insert a thread onto the umtx queue.
 * Chain lock must be held; marks the entry as queued (UQF_UMTXQ).
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}
346139013Sdavidxu
/*
 * Remove thread from the umtx queue.
 * Chain lock must be held; safe to call when already dequeued
 * (e.g. after a wakeup raced with a timeout).
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}
362139013Sdavidxu
363161678Sdavidxu/*
364161678Sdavidxu * Check if there are multiple waiters
365161678Sdavidxu */
366139013Sdavidxustatic int
367139013Sdavidxuumtxq_count(struct umtx_key *key)
368139013Sdavidxu{
369161678Sdavidxu	struct umtxq_chain *uc;
370115765Sjeff	struct umtx_q *uq;
371161678Sdavidxu	int count = 0;
372115765Sjeff
373161678Sdavidxu	uc = umtxq_getchain(key);
374161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
375161678Sdavidxu	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
376139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
377139013Sdavidxu			if (++count > 1)
378139013Sdavidxu				break;
379139013Sdavidxu		}
380115765Sjeff	}
381139013Sdavidxu	return (count);
382115765Sjeff}
383115765Sjeff
384161678Sdavidxu/*
385161678Sdavidxu * Check if there are multiple PI waiters and returns first
386161678Sdavidxu * waiter.
387161678Sdavidxu */
388139257Sdavidxustatic int
389161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
390161678Sdavidxu{
391161678Sdavidxu	struct umtxq_chain *uc;
392161678Sdavidxu	struct umtx_q *uq;
393161678Sdavidxu	int count = 0;
394161678Sdavidxu
395161678Sdavidxu	*first = NULL;
396161678Sdavidxu	uc = umtxq_getchain(key);
397161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
398161678Sdavidxu	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
399161678Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
400161678Sdavidxu			if (++count > 1)
401161678Sdavidxu				break;
402161678Sdavidxu			*first = uq;
403161678Sdavidxu		}
404161678Sdavidxu	}
405161678Sdavidxu	return (count);
406161678Sdavidxu}
407161678Sdavidxu
408161678Sdavidxu/*
409161678Sdavidxu * Wake up threads waiting on an userland object.
410161678Sdavidxu */
411161678Sdavidxustatic int
412139257Sdavidxuumtxq_signal(struct umtx_key *key, int n_wake)
413115765Sjeff{
414161678Sdavidxu	struct umtxq_chain *uc;
415139257Sdavidxu	struct umtx_q *uq, *next;
416161678Sdavidxu	int ret;
417115765Sjeff
418139257Sdavidxu	ret = 0;
419161678Sdavidxu	uc = umtxq_getchain(key);
420161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
421161678Sdavidxu	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
422139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
423139013Sdavidxu			umtxq_remove(uq);
424161678Sdavidxu			wakeup(uq);
425139257Sdavidxu			if (++ret >= n_wake)
426139257Sdavidxu				break;
427139013Sdavidxu		}
428139013Sdavidxu	}
429139257Sdavidxu	return (ret);
430138224Sdavidxu}
431138224Sdavidxu
/*
 * Wake up specified thread.
 * Chain lock must be held; dequeues the entry before waking.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}
445161678Sdavidxu
/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.
 * Returns 0 if already woken, otherwise msleep()'s result with
 * EWOULDBLOCK mapped to ETIMEDOUT.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	/* A wakeup already dequeued us; no need to sleep at all. */
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	/* PCATCH: sleep is interruptible by signals. */
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}
465138224Sdavidxu
/*
 * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys are (vmspace, address).  For PROCESS_SHARE and
 * AUTO_SHARE the address is looked up in the VM map: process-shared
 * mappings yield a (vm_object, offset) key with a reference held on
 * the object (released by umtx_key_release()); otherwise the key
 * falls back to private naming.  Returns 0 or EFAULT.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Offset of addr within the backing object. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
511139013Sdavidxu
512161678Sdavidxu/*
513161678Sdavidxu * Release key.
514161678Sdavidxu */
515139013Sdavidxustatic inline void
516139013Sdavidxuumtx_key_release(struct umtx_key *key)
517139013Sdavidxu{
518161678Sdavidxu	if (key->shared)
519139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
520139013Sdavidxu}
521139013Sdavidxu
/*
 * Lock a umtx object.
 *
 * Retry loop: CAS the owner word in userland; on contention, queue
 * this thread, set the contested bit (so userland unlock enters the
 * kernel), and sleep.  timo is in ticks (0 = forever).  Returns 0,
 * EFAULT, or the error from an interrupted/timed-out sleep.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
618112904Sjeff
/*
 * Lock a umtx object.
 *
 * Wrapper adding optional timeout handling: the deadline is tracked
 * against the monotonic clock and the remaining time is re-derived on
 * each ETIMEDOUT retry of _do_lock_umtx().
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* ts = absolute deadline */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
658139013Sdavidxu
/*
 * Unlock a umtx object.
 *
 * Verifies ownership, handles the uncontested fast path with a CAS,
 * and otherwise hands the lock off: the word is set to UMTX_UNOWNED
 * (<= 1 waiter) or UMTX_CONTESTED (more waiters) and one waiter is
 * woken.  Returns 0, EFAULT, EPERM, or EINVAL.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lock became contested after the fuword; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
719139013Sdavidxu
720162536Sdavidxu#ifdef COMPAT_IA32
721162536Sdavidxu
/*
 * Lock a umtx object (32-bit compat variant of _do_lock_umtx).
 * Same CAS-retry / queue / sleep protocol, operating on a 32-bit
 * owner word for COMPAT_IA32 processes.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
817162536Sdavidxu
818161678Sdavidxu/*
819161678Sdavidxu * Lock a umtx object (32-bit word), optionally with a timeout.
 *
 * timeout == NULL: block until the lock is acquired; EINTR is mapped to
 * ERESTART so the interrupted syscall is transparently restarted.
 *
 * timeout != NULL: an absolute deadline is computed on the uptime clock
 * and the low-level lock is retried with the remaining time until it
 * succeeds, fails with a real error, or the deadline passes.  Timed
 * locking is not restartable, so ERESTART is mapped back to EINTR.
820161678Sdavidxu */
821162536Sdavidxustatic int
822162536Sdavidxudo_lock_umtx32(struct thread *td, void *m, uint32_t id,
823162536Sdavidxu	struct timespec *timeout)
824162536Sdavidxu{
825162536Sdavidxu	struct timespec ts, ts2, ts3;
826162536Sdavidxu	struct timeval tv;
827162536Sdavidxu	int error;
828162536Sdavidxu
829162536Sdavidxu	if (timeout == NULL) {
830162536Sdavidxu		error = _do_lock_umtx32(td, m, id, 0);
831162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
832162536Sdavidxu		if (error == EINTR)
833162536Sdavidxu			error = ERESTART;
834162536Sdavidxu	} else {
		/* ts becomes the absolute deadline on the uptime clock. */
835162536Sdavidxu		getnanouptime(&ts);
836162536Sdavidxu		timespecadd(&ts, timeout);
837162536Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
838162536Sdavidxu		for (;;) {
839162536Sdavidxu			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
840162536Sdavidxu			if (error != ETIMEDOUT)
841162536Sdavidxu				break;
			/* A sleep timed out; stop if the deadline passed. */
842162536Sdavidxu			getnanouptime(&ts2);
843162536Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
844162536Sdavidxu				error = ETIMEDOUT;
845162536Sdavidxu				break;
846162536Sdavidxu			}
			/* Retry with the time remaining until the deadline. */
847162536Sdavidxu			ts3 = ts;
848162536Sdavidxu			timespecsub(&ts3, &ts2);
849162536Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
850162536Sdavidxu		}
851162536Sdavidxu		/* Timed-locking is not restarted. */
852162536Sdavidxu		if (error == ERESTART)
853162536Sdavidxu			error = EINTR;
854162536Sdavidxu	}
855162536Sdavidxu	return (error);
856162536Sdavidxu}
857162536Sdavidxu
858162536Sdavidxu/*
859162536Sdavidxu * Unlock a umtx object (32-bit word).  The calling thread must be the
 * owner recorded in the word.  An uncontested lock is released with a
 * single CAS; a contested one is handed off under the umtx queue lock
 * and exactly one waiter is woken.
860162536Sdavidxu */
861162536Sdavidxustatic int
862162536Sdavidxudo_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
863162536Sdavidxu{
864162536Sdavidxu	struct umtx_key key;
865162536Sdavidxu	uint32_t owner;
866162536Sdavidxu	uint32_t old;
867162536Sdavidxu	int error;
868162536Sdavidxu	int count;
869162536Sdavidxu
870162536Sdavidxu	/*
871162536Sdavidxu	 * Make sure we own this mtx.
872162536Sdavidxu	 */
873162536Sdavidxu	owner = fuword32(m);
	/* -1 from fuword32() means the userland address was invalid. */
874162536Sdavidxu	if (owner == -1)
875162536Sdavidxu		return (EFAULT);
876162536Sdavidxu
877162536Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
878162536Sdavidxu		return (EPERM);
879162536Sdavidxu
880162536Sdavidxu	/* This should be done in userland */
881162536Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
882162536Sdavidxu		old = casuword32(m, owner, UMUTEX_UNOWNED);
883162536Sdavidxu		if (old == -1)
884162536Sdavidxu			return (EFAULT);
885162536Sdavidxu		if (old == owner)
886162536Sdavidxu			return (0);
		/* Lost a race; the word became contested, fall through. */
887162536Sdavidxu		owner = old;
888162536Sdavidxu	}
889162536Sdavidxu
890162536Sdavidxu	/* We should only ever be in here for contested locks */
891162536Sdavidxu	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
892162536Sdavidxu		&key)) != 0)
893162536Sdavidxu		return (error);
894162536Sdavidxu
	/* Busy the chain so the waiter count stays stable across the CAS. */
895162536Sdavidxu	umtxq_lock(&key);
896162536Sdavidxu	umtxq_busy(&key);
897162536Sdavidxu	count = umtxq_count(&key);
898162536Sdavidxu	umtxq_unlock(&key);
899162536Sdavidxu
900162536Sdavidxu	/*
901162536Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
902162536Sdavidxu	 * there is zero or one thread only waiting for it.
903162536Sdavidxu	 * Otherwise, it must be marked as contested.
904162536Sdavidxu	 */
905162536Sdavidxu	old = casuword32(m, owner,
906162536Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
907162536Sdavidxu	umtxq_lock(&key);
908162536Sdavidxu	umtxq_signal(&key,1);
909162536Sdavidxu	umtxq_unbusy(&key);
910162536Sdavidxu	umtxq_unlock(&key);
911162536Sdavidxu	umtx_key_release(&key);
912162536Sdavidxu	if (old == -1)
913162536Sdavidxu		return (EFAULT);
	/* The word changed behind our back: caller misused the lock. */
914162536Sdavidxu	if (old != owner)
915162536Sdavidxu		return (EINVAL);
916162536Sdavidxu	return (0);
917162536Sdavidxu}
918162536Sdavidxu#endif
919162536Sdavidxu
920162536Sdavidxu/*
921161678Sdavidxu * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The thread is inserted on the sleep queue BEFORE the userland word is
 * read; this closes the race with a waker that changes the word and
 * signals between our read and our sleep.  "compat32" selects a 32-bit
 * fetch of the word.  EINTR is mapped to... (ERESTART is mapped to
 * EINTR) so a timed/interrupted wait is not transparently restarted.
922161678Sdavidxu */
923139013Sdavidxustatic int
924163449Sdavidxudo_wait(struct thread *td, void *addr, u_long id,
925162536Sdavidxu	struct timespec *timeout, int compat32)
926139013Sdavidxu{
927143149Sdavidxu	struct umtx_q *uq;
928140245Sdavidxu	struct timespec ts, ts2, ts3;
929139013Sdavidxu	struct timeval tv;
930163449Sdavidxu	u_long tmp;
931140245Sdavidxu	int error = 0;
932139013Sdavidxu
933143149Sdavidxu	uq = td->td_umtxq;
934162536Sdavidxu	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
935161678Sdavidxu	    &uq->uq_key)) != 0)
936139013Sdavidxu		return (error);
937161678Sdavidxu
	/* Queue ourselves first, then validate the word (see above). */
938161678Sdavidxu	umtxq_lock(&uq->uq_key);
939161678Sdavidxu	umtxq_insert(uq);
940161678Sdavidxu	umtxq_unlock(&uq->uq_key);
941162536Sdavidxu	if (compat32 == 0)
942162536Sdavidxu		tmp = fuword(addr);
943162536Sdavidxu        else
944162536Sdavidxu		tmp = fuword32(addr);
945139427Sdavidxu	if (tmp != id) {
		/* Value already changed: no sleep, return success. */
946143149Sdavidxu		umtxq_lock(&uq->uq_key);
947143149Sdavidxu		umtxq_remove(uq);
948143149Sdavidxu		umtxq_unlock(&uq->uq_key);
949140245Sdavidxu	} else if (timeout == NULL) {
950143149Sdavidxu		umtxq_lock(&uq->uq_key);
951161678Sdavidxu		error = umtxq_sleep(uq, "ucond", 0);
952161678Sdavidxu		umtxq_remove(uq);
953143149Sdavidxu		umtxq_unlock(&uq->uq_key);
954139013Sdavidxu	} else {
		/* ts is the absolute deadline on the uptime clock. */
955140245Sdavidxu		getnanouptime(&ts);
956140245Sdavidxu		timespecadd(&ts, timeout);
957140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
958161678Sdavidxu		umtxq_lock(&uq->uq_key);
959139013Sdavidxu		for (;;) {
960161678Sdavidxu			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
			/* Removed from the queue means we were woken up. */
961161678Sdavidxu			if (!(uq->uq_flags & UQF_UMTXQ))
962161678Sdavidxu				break;
963140245Sdavidxu			if (error != ETIMEDOUT)
964140245Sdavidxu				break;
965161678Sdavidxu			umtxq_unlock(&uq->uq_key);
966140245Sdavidxu			getnanouptime(&ts2);
967140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
968139751Sdavidxu				error = ETIMEDOUT;
969161678Sdavidxu				umtxq_lock(&uq->uq_key);
970139013Sdavidxu				break;
971139013Sdavidxu			}
			/* Sleep again for the remaining time. */
972140245Sdavidxu			ts3 = ts;
973140245Sdavidxu			timespecsub(&ts3, &ts2);
974140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
975161678Sdavidxu			umtxq_lock(&uq->uq_key);
976139013Sdavidxu		}
977143149Sdavidxu		umtxq_remove(uq);
978143149Sdavidxu		umtxq_unlock(&uq->uq_key);
979139013Sdavidxu	}
980143149Sdavidxu	umtx_key_release(&uq->uq_key);
981139257Sdavidxu	if (error == ERESTART)
982139257Sdavidxu		error = EINTR;
983139013Sdavidxu	return (error);
984139013Sdavidxu}
985139013Sdavidxu
986161678Sdavidxu/*
987161678Sdavidxu * Wake up threads sleeping on the specified address.
988161678Sdavidxu */
989151692Sdavidxuint
990151692Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
991139013Sdavidxu{
992139013Sdavidxu	struct umtx_key key;
993139257Sdavidxu	int ret;
994139013Sdavidxu
995161678Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
996161678Sdavidxu	   &key)) != 0)
997139257Sdavidxu		return (ret);
998139258Sdavidxu	umtxq_lock(&key);
999139257Sdavidxu	ret = umtxq_signal(&key, n_wake);
1000139258Sdavidxu	umtxq_unlock(&key);
1001139257Sdavidxu	umtx_key_release(&key);
1002139013Sdavidxu	return (0);
1003139013Sdavidxu}
1004139013Sdavidxu
1005161678Sdavidxu/*
1006161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * "timo" is the sleep timeout in ticks (0 = forever), "try" != 0 makes
 * a failed acquisition return EBUSY instead of sleeping.  Returns 0 on
 * success, EFAULT on a bad userland address, EDEADLK for a recursive
 * lock under UMUTEX_ERROR_CHECK, EBUSY for a failed trylock, or the
 * error from an interrupted/timed-out sleep.
1007161678Sdavidxu */
1008161678Sdavidxustatic int
1009161678Sdavidxu_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1010161678Sdavidxu	int try)
1011161678Sdavidxu{
1012161678Sdavidxu	struct umtx_q *uq;
1013161678Sdavidxu	uint32_t owner, old, id;
1014161678Sdavidxu	int error = 0;
1015161678Sdavidxu
1016161678Sdavidxu	id = td->td_tid;
1017161678Sdavidxu	uq = td->td_umtxq;
1018161678Sdavidxu
1019161678Sdavidxu	/*
1020161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1021161678Sdavidxu	 * can fault on any access.
1022161678Sdavidxu	 */
1023161678Sdavidxu	for (;;) {
1024161678Sdavidxu		/*
1025161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1026161678Sdavidxu		 */
1027161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1028161678Sdavidxu
1029161678Sdavidxu		/* The acquire succeeded. */
1030161678Sdavidxu		if (owner == UMUTEX_UNOWNED)
1031161678Sdavidxu			return (0);
1032161678Sdavidxu
1033161678Sdavidxu		/* The address was invalid. */
1034161678Sdavidxu		if (owner == -1)
1035161678Sdavidxu			return (EFAULT);
1036161678Sdavidxu
1037161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1038161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1039161678Sdavidxu			owner = casuword32(&m->m_owner,
1040161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1041161678Sdavidxu
1042161678Sdavidxu			if (owner == UMUTEX_CONTESTED)
1043161678Sdavidxu				return (0);
1044161678Sdavidxu
1045161678Sdavidxu			/* The address was invalid. */
1046161678Sdavidxu			if (owner == -1)
1047161678Sdavidxu				return (EFAULT);
1048161678Sdavidxu
1049161678Sdavidxu			/* If this failed the lock has changed, restart. */
1050161678Sdavidxu			continue;
1051161678Sdavidxu		}
1052161678Sdavidxu
		/* Error-checking mutexes detect recursion by owner tid. */
1053161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1054161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id)
1055161678Sdavidxu			return (EDEADLK);
1056161678Sdavidxu
1057161678Sdavidxu		if (try != 0)
1058161678Sdavidxu			return (EBUSY);
1059161678Sdavidxu
1060161678Sdavidxu		/*
1061161678Sdavidxu		 * If we caught a signal, we have retried and now
1062161678Sdavidxu		 * exit immediately.
1063161678Sdavidxu		 */
1064161678Sdavidxu		if (error != 0)
1065161678Sdavidxu			return (error);
1066161678Sdavidxu
1067161678Sdavidxu		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1068161678Sdavidxu		    GET_SHARE(flags), &uq->uq_key)) != 0)
1069161678Sdavidxu			return (error);
1070161678Sdavidxu
		/* Queue before the CAS so a racing unlock can find us. */
1071161678Sdavidxu		umtxq_lock(&uq->uq_key);
1072161678Sdavidxu		umtxq_busy(&uq->uq_key);
1073161678Sdavidxu		umtxq_insert(uq);
1074161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
1075161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1076161678Sdavidxu
1077161678Sdavidxu		/*
1078161678Sdavidxu		 * Set the contested bit so that a release in user space
1079161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1080161678Sdavidxu		 * either some one else has acquired the lock or it has been
1081161678Sdavidxu		 * released.
1082161678Sdavidxu		 */
1083161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1084161678Sdavidxu
1085161678Sdavidxu		/* The address was invalid. */
1086161678Sdavidxu		if (old == -1) {
1087161678Sdavidxu			umtxq_lock(&uq->uq_key);
1088161678Sdavidxu			umtxq_remove(uq);
1089161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1090161678Sdavidxu			umtx_key_release(&uq->uq_key);
1091161678Sdavidxu			return (EFAULT);
1092161678Sdavidxu		}
1093161678Sdavidxu
1094161678Sdavidxu		/*
1095161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1096161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1097161678Sdavidxu		 * unlocking the umtx.
1098161678Sdavidxu		 */
1099161678Sdavidxu		umtxq_lock(&uq->uq_key);
1100161678Sdavidxu		if (old == owner)
1101161678Sdavidxu			error = umtxq_sleep(uq, "umtxn", timo);
1102161678Sdavidxu		umtxq_remove(uq);
1103161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1104161678Sdavidxu		umtx_key_release(&uq->uq_key);
1105161678Sdavidxu	}
1106161678Sdavidxu
1107161678Sdavidxu	return (0);
1108161678Sdavidxu}
1109161678Sdavidxu
1113161678Sdavidxu/*
1114161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1115161678Sdavidxu */
1116161678Sdavidxustatic int
1117161678Sdavidxudo_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1118161678Sdavidxu{
1119161678Sdavidxu	struct umtx_key key;
1120161678Sdavidxu	uint32_t owner, old, id;
1121161678Sdavidxu	int error;
1122161678Sdavidxu	int count;
1123161678Sdavidxu
1124161678Sdavidxu	id = td->td_tid;
1125161678Sdavidxu	/*
1126161678Sdavidxu	 * Make sure we own this mtx.
1127161678Sdavidxu	 */
1128163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	/* -1 from fuword32() means the userland address was invalid. */
1129161678Sdavidxu	if (owner == -1)
1130161678Sdavidxu		return (EFAULT);
1131161678Sdavidxu
1132161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1133161678Sdavidxu		return (EPERM);
1134161678Sdavidxu
1135161678Sdavidxu	/* This should be done in userland */
1136161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1137161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1138161678Sdavidxu		if (old == -1)
1139161678Sdavidxu			return (EFAULT);
1140161678Sdavidxu		if (old == owner)
1141161678Sdavidxu			return (0);
		/* Lost a race; the word became contested, fall through. */
1142161855Sdavidxu		owner = old;
1143161678Sdavidxu	}
1144161678Sdavidxu
1145161678Sdavidxu	/* We should only ever be in here for contested locks */
1146161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1147161678Sdavidxu	    &key)) != 0)
1148161678Sdavidxu		return (error);
1149161678Sdavidxu
	/* Busy the chain so the waiter count stays stable across the CAS. */
1150161678Sdavidxu	umtxq_lock(&key);
1151161678Sdavidxu	umtxq_busy(&key);
1152161678Sdavidxu	count = umtxq_count(&key);
1153161678Sdavidxu	umtxq_unlock(&key);
1154161678Sdavidxu
1155161678Sdavidxu	/*
1156161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1157161678Sdavidxu	 * there is zero or one thread only waiting for it.
1158161678Sdavidxu	 * Otherwise, it must be marked as contested.
1159161678Sdavidxu	 */
1160161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1161161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1162161678Sdavidxu	umtxq_lock(&key);
1163161678Sdavidxu	umtxq_signal(&key,1);
1164161678Sdavidxu	umtxq_unbusy(&key);
1165161678Sdavidxu	umtxq_unlock(&key);
1166161678Sdavidxu	umtx_key_release(&key);
1167161678Sdavidxu	if (old == -1)
1168161678Sdavidxu		return (EFAULT);
	/* The word changed behind our back: caller misused the lock. */
1169161678Sdavidxu	if (old != owner)
1170161678Sdavidxu		return (EINVAL);
1171161678Sdavidxu	return (0);
1172161678Sdavidxu}
1173161678Sdavidxu
/*
 * Allocate a zeroed PI mutex record and account for it.
 *
 * Returns NULL only when "flags" includes M_NOWAIT and the zone
 * allocation fails; with M_WAITOK the allocation always succeeds.
 */
1174161678Sdavidxustatic inline struct umtx_pi *
1175163697Sdavidxuumtx_pi_alloc(int flags)
1176161678Sdavidxu{
1177161678Sdavidxu	struct umtx_pi *pi;
1178161678Sdavidxu
1179163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	/*
	 * uma_zalloc() may return NULL under M_NOWAIT; the previous code
	 * would have dereferenced it in TAILQ_INIT().  The caller in
	 * _do_lock_pi() already handles a NULL return by retrying with
	 * M_WAITOK.
	 */
	if (pi == NULL)
		return (NULL);
1180161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1181161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1182161678Sdavidxu	return (pi);
1183161678Sdavidxu}
1184161678Sdavidxu
/*
 * Return a PI mutex record to the zone and drop the global
 * allocation counter (pairs with umtx_pi_alloc()).
 */
1185161678Sdavidxustatic inline void
1186161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1187161678Sdavidxu{
1188161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1189161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1190161678Sdavidxu}
1191161678Sdavidxu
1192161678Sdavidxu/*
1193161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1194161678Sdavidxu * changed.
 *
 * pi->pi_blocked is kept sorted by user priority (highest-priority
 * thread first).  Returns 1 if the thread was moved, 0 otherwise.
 * Caller must hold sched_lock.
1195161678Sdavidxu */
1196161678Sdavidxustatic int
1197161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1198161678Sdavidxu{
1199161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1200161678Sdavidxu	struct thread *td1;
1201161678Sdavidxu
1202161678Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1203161678Sdavidxu	if (pi == NULL)
1204161678Sdavidxu		return (0);
1205161678Sdavidxu
1206161678Sdavidxu	uq = td->td_umtxq;
1207161678Sdavidxu
1208161678Sdavidxu	/*
1209161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1210161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1211161678Sdavidxu	 * the previous thread or higher than the next thread.
1212161678Sdavidxu	 */
1213161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1214161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1215161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1216161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1217161678Sdavidxu		/*
1218161678Sdavidxu		 * Remove thread from blocked chain and determine where
1219161678Sdavidxu		 * it should be moved to.
1220161678Sdavidxu		 */
1221161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1222161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1223161678Sdavidxu			td1 = uq1->uq_thread;
1224161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first lower-priority (larger UPRI) entry. */
1225161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1226161678Sdavidxu				break;
1227161678Sdavidxu		}
1228161678Sdavidxu
1229161678Sdavidxu		if (uq1 == NULL)
1230161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1231161678Sdavidxu		else
1232161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1233161678Sdavidxu	}
1234161678Sdavidxu	return (1);
1235161678Sdavidxu}
1236161678Sdavidxu
1237161678Sdavidxu/*
1238161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1239161678Sdavidxu * PI mutex.
 *
 * Starting at td (about to block), walk the chain of lock owners,
 * lending td's priority to each owner whose priority is lower, until
 * an owner that is not itself blocked on a PI mutex is reached.
 * Caller must hold sched_lock.
1240161678Sdavidxu */
1241161678Sdavidxustatic void
1242161678Sdavidxuumtx_propagate_priority(struct thread *td)
1243161678Sdavidxu{
1244161678Sdavidxu	struct umtx_q *uq;
1245161678Sdavidxu	struct umtx_pi *pi;
1246161678Sdavidxu	int pri;
1247161678Sdavidxu
1248161678Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1249161678Sdavidxu	pri = UPRI(td);
1250161678Sdavidxu	uq = td->td_umtxq;
1251161678Sdavidxu	pi = uq->uq_pi_blocked;
1252161678Sdavidxu	if (pi == NULL)
1253161678Sdavidxu		return;
1254161678Sdavidxu
1255161678Sdavidxu	for (;;) {
1256161678Sdavidxu		td = pi->pi_owner;
		/* Owner not yet known for this PI mutex; nothing to lend to. */
1257161678Sdavidxu		if (td == NULL)
1258161678Sdavidxu			return;
1259161678Sdavidxu
1260161678Sdavidxu		MPASS(td->td_proc != NULL);
1261161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1262161678Sdavidxu
		/* Owner already runs at least as high; chain is satisfied. */
1263161678Sdavidxu		if (UPRI(td) <= pri)
1264161678Sdavidxu			return;
1265161678Sdavidxu
1266161678Sdavidxu		sched_lend_user_prio(td, pri);
1267161678Sdavidxu
1268161678Sdavidxu		/*
1269161678Sdavidxu		 * Pick up the lock that td is blocked on.
1270161678Sdavidxu		 */
1271161678Sdavidxu		uq = td->td_umtxq;
1272161678Sdavidxu		pi = uq->uq_pi_blocked;
1273161678Sdavidxu		/* Resort td on the list if needed. */
1274161678Sdavidxu		if (!umtx_pi_adjust_thread(pi, td))
1275161678Sdavidxu			break;
1276161678Sdavidxu	}
1277161678Sdavidxu}
1278161678Sdavidxu
1279161678Sdavidxu/*
1280161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1281161678Sdavidxu * it is interrupted by signal or resumed by others.
 *
 * For each owner up the chain, recompute the highest priority among
 * the front waiters of all PI mutexes that owner still holds, and
 * unlend down to that (or to the owner's own inherited priority).
 * Caller must hold sched_lock.
1282161678Sdavidxu */
1283161678Sdavidxustatic void
1284161678Sdavidxuumtx_unpropagate_priority(struct umtx_pi *pi)
1285161678Sdavidxu{
1286161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1287161678Sdavidxu	struct umtx_pi *pi2;
1288161678Sdavidxu	int pri;
1289161678Sdavidxu
1290161678Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1291161678Sdavidxu
1292161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1293161678Sdavidxu		pri = PRI_MAX;
1294161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1295161678Sdavidxu
		/*
		 * The first waiter on each contested PI mutex is the
		 * highest-priority one (the list is kept sorted), so
		 * only the list heads need to be examined.
		 */
1296161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1297161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1298161678Sdavidxu			if (uq != NULL) {
1299161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1300161678Sdavidxu					pri = UPRI(uq->uq_thread);
1301161678Sdavidxu			}
1302161678Sdavidxu		}
1303161678Sdavidxu
1304161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1305161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1306161678Sdavidxu		sched_unlend_user_prio(pi->pi_owner, pri);
		/* Continue with the lock this owner is itself blocked on. */
1307161678Sdavidxu		pi = uq_owner->uq_pi_blocked;
1308161678Sdavidxu	}
1309161678Sdavidxu}
1310161678Sdavidxu
1311161678Sdavidxu/*
1312161678Sdavidxu * Insert a PI mutex into owned list.
 *
 * Records "owner" as the holder of "pi" and links the mutex onto the
 * owner's uq_pi_contested list.  The mutex must not already have an
 * owner; sched_lock must be held.
1313161678Sdavidxu */
1314161678Sdavidxustatic void
1315161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1316161678Sdavidxu{
1317161678Sdavidxu	struct umtx_q *uq_owner;
1318161678Sdavidxu
1319161678Sdavidxu	uq_owner = owner->td_umtxq;
1320161678Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1321161678Sdavidxu	if (pi->pi_owner != NULL)
		/* Fixed typo in panic message: "pi_ower" -> "pi_owner". */
		panic("pi_owner != NULL");
1323161678Sdavidxu	pi->pi_owner = owner;
1324161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1325161678Sdavidxu}
1326161678Sdavidxu
1327161678Sdavidxu/*
1328161678Sdavidxu * Claim ownership of a PI mutex.
 *
 * Returns 0 on success (or if "owner" already owns it), EPERM if some
 * other thread is already recorded as owner.  After claiming, the new
 * owner is lent the priority of the highest-priority waiter, if any.
1329161678Sdavidxu */
1330161678Sdavidxustatic int
1331161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1332161678Sdavidxu{
1333161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1334161678Sdavidxu
1335161678Sdavidxu	uq_owner = owner->td_umtxq;
1336161678Sdavidxu	mtx_lock_spin(&sched_lock);
1337161678Sdavidxu	if (pi->pi_owner == owner) {
1338161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1339161678Sdavidxu		return (0);
1340161678Sdavidxu	}
1341161678Sdavidxu
1342161678Sdavidxu	if (pi->pi_owner != NULL) {
1343161678Sdavidxu		/*
1344161678Sdavidxu		 * userland may have already messed the mutex, sigh.
1345161678Sdavidxu		 */
1346161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1347161678Sdavidxu		return (EPERM);
1348161678Sdavidxu	}
1349161678Sdavidxu	umtx_pi_setowner(pi, owner);
	/* The list head is the highest-priority waiter (sorted list). */
1350161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1351161678Sdavidxu	if (uq != NULL) {
1352161678Sdavidxu		int pri;
1353161678Sdavidxu
1354161678Sdavidxu		pri = UPRI(uq->uq_thread);
1355161678Sdavidxu		if (pri < UPRI(owner))
1356161678Sdavidxu			sched_lend_user_prio(owner, pri);
1357161678Sdavidxu	}
1358161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1359161678Sdavidxu	return (0);
1360161678Sdavidxu}
1361161678Sdavidxu
1362161678Sdavidxu/*
1363161678Sdavidxu * Adjust a thread's order position in its blocked PI mutex,
1364161678Sdavidxu * this may result new priority propagating process.
 *
 * Called (with sched_lock held) when td's priority changed while it is
 * blocked on a PI mutex; "oldpri" is the priority before the change.
1365161678Sdavidxu */
1366161599Sdavidxuvoid
1367161678Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1368161599Sdavidxu{
1369161678Sdavidxu	struct umtx_q *uq;
1370161678Sdavidxu	struct umtx_pi *pi;
1371161678Sdavidxu
1372161678Sdavidxu	uq = td->td_umtxq;
1373161678Sdavidxu
1374161678Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1375161678Sdavidxu	MPASS(TD_ON_UPILOCK(td));
1376161678Sdavidxu
1377161678Sdavidxu	/*
1378161678Sdavidxu	 * Pick up the lock that td is blocked on.
1379161678Sdavidxu	 */
1380161678Sdavidxu	pi = uq->uq_pi_blocked;
1381161678Sdavidxu	MPASS(pi != NULL);
1382161678Sdavidxu
1383161678Sdavidxu	/* Resort the turnstile on the list. */
1384161678Sdavidxu	if (!umtx_pi_adjust_thread(pi, td))
1385161678Sdavidxu		return;
1386161678Sdavidxu
1387161678Sdavidxu	/*
1388161678Sdavidxu	 * If our priority was lowered and we are at the head of the
1389161678Sdavidxu	 * turnstile, then propagate our new priority up the chain.
1390161678Sdavidxu	 */
1391161678Sdavidxu	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1392161678Sdavidxu		umtx_propagate_priority(td);
1393161599Sdavidxu}
1394161599Sdavidxu
1395161678Sdavidxu/*
1396161678Sdavidxu * Sleep on a PI mutex.
 *
 * Called with the umtx chain lock held; it is dropped around the
 * priority propagation and reacquired before returning.  The thread
 * is enqueued on both the umtx sleep queue and the PI mutex's
 * priority-sorted blocked list; "owner" is the tid read from the
 * userland mutex word and is used to resolve the owning thread so
 * its priority can be boosted.
1397161678Sdavidxu */
1398161678Sdavidxustatic int
1399161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1400161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1401161678Sdavidxu{
1402161678Sdavidxu	struct umtxq_chain *uc;
1403161678Sdavidxu	struct thread *td, *td1;
1404161678Sdavidxu	struct umtx_q *uq1;
1405161678Sdavidxu	int pri;
1406161678Sdavidxu	int error = 0;
1407161678Sdavidxu
1408161678Sdavidxu	td = uq->uq_thread;
1409161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1410161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1411161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1412161678Sdavidxu	umtxq_insert(uq);
1413161678Sdavidxu	if (pi->pi_owner == NULL) {
1414161678Sdavidxu		/* XXX
1415161678Sdavidxu		 * Current, We only support process private PI-mutex,
1416161678Sdavidxu		 * non-contended PI-mutexes are locked in userland.
1417161678Sdavidxu		 * Process shared PI-mutex should always be initialized
1418161678Sdavidxu		 * by kernel and be registered in kernel, locking should
1419161678Sdavidxu		 * always be done by kernel to avoid security problems.
1420161678Sdavidxu		 * For process private PI-mutex, we can find owner
1421161678Sdavidxu		 * thread and boost its priority safely.
1422161678Sdavidxu		 */
1423161678Sdavidxu		PROC_LOCK(curproc);
1424161678Sdavidxu		td1 = thread_find(curproc, owner);
1425161678Sdavidxu		mtx_lock_spin(&sched_lock);
		/* Recheck under sched_lock; another thread may have set it. */
1426161678Sdavidxu		if (td1 != NULL && pi->pi_owner == NULL) {
1427161678Sdavidxu			uq1 = td1->td_umtxq;
1428161678Sdavidxu			umtx_pi_setowner(pi, td1);
1429161678Sdavidxu		}
1430161678Sdavidxu		PROC_UNLOCK(curproc);
1431161678Sdavidxu	} else {
1432161678Sdavidxu		mtx_lock_spin(&sched_lock);
1433161678Sdavidxu	}
1434161678Sdavidxu
	/* Insert ourselves in priority order on the blocked list. */
1435161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1436161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1437161678Sdavidxu		if (pri > UPRI(td))
1438161678Sdavidxu			break;
1439161678Sdavidxu	}
1440161678Sdavidxu
1441161678Sdavidxu	if (uq1 != NULL)
1442161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1443161678Sdavidxu	else
1444161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1445161678Sdavidxu
1446161678Sdavidxu	uq->uq_pi_blocked = pi;
1447161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1448161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1449161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1450161678Sdavidxu
1451161678Sdavidxu	mtx_lock_spin(&sched_lock);
1452161678Sdavidxu	umtx_propagate_priority(td);
1453161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1454161678Sdavidxu
1455161678Sdavidxu	umtxq_lock(&uq->uq_key);
	/* Only sleep if nobody woke us while the chain was unlocked. */
1456161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1457161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1458161678Sdavidxu		if (error == EWOULDBLOCK)
1459161678Sdavidxu			error = ETIMEDOUT;
1460161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1461161678Sdavidxu			umtxq_busy(&uq->uq_key);
1462161678Sdavidxu			umtxq_remove(uq);
1463161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1464161678Sdavidxu		}
1465161678Sdavidxu	}
1466161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1467161678Sdavidxu
	/* Undo the blocking state and give back any lent priority. */
1468161678Sdavidxu	mtx_lock_spin(&sched_lock);
1469161678Sdavidxu	uq->uq_pi_blocked = NULL;
1470161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1471161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1472161678Sdavidxu	umtx_unpropagate_priority(pi);
1473161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1474161678Sdavidxu
1475161678Sdavidxu	umtxq_lock(&uq->uq_key);
1476161678Sdavidxu
1477161678Sdavidxu	return (error);
1478161678Sdavidxu}
1479161678Sdavidxu
1480161678Sdavidxu/*
1481161678Sdavidxu * Add reference count for a PI mutex.
 * The umtx chain lock protects the counter (asserted below).
1482161678Sdavidxu */
1483161678Sdavidxustatic void
1484161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1485161678Sdavidxu{
1486161678Sdavidxu	struct umtxq_chain *uc;
1487161678Sdavidxu
1488161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1489161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1490161678Sdavidxu	pi->pi_refcount++;
1491161678Sdavidxu}
1492161678Sdavidxu
1493161678Sdavidxu/*
1494161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1495161678Sdavidxu * is decreased to zero, its memory space is freed.
 * Caller holds the umtx chain lock (asserted below); on the last
 * reference the mutex is unhashed, disowned, and returned to the zone.
1496161678Sdavidxu */
1497161678Sdavidxustatic void
1498161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1499161678Sdavidxu{
1500161678Sdavidxu	struct umtxq_chain *uc;
1501161678Sdavidxu	int free = 0;
1502161678Sdavidxu
1503161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1504161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1505161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1506161678Sdavidxu	if (--pi->pi_refcount == 0) {
		/* Owner links are protected by sched_lock, not the chain. */
1507161678Sdavidxu		mtx_lock_spin(&sched_lock);
1508161678Sdavidxu		if (pi->pi_owner != NULL) {
1509161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1510161678Sdavidxu				pi, pi_link);
1511161678Sdavidxu			pi->pi_owner = NULL;
1512161678Sdavidxu		}
1513161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1514161678Sdavidxu			("blocked queue not empty"));
1515161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1516161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		/* Defer the actual free until all locks are dropped here. */
1517161678Sdavidxu		free = 1;
1518161678Sdavidxu	}
1519161678Sdavidxu	if (free)
1520161678Sdavidxu		umtx_pi_free(pi);
1521161678Sdavidxu}
1522161678Sdavidxu
1523161678Sdavidxu/*
1524161678Sdavidxu * Find a PI mutex in hash table.
1525161678Sdavidxu */
1526161678Sdavidxustatic struct umtx_pi *
1527161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1528161678Sdavidxu{
1529161678Sdavidxu	struct umtxq_chain *uc;
1530161678Sdavidxu	struct umtx_pi *pi;
1531161678Sdavidxu
1532161678Sdavidxu	uc = umtxq_getchain(key);
1533161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1534161678Sdavidxu
1535161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1536161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1537161678Sdavidxu			return (pi);
1538161678Sdavidxu		}
1539161678Sdavidxu	}
1540161678Sdavidxu	return (NULL);
1541161678Sdavidxu}
1542161678Sdavidxu
1543161678Sdavidxu/*
1544161678Sdavidxu * Insert a PI mutex into hash table.
1545161678Sdavidxu */
1546161678Sdavidxustatic inline void
1547161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1548161678Sdavidxu{
1549161678Sdavidxu	struct umtxq_chain *uc;
1550161678Sdavidxu
1551161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1552161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1553161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1554161678Sdavidxu}
1555161678Sdavidxu
1556161678Sdavidxu/*
1557161678Sdavidxu * Lock a PI mutex.
1558161678Sdavidxu */
1559161678Sdavidxustatic int
1560161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1561161678Sdavidxu	int try)
1562161678Sdavidxu{
1563161678Sdavidxu	struct umtx_q *uq;
1564161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1565161678Sdavidxu	uint32_t id, owner, old;
1566161678Sdavidxu	int error;
1567161678Sdavidxu
1568161678Sdavidxu	id = td->td_tid;
1569161678Sdavidxu	uq = td->td_umtxq;
1570161678Sdavidxu
1571161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1572161678Sdavidxu	    &uq->uq_key)) != 0)
1573161678Sdavidxu		return (error);
1574163697Sdavidxu	umtxq_lock(&uq->uq_key);
1575163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1576163697Sdavidxu	if (pi == NULL) {
1577163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1578163697Sdavidxu		if (new_pi == NULL) {
1579161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1580163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1581161678Sdavidxu			new_pi->pi_key = uq->uq_key;
1582161678Sdavidxu			umtxq_lock(&uq->uq_key);
1583161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1584163697Sdavidxu			if (pi != NULL) {
1585161678Sdavidxu				umtx_pi_free(new_pi);
1586163697Sdavidxu				new_pi = NULL;
1587161678Sdavidxu			}
1588161678Sdavidxu		}
1589163697Sdavidxu		if (new_pi != NULL) {
1590163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1591163697Sdavidxu			umtx_pi_insert(new_pi);
1592163697Sdavidxu			pi = new_pi;
1593163697Sdavidxu		}
1594163697Sdavidxu	}
1595163697Sdavidxu	umtx_pi_ref(pi);
1596163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1597161678Sdavidxu
1598163697Sdavidxu	/*
1599163697Sdavidxu	 * Care must be exercised when dealing with umtx structure.  It
1600163697Sdavidxu	 * can fault on any access.
1601163697Sdavidxu	 */
1602163697Sdavidxu	for (;;) {
1603161678Sdavidxu		/*
1604161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1605161678Sdavidxu		 */
1606161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1607161678Sdavidxu
1608161678Sdavidxu		/* The acquire succeeded. */
1609161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1610161678Sdavidxu			error = 0;
1611161678Sdavidxu			break;
1612161678Sdavidxu		}
1613161678Sdavidxu
1614161678Sdavidxu		/* The address was invalid. */
1615161678Sdavidxu		if (owner == -1) {
1616161678Sdavidxu			error = EFAULT;
1617161678Sdavidxu			break;
1618161678Sdavidxu		}
1619161678Sdavidxu
1620161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1621161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1622161678Sdavidxu			owner = casuword32(&m->m_owner,
1623161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1624161678Sdavidxu
1625161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1626161678Sdavidxu				umtxq_lock(&uq->uq_key);
1627161678Sdavidxu				error = umtx_pi_claim(pi, td);
1628161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1629161678Sdavidxu				break;
1630161678Sdavidxu			}
1631161678Sdavidxu
1632161678Sdavidxu			/* The address was invalid. */
1633161678Sdavidxu			if (owner == -1) {
1634161678Sdavidxu				error = EFAULT;
1635161678Sdavidxu				break;
1636161678Sdavidxu			}
1637161678Sdavidxu
1638161678Sdavidxu			/* If this failed the lock has changed, restart. */
1639161678Sdavidxu			continue;
1640161678Sdavidxu		}
1641161678Sdavidxu
1642161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1643161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1644161678Sdavidxu			error = EDEADLK;
1645161678Sdavidxu			break;
1646161678Sdavidxu		}
1647161678Sdavidxu
1648161678Sdavidxu		if (try != 0) {
1649161678Sdavidxu			error = EBUSY;
1650161678Sdavidxu			break;
1651161678Sdavidxu		}
1652161678Sdavidxu
1653161678Sdavidxu		/*
1654161678Sdavidxu		 * If we caught a signal, we have retried and now
1655161678Sdavidxu		 * exit immediately.
1656161678Sdavidxu		 */
1657161678Sdavidxu		if (error != 0)
1658161678Sdavidxu			break;
1659161678Sdavidxu
1660161678Sdavidxu		umtxq_lock(&uq->uq_key);
1661161678Sdavidxu		umtxq_busy(&uq->uq_key);
1662161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1663161678Sdavidxu
1664161678Sdavidxu		/*
1665161678Sdavidxu		 * Set the contested bit so that a release in user space
1666161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1667161678Sdavidxu		 * either some one else has acquired the lock or it has been
1668161678Sdavidxu		 * released.
1669161678Sdavidxu		 */
1670161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1671161678Sdavidxu
1672161678Sdavidxu		/* The address was invalid. */
1673161678Sdavidxu		if (old == -1) {
1674161678Sdavidxu			umtxq_lock(&uq->uq_key);
1675161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1676161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1677161678Sdavidxu			error = EFAULT;
1678161678Sdavidxu			break;
1679161678Sdavidxu		}
1680161678Sdavidxu
1681161678Sdavidxu		umtxq_lock(&uq->uq_key);
1682161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
1683161678Sdavidxu		/*
1684161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1685161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1686161678Sdavidxu		 * unlocking the umtx.
1687161678Sdavidxu		 */
1688161678Sdavidxu		if (old == owner)
1689161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1690161678Sdavidxu				 "umtxpi", timo);
1691161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1692161678Sdavidxu	}
1693161678Sdavidxu
1694163697Sdavidxu	umtxq_lock(&uq->uq_key);
1695163697Sdavidxu	umtx_pi_unref(pi);
1696163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1697161678Sdavidxu
1698161678Sdavidxu	umtx_key_release(&uq->uq_key);
1699161678Sdavidxu	return (error);
1700161678Sdavidxu}
1701161678Sdavidxu
1702161678Sdavidxu/*
1703161678Sdavidxu * Unlock a PI mutex.
1704161678Sdavidxu */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	/* fuword32() returns -1 if the userland address faults. */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Fast path: uncontested, release with a single CAS. */
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS lost a race; fall through with the updated word. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	/* count = number of waiters; uq_first = highest-priority waiter. */
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		/* Drop ownership of this PI chain under sched_lock. */
		mtx_lock_spin(&sched_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		/*
		 * Recompute our lent priority from the remaining PI
		 * mutexes we still own: the best blocked thread on any
		 * of them bounds our user priority.
		 */
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1791161678Sdavidxu
1792161678Sdavidxu/*
1793161678Sdavidxu * Lock a PP mutex.
1794161678Sdavidxu */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* Only privileged threads may have their priority boosted. */
	su = (suser(td) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Map the userland ceiling onto the kernel realtime
		 * range.  ceiling is unsigned, so both an out-of-range
		 * value and a fuword32() fault (-1) land above
		 * RTP_PRIO_MAX and are rejected.
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&sched_lock);
		/* Our priority must not exceed the mutex's ceiling. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&sched_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
		}
		mtx_unlock_spin(&sched_lock);

		/* PP mutexes are always acquired in the kernel, so the
		 * unlocked state is UMUTEX_CONTESTED. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * We did not get the lock; undo the speculative boost
		 * and recompute our lent priority from the PI mutexes
		 * we still own before retrying.
		 */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

	/* On failure, roll back the boost taken in the last iteration. */
	if (error != 0) {
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
1915161678Sdavidxu
1916161678Sdavidxu/*
1917161678Sdavidxu * Unlock a PP mutex.
1918161678Sdavidxu */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* Only privileged threads had their priority boosted on lock. */
	su = (suser(td) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to restore on unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		/* Unsigned compare rejects out-of-range ceilings. */
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	/* suword32() returns -1 on fault, 0 on success. */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Restore our inherited priority, bounded by the best
		 * thread still blocked on a PI mutex we own.
		 */
		mtx_lock_spin(&sched_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}
1999161678Sdavidxu
/*
 * Change the priority ceiling of a PP mutex; the old ceiling is
 * written to *old_ceiling (if non-NULL) on success.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Only priority-protected mutexes have a ceiling. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to acquire the mutex so the update is atomic
		 * with respect to other lockers. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Acquired: store the new ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own it; just update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2079161678Sdavidxu
2080162030Sdavidxustatic int
2081162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2082162030Sdavidxu	int try)
2083162030Sdavidxu{
2084162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2085162030Sdavidxu	case 0:
2086162030Sdavidxu		return (_do_lock_normal(td, m, flags, timo, try));
2087162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2088162030Sdavidxu		return (_do_lock_pi(td, m, flags, timo, try));
2089162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2090162030Sdavidxu		return (_do_lock_pp(td, m, flags, timo, try));
2091162030Sdavidxu	}
2092162030Sdavidxu	return (EINVAL);
2093162030Sdavidxu}
2094162030Sdavidxu
2095161678Sdavidxu/*
2096161678Sdavidxu * Lock a userland POSIX mutex.
2097161678Sdavidxu */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = absolute (uptime-based) deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		/*
		 * Retry with a shrinking timeout until the lock is
		 * acquired, a real error occurs, or the deadline passes.
		 */
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Remaining time = deadline - now. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2139161678Sdavidxu
2140161678Sdavidxu/*
2141161678Sdavidxu * Unlock a userland POSIX mutex.
2142161678Sdavidxu */
2143161678Sdavidxustatic int
2144161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2145161678Sdavidxu{
2146161678Sdavidxu	uint32_t flags;
2147161678Sdavidxu
2148161678Sdavidxu	flags = fuword32(&m->m_flags);
2149161678Sdavidxu	if (flags == -1)
2150161678Sdavidxu		return (EFAULT);
2151161678Sdavidxu
2152161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2153161855Sdavidxu	case 0:
2154161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2155161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2156161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2157161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2158161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2159161855Sdavidxu	}
2160161678Sdavidxu
2161161855Sdavidxu	return (EINVAL);
2162161678Sdavidxu}
2163161678Sdavidxu
2164139013Sdavidxuint
2165139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2166139013Sdavidxu    /* struct umtx *umtx */
2167139013Sdavidxu{
2168162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2169139013Sdavidxu}
2170139013Sdavidxu
2171139013Sdavidxuint
2172139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2173139013Sdavidxu    /* struct umtx *umtx */
2174139013Sdavidxu{
2175162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2176139013Sdavidxu}
2177139013Sdavidxu
2178162536Sdavidxustatic int
2179162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2180139013Sdavidxu{
2181162536Sdavidxu	struct timespec *ts, timeout;
2182139013Sdavidxu	int error;
2183139013Sdavidxu
2184162536Sdavidxu	/* Allow a null timespec (wait forever). */
2185162536Sdavidxu	if (uap->uaddr2 == NULL)
2186162536Sdavidxu		ts = NULL;
2187162536Sdavidxu	else {
2188162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2189162536Sdavidxu		if (error != 0)
2190162536Sdavidxu			return (error);
2191162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2192162536Sdavidxu		    timeout.tv_nsec < 0) {
2193162536Sdavidxu			return (EINVAL);
2194161678Sdavidxu		}
2195162536Sdavidxu		ts = &timeout;
2196162536Sdavidxu	}
2197162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2198162536Sdavidxu}
2199162536Sdavidxu
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	/* uap->val is the owner id expected in the lock word. */
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
2205162536Sdavidxu
2206162536Sdavidxustatic int
2207162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2208162536Sdavidxu{
2209162536Sdavidxu	struct timespec *ts, timeout;
2210162536Sdavidxu	int error;
2211162536Sdavidxu
2212162536Sdavidxu	if (uap->uaddr2 == NULL)
2213162536Sdavidxu		ts = NULL;
2214162536Sdavidxu	else {
2215162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2216162536Sdavidxu		if (error != 0)
2217162536Sdavidxu			return (error);
2218162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2219162536Sdavidxu		    timeout.tv_nsec < 0)
2220162536Sdavidxu			return (EINVAL);
2221162536Sdavidxu		ts = &timeout;
2222162536Sdavidxu	}
2223162536Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 0);
2224162536Sdavidxu}
2225162536Sdavidxu
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	/* Wake up to uap->val threads waiting on the address uap->obj. */
	return (kern_umtx_wake(td, uap->obj, uap->val));
}
2231162536Sdavidxu
2232162536Sdavidxustatic int
2233162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2234162536Sdavidxu{
2235162536Sdavidxu	struct timespec *ts, timeout;
2236162536Sdavidxu	int error;
2237162536Sdavidxu
2238162536Sdavidxu	/* Allow a null timespec (wait forever). */
2239162536Sdavidxu	if (uap->uaddr2 == NULL)
2240162536Sdavidxu		ts = NULL;
2241162536Sdavidxu	else {
2242162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2243162536Sdavidxu		    sizeof(timeout));
2244162536Sdavidxu		if (error != 0)
2245162536Sdavidxu			return (error);
2246162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2247162536Sdavidxu		    timeout.tv_nsec < 0) {
2248162536Sdavidxu			return (EINVAL);
2249139013Sdavidxu		}
2250162536Sdavidxu		ts = &timeout;
2251139013Sdavidxu	}
2252162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
2253162536Sdavidxu}
2254162536Sdavidxu
2255162536Sdavidxustatic int
2256162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2257162536Sdavidxu{
2258162536Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, 1);
2259162536Sdavidxu}
2260162536Sdavidxu
2261162536Sdavidxustatic int
2262162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2263162536Sdavidxu{
2264162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
2265162536Sdavidxu}
2266162536Sdavidxu
2267162536Sdavidxustatic int
2268162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2269162536Sdavidxu{
2270162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2271162536Sdavidxu}
2272162536Sdavidxu
/* Signature shared by all UMTX_OP dispatch handlers. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/* Native dispatch table, indexed by the UMTX_OP_* operation code. */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling		/* UMTX_OP_SET_CEILING */
};
2285162536Sdavidxu
2286162536Sdavidxuint
2287162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
2288162536Sdavidxu{
2289163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
2290162536Sdavidxu		return (*op_table[uap->op])(td, uap);
2291162536Sdavidxu	return (EINVAL);
2292162536Sdavidxu}
2293162536Sdavidxu
2294162536Sdavidxu#ifdef COMPAT_IA32
2295162536Sdavidxu
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	/* 32-bit compat legacy lock: the lock word is a plain uint32_t. */
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}
2302163046Sdavidxu
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	/* 32-bit compat legacy unlock counterpart of freebsd32_umtx_lock(). */
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
2309163046Sdavidxu
/* 32-bit userland layout of struct timespec, as copied in from compat
 * processes before widening to the native struct timespec. */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};
2314162536Sdavidxu
2315162536Sdavidxustatic inline int
2316162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
2317162536Sdavidxu{
2318162536Sdavidxu	struct timespec32 ts32;
2319162536Sdavidxu	int error;
2320162536Sdavidxu
2321162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
2322162536Sdavidxu	if (error == 0) {
2323162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
2324162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
2325162536Sdavidxu	}
2326140421Sdavidxu	return (error);
2327139013Sdavidxu}
2328161678Sdavidxu
2329162536Sdavidxustatic int
2330162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2331162536Sdavidxu{
2332162536Sdavidxu	struct timespec *ts, timeout;
2333162536Sdavidxu	int error;
2334162536Sdavidxu
2335162536Sdavidxu	/* Allow a null timespec (wait forever). */
2336162536Sdavidxu	if (uap->uaddr2 == NULL)
2337162536Sdavidxu		ts = NULL;
2338162536Sdavidxu	else {
2339162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2340162536Sdavidxu		if (error != 0)
2341162536Sdavidxu			return (error);
2342162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2343162536Sdavidxu		    timeout.tv_nsec < 0) {
2344162536Sdavidxu			return (EINVAL);
2345162536Sdavidxu		}
2346162536Sdavidxu		ts = &timeout;
2347162536Sdavidxu	}
2348162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2349162536Sdavidxu}
2350162536Sdavidxu
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	/* uap->val is truncated to the 32-bit owner id used by compat32. */
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}
2356162536Sdavidxu
2357162536Sdavidxustatic int
2358162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2359162536Sdavidxu{
2360162536Sdavidxu	struct timespec *ts, timeout;
2361162536Sdavidxu	int error;
2362162536Sdavidxu
2363162536Sdavidxu	if (uap->uaddr2 == NULL)
2364162536Sdavidxu		ts = NULL;
2365162536Sdavidxu	else {
2366162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2367162536Sdavidxu		if (error != 0)
2368162536Sdavidxu			return (error);
2369162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2370162536Sdavidxu		    timeout.tv_nsec < 0)
2371162536Sdavidxu			return (EINVAL);
2372162536Sdavidxu		ts = &timeout;
2373162536Sdavidxu	}
2374162536Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1);
2375162536Sdavidxu}
2376162536Sdavidxu
2377162536Sdavidxustatic int
2378162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2379162536Sdavidxu{
2380162536Sdavidxu	struct timespec *ts, timeout;
2381162536Sdavidxu	int error;
2382162536Sdavidxu
2383162536Sdavidxu	/* Allow a null timespec (wait forever). */
2384162536Sdavidxu	if (uap->uaddr2 == NULL)
2385162536Sdavidxu		ts = NULL;
2386162536Sdavidxu	else {
2387162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2388162536Sdavidxu		if (error != 0)
2389162536Sdavidxu			return (error);
2390162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2391162536Sdavidxu		    timeout.tv_nsec < 0)
2392162536Sdavidxu			return (EINVAL);
2393162536Sdavidxu		ts = &timeout;
2394162536Sdavidxu	}
2395162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
2396162536Sdavidxu}
2397162536Sdavidxu
/*
 * 32-bit compat dispatch table, indexed by UMTX_OP_* exactly like the
 * native op_table: slot 4 is MUTEX_TRYLOCK and slot 5 is MUTEX_LOCK.
 * (The previous comments on those two slots had the labels swapped;
 * the function pointers themselves were already in the right order.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
	__umtx_op_set_ceiling		/* UMTX_OP_SET_CEILING */
};
2408162536Sdavidxu
2409162536Sdavidxuint
2410162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
2411162536Sdavidxu{
2412163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
2413162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
2414162536Sdavidxu			(struct _umtx_op_args *)uap);
2415162536Sdavidxu	return (EINVAL);
2416162536Sdavidxu}
2417162536Sdavidxu#endif
2418162536Sdavidxu
2419161678Sdavidxuvoid
2420161678Sdavidxuumtx_thread_init(struct thread *td)
2421161678Sdavidxu{
2422161678Sdavidxu	td->td_umtxq = umtxq_alloc();
2423161678Sdavidxu	td->td_umtxq->uq_thread = td;
2424161678Sdavidxu}
2425161678Sdavidxu
2426161678Sdavidxuvoid
2427161678Sdavidxuumtx_thread_fini(struct thread *td)
2428161678Sdavidxu{
2429161678Sdavidxu	umtxq_free(td->td_umtxq);
2430161678Sdavidxu}
2431161678Sdavidxu
2432161678Sdavidxu/*
2433161678Sdavidxu * It will be called when new thread is created, e.g fork().
2434161678Sdavidxu */
2435161678Sdavidxuvoid
2436161678Sdavidxuumtx_thread_alloc(struct thread *td)
2437161678Sdavidxu{
2438161678Sdavidxu	struct umtx_q *uq;
2439161678Sdavidxu
2440161678Sdavidxu	uq = td->td_umtxq;
2441161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
2442161678Sdavidxu
2443161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2444161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2445161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2446161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
2447161678Sdavidxu}
2448161678Sdavidxu
2449161678Sdavidxu/*
2450161678Sdavidxu * exec() hook.
2451161678Sdavidxu */
2452161678Sdavidxustatic void
2453161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
2454161678Sdavidxu	struct image_params *imgp __unused)
2455161678Sdavidxu{
2456161678Sdavidxu	umtx_thread_cleanup(curthread);
2457161678Sdavidxu}
2458161678Sdavidxu
/*
 * thread_exit() hook: release any umtx priority-inheritance state
 * still held by the departing thread.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
2467161678Sdavidxu
/*
 * Clean up umtx data: reset the thread's inherited priority, orphan
 * any priority-inheritance mutexes it still owns, and clear the
 * priority-borrowing flag.  Called from the thread-exit and exec
 * hooks above.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	/* Nothing to do if the thread never had a umtx queue entry. */
	if ((uq = td->td_umtxq) == NULL)
		return;

	/*
	 * sched_lock is taken around all of the updates below;
	 * presumably it serializes uq_inherited_pri, the contested PI
	 * list, and td_flags against the PI propagation code — confirm
	 * against umtx_pi users elsewhere in this file.
	 */
	mtx_lock_spin(&sched_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Drain the contested list, detaching each PI mutex from us. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	/* Stop borrowing priority; the thread reverts to its own. */
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}
2489