kern_umtx.c revision 139751
/*
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 139751 2005-01-06 02:08:34Z davidxu $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

#define UMTX_STATIC_SHARED

struct umtx_key {
	int	type;
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;
			long		word;
		} both;
	} info;
};

struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	struct thread		*uq_thread;	/* The thread waiting on this umtx. */
	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};

LIST_HEAD(umtx_head, umtx_q);
struct umtxq_chain {
	struct mtx		uc_lock;	/* Lock for this chain. */
	struct umtx_head	uc_queue;	/* List of sleep queues. */
#define	UCF_BUSY		0x01
#define	UCF_WANT		0x02
	int			uc_flags;
};

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
#ifdef UMTX_DYNAMIC_SHARED
static void fork_handler(void *arg, struct proc *p1, struct proc *p2,
	int flags);
#endif
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
		umtxq_chains[i].uc_flags = 0;
	}
#ifdef UMTX_DYNAMIC_SHARED
	EVENTHANDLER_REGISTER(process_fork, fork_handler, 0, 10000);
#endif
}

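/*
 * Hash a umtx key to a sleep-queue chain index.
 */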
static inline int
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

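/*
 * Compare two umtx keys for equality.
 */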
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.ptr == k2->info.both.ptr &&
		k1->info.both.word == k2->info.both.word);
}

static inline struct mtx *
umtxq_mtx(int chain)
{
	return (&umtxq_chains[chain].uc_lock);
}

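/*
 * Mark a chain busy, sleeping until any current holder of the busy flag
 * releases it.  The chain lock must be held.
 */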
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		umtxq_chains[chain].uc_flags |= UCF_WANT;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		       curthread->td_priority, "umtxq_busy", 0);
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}

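/*
 * Clear the busy flag and wake up anyone waiting to busy the chain.
 */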
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_flags & UCF_WANT) {
		umtxq_chains[chain].uc_flags &= ~UCF_WANT;
		wakeup(&umtxq_chains[chain]);
	}
}

static inline void
umtxq_lock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_lock(umtxq_mtx(chain));
}

static inline void
umtxq_unlock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_unlock(umtxq_mtx(chain));
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	uq->uq_thread->td_umtxq = uq;
	mtx_lock_spin(&sched_lock);
	uq->uq_thread->td_flags |= TDF_UMTXQ;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		uq->uq_thread->td_umtxq = NULL;
		/* turning off TDF_UMTXQ should be the last thing. */
		mtx_lock_spin(&sched_lock);
		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
		mtx_unlock_spin(&sched_lock);
	}
}

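/*
 * Count the threads waiting on a key.  The count is capped at two since
 * callers only need to know whether more than one waiter remains.
 */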
static int
umtxq_count(struct umtx_key *key)
{
	struct umtx_q *uq;
	struct umtx_head *head;
	int chain, count = 0;

	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_FOREACH(uq, head, uq_next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

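/*
 * Wake up to n_wake threads sleeping on a key and return the number of
 * threads actually woken.
 */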
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

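/*
 * Sleep with the chain lock as interlock, mapping EWOULDBLOCK to
 * ETIMEDOUT so callers see a consistent timeout error.
 */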
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	int chain = umtxq_hash(key);
	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

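/*
 * Build a umtx key for a userland address.  Addresses in shared mappings
 * are keyed by (VM object, offset) so that all processes resolve to the
 * same queue; private mappings are keyed by (address, pid).
 */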
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
#if defined(UMTX_DYNAMIC_SHARED) || defined(UMTX_STATIC_SHARED)
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}
#endif

#if defined(UMTX_DYNAMIC_SHARED)
	key->type = UMTX_SHARED;
	key->info.shared.offset = entry->offset + entry->start -
		(vm_offset_t)umtx;
	/*
	 * Add an object reference; without it, a buggy application could
	 * deallocate the object, the object could then be reused by another
	 * process, and unlock would wake the wrong thread.
	 */
	vm_object_reference(key->info.shared.object);
	vm_map_lookup_done(map, entry);
#elif defined(UMTX_STATIC_SHARED)
	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid  = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
#else
	key->type = UMTX_PRIVATE;
	key->info.private.umtx = umtx;
	key->info.private.pid  = td->td_proc->p_pid;
#endif
	return (0);
}

static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->type == UMTX_SHARED)
		vm_object_deallocate(key->info.shared.object);
}

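/*
 * Resolve the key for a umtx and put the current thread on the
 * corresponding sleep queue.
 */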
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* Note: for a condition variable, the busy flag is not needed. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}

#if defined(UMTX_DYNAMIC_SHARED)
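/*
 * After a fork, re-check the VM object backing each waiter in the parent
 * and requeue the waiter under its new key if the object has changed.
 */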
static void
fork_handler(void *arg, struct proc *p1, struct proc *p2, int flags)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	struct umtx_key key;
	LIST_HEAD(, umtx_q) workq;
	struct umtx_q *uq;
	struct thread *td;
	int onq;

	LIST_INIT(&workq);

	/* Collect threads waiting on umtxq */
	PROC_LOCK(p1);
	FOREACH_THREAD_IN_PROC(p1, td) {
		if (td->td_flags & TDF_UMTXQ) {
			uq = td->td_umtxq;
			if (uq)
				LIST_INSERT_HEAD(&workq, uq, uq_rqnext);
		}
	}
	PROC_UNLOCK(p1);

	LIST_FOREACH(uq, &workq, uq_rqnext) {
		map = &p1->p_vmspace->vm_map;
		if (vm_map_lookup(&map, uq->uq_addr, VM_PROT_WRITE,
		    &entry, &object, &pindex, &prot, &wired) != KERN_SUCCESS) {
			continue;
		}
		key.type = UMTX_SHARED;
		key.info.shared.object = object;
		key.info.shared.offset = entry->offset + entry->start -
			uq->uq_addr;
		if (umtx_key_match(&key, &uq->uq_key)) {
			vm_map_lookup_done(map, entry);
			continue;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		if (uq->uq_thread->td_flags & TDF_UMTXQ) {
			umtxq_remove(uq);
			onq = 1;
		} else
			onq = 0;
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		if (onq) {
			vm_object_deallocate(uq->uq_key.info.shared.object);
			uq->uq_key = key;
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			vm_object_reference(uq->uq_key.info.shared.object);
		}
		vm_map_lookup_done(map, entry);
	}
}
#endif

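/*
 * Lock a umtx object, sleeping in the kernel while it is contested.
 * A timo of 0 means wait without a timeout.
 */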
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	/*
	 * Care must be exercised when dealing with the umtx structure.  It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have already retried acquiring
		 * the lock above and now exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, &uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq.uq_key);
			umtxq_busy(&uq.uq_key);
			umtxq_remove(&uq);
			umtxq_unbusy(&uq.uq_key);
			umtxq_unlock(&uq.uq_key);
			umtx_key_release(&uq.uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock has
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq.uq_key);
		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
			error = umtxq_sleep(td, &uq.uq_key,
				       td->td_priority | PCATCH,
				       "umtx", timo);
		}
		umtxq_busy(&uq.uq_key);
		umtxq_remove(&uq);
		umtxq_unbusy(&uq.uq_key);
		umtxq_unlock(&uq.uq_key);
		umtx_key_release(&uq.uq_key);
	}

	return (0);
}

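/*
 * Lock a umtx object, honoring an optional absolute timeout which is
 * converted to a relative tick count for each attempt.
 */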
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *abstime)
{
	struct timespec ts1, ts2;
	struct timeval tv;
	int timo, error;

	if (abstime == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		for (;;) {
			ts1 = *abstime;
			getnanotime(&ts2);
			timespecsub(&ts1, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
			if (tv.tv_sec < 0) {
				error = ETIMEDOUT;
				break;
			}
			timo = tvtohz(&tv);
			error = _do_lock(td, umtx, id, timo);
			if (error != ETIMEDOUT)
				break;
		}
	}
	/*
	 * This lets userland back off the critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

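/*
 * Unlock a contested umtx object owned by the current thread.  The word
 * is reset to UMTX_UNOWNED, or to UMTX_CONTESTED if more than one waiter
 * remains, and one waiter is woken.
 */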
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr; this is not correct on archs where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is at most one thread waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

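/*
 * Wait on a userland address while it still contains the expected value,
 * with an optional absolute timeout.
 */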
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *abstime)
{
	struct umtx_q uq;
	struct timespec ts1, ts2;
	struct timeval tv;
	long tmp;
	int timo, error = 0;

	if ((error = umtxq_queue_me(td, umtx, &uq)) != 0)
		return (error);
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq.uq_key);
		umtxq_remove(&uq);
		umtxq_unlock(&uq.uq_key);
	} else if (abstime == NULL) {
		umtxq_lock(&uq.uq_key);
		if (td->td_flags & TDF_UMTXQ)
			error = umtxq_sleep(td, &uq.uq_key,
			       td->td_priority | PCATCH, "ucond", 0);
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;
		else
			umtxq_remove(&uq);
		umtxq_unlock(&uq.uq_key);
	} else {
		for (;;) {
			ts1 = *abstime;
			getnanotime(&ts2);
			timespecsub(&ts1, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
			umtxq_lock(&uq.uq_key);
			if (tv.tv_sec < 0) {
				error = ETIMEDOUT;
				break;
			}
			timo = tvtohz(&tv);
			if (td->td_flags & TDF_UMTXQ)
				error = umtxq_sleep(td, &uq.uq_key,
					    td->td_priority | PCATCH,
					    "ucond", timo);
			if (!(td->td_flags & TDF_UMTXQ))
				break;
			umtxq_unlock(&uq.uq_key);
		}
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;
		else
			umtxq_remove(&uq);
		umtxq_unlock(&uq.uq_key);
	}
	umtx_key_release(&uq.uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

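/*
 * Wake up to n_wake threads waiting on a userland address and report the
 * number woken in td_retval[0].
 */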
static int
do_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	td->td_retval[0] = ret;
	return (0);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

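/*
 * System call entry point dispatching the umtx operations.
 */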
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec abstime;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &abstime, sizeof(abstime));
			if (error != 0)
				return (error);
			if (abstime.tv_nsec >= 1000000000 ||
			    abstime.tv_nsec < 0)
				return (EINVAL);
			ts = &abstime;
		}
		return do_lock(td, uap->umtx, uap->id, ts);
	case UMTX_OP_UNLOCK:
		return do_unlock(td, uap->umtx, uap->id);
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &abstime, sizeof(abstime));
			if (error != 0)
				return (error);
			if (abstime.tv_nsec >= 1000000000 ||
			    abstime.tv_nsec < 0)
				return (EINVAL);
			ts = &abstime;
		}
		return do_wait(td, uap->umtx, uap->id, ts);
	case UMTX_OP_WAKE:
		return do_wake(td, uap->umtx, uap->id);
	default:
		return (EINVAL);
	}
}
