kern_umtx.c revision 139013
1112904Sjeff/*
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 139013 2004-12-18 12:52:44Z davidxu $");
30116182Sobrien
31112904Sjeff#include <sys/param.h>
32112904Sjeff#include <sys/kernel.h>
33131431Smarcel#include <sys/limits.h>
34112904Sjeff#include <sys/lock.h>
35115765Sjeff#include <sys/malloc.h>
36112904Sjeff#include <sys/mutex.h>
37112904Sjeff#include <sys/proc.h>
38112904Sjeff#include <sys/sysent.h>
39112904Sjeff#include <sys/systm.h>
40112904Sjeff#include <sys/sysproto.h>
41139013Sdavidxu#include <sys/eventhandler.h>
42112904Sjeff#include <sys/thr.h>
43112904Sjeff#include <sys/umtx.h>
44112904Sjeff
45139013Sdavidxu#include <vm/vm.h>
46139013Sdavidxu#include <vm/vm_param.h>
47139013Sdavidxu#include <vm/pmap.h>
48139013Sdavidxu#include <vm/vm_map.h>
49139013Sdavidxu#include <vm/vm_object.h>
50139013Sdavidxu
/* Key types: a umtx may be private to a process or shared across processes. */
#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

/* Compiled-in keying policy: share only VM_INHERIT_SHARE mappings. */
#define UMTX_STATIC_SHARED

/*
 * Identity of a umtx for hashing and matching.  Shared umtxes are named
 * by (backing VM object, offset); private ones by (user address, pid).
 * The "both" view aliases either layout as a generic (ptr, word) pair
 * for umtxq_hash() and umtx_key_match().
 */
struct umtx_key {
	int	type;
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;
			long		word;
		} both;
	} info;
};
73139013Sdavidxu
/* Per-thread sleep record, linked onto a hash chain while blocked. */
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	struct thread		*uq_thread;	/* The thread waits on */
	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};
81115765Sjeff
LIST_HEAD(umtx_head, umtx_q);
/* One hash bucket: a mutex protecting a list of sleeping threads. */
struct umtxq_chain {
	struct mtx		uc_lock;	/* Lock for this chain. */
	struct umtx_head	uc_queue;	/* List of sleep queues. */
};

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Hash table of sleep queues, indexed by umtxq_hash() of the key. */
static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

/* Contested marker stored in the userland owner word (its sign bit). */
#define	UMTX_CONTESTED	LONG_MIN
96115310Sjeff
/*
 * Forward declarations for the local sleep-queue machinery: chain
 * hashing/locking, queue insert/remove/count/wakeup, and key lookup.
 */
static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int  umtxq_count(struct umtx_key *key);
static void umtxq_signal(struct umtx_key *key);
static void umtxq_broadcast(struct umtx_key *key);
#ifdef UMTX_DYNAMIC_SHARED
static void fork_handler(void *arg, struct proc *p1, struct proc *p2,
	int flags);
#endif
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
117115310Sjeff
/* Run after eventhandler setup so the fork hook below can register. */
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		/* MTX_DUPOK: chains share one lock class and two may be
		   held at once (e.g. re-keying in fork_handler). */
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
	}
#ifdef UMTX_DYNAMIC_SHARED
	EVENTHANDLER_REGISTER(process_fork, fork_handler, 0, 10000);
#endif
}
134138224Sdavidxu
135138224Sdavidxustatic inline int
136139013Sdavidxuumtxq_hash(struct umtx_key *key)
137138224Sdavidxu{
138139013Sdavidxu	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
139138224Sdavidxu	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
140138224Sdavidxu}
141138224Sdavidxu
142139013Sdavidxustatic inline int
143139013Sdavidxuumtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
144139013Sdavidxu{
145139013Sdavidxu	return (k1->type == k2->type &&
146139013Sdavidxu		k1->info.both.ptr == k2->info.both.ptr &&
147139013Sdavidxu	        k1->info.both.word == k2->info.both.word);
148139013Sdavidxu}
149139013Sdavidxu
150139013Sdavidxustatic inline struct mtx *
151139013Sdavidxuumtxq_mtx(int chain)
152139013Sdavidxu{
153139013Sdavidxu	return (&umtxq_chains[chain].uc_lock);
154139013Sdavidxu}
155139013Sdavidxu
/* Lock the hash chain that the given key hashes to. */
static inline void
umtxq_lock(struct umtx_key *key)
{
	mtx_lock(umtxq_mtx(umtxq_hash(key)));
}
162138224Sdavidxu
/* Unlock the hash chain that the given key hashes to. */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	mtx_unlock(umtxq_mtx(umtxq_hash(key)));
}
169138224Sdavidxu
/*
 * Insert a thread onto the umtx queue.  Callers hold the chain lock for
 * uq->uq_key (see umtxq_queue_me()).  TDF_UMTXQ is flipped under
 * sched_lock so the flag change is safe against the scheduler.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	uq->uq_thread->td_umtxq = uq;
	mtx_lock_spin(&sched_lock);
	uq->uq_thread->td_flags |= TDF_UMTXQ;
	mtx_unlock_spin(&sched_lock);
}
186139013Sdavidxu
/*
 * Remove thread from the umtx queue.  Callers hold the chain lock.
 * No-op when the thread is not queued (TDF_UMTXQ clear), so it is safe
 * to call after a wakeup has already dequeued us.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		uq->uq_thread->td_umtxq = NULL;
		/* turning off TDF_UMTXQ should be the last thing. */
		mtx_lock_spin(&sched_lock);
		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
		mtx_unlock_spin(&sched_lock);
	}
}
202139013Sdavidxu
203139013Sdavidxustatic int
204139013Sdavidxuumtxq_count(struct umtx_key *key)
205139013Sdavidxu{
206115765Sjeff	struct umtx_q *uq;
207139013Sdavidxu	struct umtx_head *head;
208139013Sdavidxu	int chain, count = 0;
209115765Sjeff
210139013Sdavidxu	chain = umtxq_hash(key);
211139013Sdavidxu	umtxq_lock(key);
212139013Sdavidxu	head = &umtxq_chains[chain].uc_queue;
213115765Sjeff	LIST_FOREACH(uq, head, uq_next) {
214139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
215139013Sdavidxu			if (++count > 1)
216139013Sdavidxu				break;
217139013Sdavidxu		}
218115765Sjeff	}
219139013Sdavidxu	umtxq_unlock(key);
220139013Sdavidxu	return (count);
221115765Sjeff}
222115765Sjeff
223139013Sdavidxustatic void
224139013Sdavidxuumtxq_signal(struct umtx_key *key)
225115765Sjeff{
226139013Sdavidxu	struct umtx_q *uq;
227115765Sjeff	struct umtx_head *head;
228139013Sdavidxu	struct thread *blocked = NULL;
229138224Sdavidxu	int chain;
230115765Sjeff
231139013Sdavidxu	chain = umtxq_hash(key);
232139013Sdavidxu	umtxq_lock(key);
233139013Sdavidxu	head = &umtxq_chains[chain].uc_queue;
234139013Sdavidxu	LIST_FOREACH(uq, head, uq_next) {
235139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
236139013Sdavidxu			blocked = uq->uq_thread;
237139013Sdavidxu			umtxq_remove(uq);
238139013Sdavidxu			break;
239138224Sdavidxu		}
240115765Sjeff	}
241139013Sdavidxu	umtxq_unlock(key);
242139013Sdavidxu	if (blocked != NULL)
243139013Sdavidxu		wakeup(blocked);
244115765Sjeff}
245115765Sjeff
246115765Sjeffstatic void
247139013Sdavidxuumtxq_broadcast(struct umtx_key *key)
248115765Sjeff{
249139013Sdavidxu	struct umtx_q *uq, *next;
250139013Sdavidxu	struct umtx_head *head;
251139013Sdavidxu	struct thread *blocked;
252138224Sdavidxu	int chain;
253138224Sdavidxu
254139013Sdavidxu	chain = umtxq_hash(key);
255139013Sdavidxu	umtxq_lock(key);
256139013Sdavidxu	head = &umtxq_chains[chain].uc_queue;
257139013Sdavidxu	for (uq = LIST_FIRST(head); uq != NULL; uq = next) {
258139013Sdavidxu		next = LIST_NEXT(uq, uq_next);
259139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
260139013Sdavidxu			blocked = uq->uq_thread;
261139013Sdavidxu			umtxq_remove(uq);
262139013Sdavidxu			wakeup(blocked);
263139013Sdavidxu		}
264139013Sdavidxu		uq = next;
265139013Sdavidxu	}
266139013Sdavidxu	umtxq_unlock(key);
267138224Sdavidxu}
268138224Sdavidxu
/*
 * Sleep on the chain mutex for the given key; thin msleep() wrapper.
 * Returns whatever msleep() returns (0, or a signal/timeout error).
 */
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	return (msleep(td, umtxq_mtx(umtxq_hash(key)), priority, wmesg, timo));
}
279138224Sdavidxu
/*
 * Build the umtx_key naming the userland address 'umtx' in td's address
 * space.  Under UMTX_STATIC_SHARED (the compiled default), mappings
 * inherited across fork (VM_INHERIT_SHARE) are keyed by backing VM
 * object + offset so all processes sharing the page use one queue;
 * other mappings are keyed by (address, pid).  A shared key takes a
 * reference on the VM object, dropped later by umtx_key_release();
 * per the comment below this guards against stale-object wakeups.
 *
 * Returns 0 on success, or EFAULT if the address is not writably mapped.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
#if defined(UMTX_DYNAMIC_SHARED) || defined(UMTX_STATIC_SHARED)
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}
#endif

#if defined(UMTX_DYNAMIC_SHARED)
	key->type = UMTX_SHARED;
	key->info.shared.offset = entry->offset + entry->start -
		(vm_offset_t)umtx;
	/*
	 * Add object reference, if we don't do this, a buggy application
	 * deallocates the object, the object will be reused by other
	 * applications, then unlock will wake wrong thread.
	 */
	vm_object_reference(key->info.shared.object);
	vm_map_lookup_done(map, entry);
#elif defined(UMTX_STATIC_SHARED)
	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		/* Not a shared mapping: name by address within this pid. */
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid  = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
#else
	key->type = UMTX_PRIVATE;
	key->info.private.umtx = umtx;
	key->info.private.pid  = td->td_proc->p_pid;
#endif
	return (0);
}
328139013Sdavidxu
329139013Sdavidxustatic inline void
330139013Sdavidxuumtx_key_release(struct umtx_key *key)
331139013Sdavidxu{
332139013Sdavidxu	if (key->type == UMTX_SHARED)
333139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
334139013Sdavidxu}
335139013Sdavidxu
336138224Sdavidxustatic inline int
337139013Sdavidxuumtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
338138224Sdavidxu{
339139013Sdavidxu	int error;
340138224Sdavidxu
341139013Sdavidxu	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
342139013Sdavidxu		return (error);
343139013Sdavidxu
344139013Sdavidxu	uq->uq_addr = (vm_offset_t)umtx;
345139013Sdavidxu	uq->uq_thread = td;
346139013Sdavidxu	umtxq_lock(&uq->uq_key);
347139013Sdavidxu	umtxq_insert(uq);
348139013Sdavidxu	umtxq_unlock(&uq->uq_key);
349139013Sdavidxu	return (0);
350138224Sdavidxu}
351138224Sdavidxu
#if defined(UMTX_DYNAMIC_SHARED)
/*
 * process_fork eventhandler (UMTX_DYNAMIC_SHARED only): after p1 forks,
 * re-key any of its threads still sleeping on a umtx whose backing VM
 * object no longer matches their queued key — presumably because the
 * fork changed the mapping's backing object (copy-on-write); TODO
 * confirm against vm_map semantics.  Matching entries are moved to the
 * chain for the freshly looked-up key.
 */
static void
fork_handler(void *arg, struct proc *p1, struct proc *p2, int flags)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	struct umtx_key key;
	LIST_HEAD(, umtx_q) workq;
	struct umtx_q *uq;
	struct thread *td;
	int onq;

	LIST_INIT(&workq);

	/* Collect threads waiting on umtxq */
	PROC_LOCK(p1);
	FOREACH_THREAD_IN_PROC(p1, td) {
		if (td->td_flags & TDF_UMTXQ) {
			uq = td->td_umtxq;
			if (uq)
				LIST_INSERT_HEAD(&workq, uq, uq_rqnext);
		}
	}
	PROC_UNLOCK(p1);

	LIST_FOREACH(uq, &workq, uq_rqnext) {
		/* Re-resolve the current backing object for the address. */
		map = &p1->p_vmspace->vm_map;
		if (vm_map_lookup(&map, uq->uq_addr, VM_PROT_WRITE,
		    &entry, &object, &pindex, &prot, &wired) != KERN_SUCCESS) {
			continue;
		}
		key.type = UMTX_SHARED;
		key.info.shared.object = object;
		key.info.shared.offset = entry->offset + entry->start -
			uq->uq_addr;
		/* Key unchanged: nothing to migrate. */
		if (umtx_key_match(&key, &uq->uq_key)) {
			vm_map_lookup_done(map, entry);
			continue;
		}

		/* Dequeue under the old key's chain lock, if still queued. */
		umtxq_lock(&uq->uq_key);
		if (uq->uq_thread->td_flags & TDF_UMTXQ) {
			umtxq_remove(uq);
			onq = 1;
		} else
			onq = 0;
		umtxq_unlock(&uq->uq_key);
		if (onq) {
			/* Swap key references and requeue under the new key. */
			vm_object_deallocate(uq->uq_key.info.shared.object);
			uq->uq_key = key;
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unlock(&uq->uq_key);
			vm_object_reference(uq->uq_key.info.shared.object);
		}
		vm_map_lookup_done(map, entry);
	}
}
#endif
415115765Sjeff
/*
 * Acquire the umtx at userland address 'umtx' for owner 'id' (the
 * caller's thread id).  Lock-word protocol: UMTX_UNOWNED -> id, with
 * UMTX_CONTESTED set whenever waiters may exist.  'timo' bounds each
 * sleep in ticks (0 = sleep forever).  Returns 0 on success, EFAULT on
 * a bad address, or the msleep() error from an interrupted/expired
 * sleep (returned on the following loop iteration).
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, &uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq.uq_key);
			umtxq_remove(&uq);
			umtxq_unlock(&uq.uq_key);
			umtx_key_release(&uq.uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq.uq_key);
		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
			/*
			 * PDROP: msleep() releases the chain lock, so it
			 * must be re-taken before umtxq_remove() below.
			 * TDF_UMTXQ still set means we were not woken by
			 * do_unlock() and must dequeue ourselves.
			 */
			error = umtxq_sleep(td, &uq.uq_key,
				       td->td_priority | PCATCH | PDROP,
				       "umtx", timo);
			if (td->td_flags & TDF_UMTXQ) {
				umtxq_lock(&uq.uq_key);
				umtxq_remove(&uq);
				umtxq_unlock(&uq.uq_key);
			}
		} else {
			/* Lost a race before sleeping; dequeue and retry. */
			umtxq_remove(&uq);
			umtxq_unlock(&uq.uq_key);
			error = 0;
		}
		umtx_key_release(&uq.uq_key);
	}

	return (0);	/* NOTREACHED: the loop exits only via return. */
}
510112904Sjeff
511139013Sdavidxustatic int
512139013Sdavidxudo_lock(struct thread *td, struct umtx *umtx, long id,
513139013Sdavidxu	struct timespec *abstime)
514112904Sjeff{
515139013Sdavidxu	struct timespec ts1, ts2;
516139013Sdavidxu	struct timeval tv;
517139013Sdavidxu	int timo, error;
518139013Sdavidxu
519139013Sdavidxu	if (abstime == NULL) {
520139013Sdavidxu		error = _do_lock(td, umtx, id, 0);
521139013Sdavidxu	} else {
522139013Sdavidxu		for (;;) {
523139013Sdavidxu			ts1 = *abstime;
524139013Sdavidxu			getnanotime(&ts2);
525139013Sdavidxu			timespecsub(&ts1, &ts2);
526139013Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
527139013Sdavidxu			if (tv.tv_sec < 0) {
528139013Sdavidxu				error = ETIMEDOUT;
529139013Sdavidxu				break;
530139013Sdavidxu			}
531139013Sdavidxu			timo = tvtohz(&tv);
532139013Sdavidxu			error = _do_lock(td, umtx, id, timo);
533139013Sdavidxu			if (error != ETIMEDOUT) {
534139013Sdavidxu				if (error == ERESTART)
535139013Sdavidxu					error = EINTR;
536139013Sdavidxu				break;
537139013Sdavidxu			}
538139013Sdavidxu		}
539139013Sdavidxu	}
540139013Sdavidxu	return (error);
541139013Sdavidxu}
542139013Sdavidxu
/*
 * Unlock the umtx owned by thread 'id'.  Clears the owner word; if
 * waiters remain, restores the contested bit and wakes exactly one.
 * Returns 0 on success, EFAULT on a bad address, EPERM if the caller
 * is not the owner, or EINVAL if the word was not contested or changed
 * underneath us.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int count, error;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner, UMTX_UNOWNED);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	/*
	 * At the point, a new thread can lock the umtx before we
	 * reach here, so contested bit will not be set, if there
	 * are two or more threads on wait queue, we should set
	 * contensted bit for them.
	 */
	count = umtxq_count(&key);
	if (count <= 0) {
		umtx_key_release(&key);
		return (0);
	}

	/*
	 * If there is second thread waiting on umtx, set contested bit,
	 * if they are resumed before we reach here, it is harmless,
	 * just a bit unefficient.
	 */
	if (count > 1) {
		owner = UMTX_UNOWNED;
		for (;;) {
			/* CAS-loop: OR the contested bit into whatever
			   owner value is current. */
			old = casuptr((intptr_t *)&umtx->u_owner, owner,
				    owner | UMTX_CONTESTED);
			if (old == owner)
				break;
			if (old == -1) {
				umtx_key_release(&key);
				return (EFAULT);
			}
			owner = old;
		}
		/*
		 * Another thread locked the umtx before us, so don't bother
		 * to wake more threads, that thread will do it when it unlocks
		 * the umtx.
		 */
		if ((owner & ~UMTX_CONTESTED) != 0) {
			umtx_key_release(&key);
			return (0);
		}
	}

	/* Wake blocked thread. */
	umtxq_signal(&key);
	umtx_key_release(&key);

	return (0);
}
628139013Sdavidxu
629139013Sdavidxustatic int
630139013Sdavidxudo_unlock_and_wait(struct thread *td, struct umtx *umtx, long id, void *uaddr,
631139013Sdavidxu	struct timespec *abstime)
632139013Sdavidxu{
633139013Sdavidxu	struct umtx_q uq;
634139013Sdavidxu	intptr_t owner;
635139013Sdavidxu	intptr_t old;
636139013Sdavidxu	struct timespec ts1, ts2;
637139013Sdavidxu	struct timeval tv;
638139013Sdavidxu	int timo, error = 0;
639139013Sdavidxu
640139013Sdavidxu	if (umtx == uaddr)
641139013Sdavidxu		return (EINVAL);
642139013Sdavidxu
643139013Sdavidxu	/*
644139013Sdavidxu	 * Make sure we own this mtx.
645139013Sdavidxu	 *
646139013Sdavidxu	 * XXX Need a {fu,su}ptr this is not correct on arch where
647139013Sdavidxu	 * sizeof(intptr_t) != sizeof(long).
648139013Sdavidxu	 */
649139013Sdavidxu	if ((owner = fuword(&umtx->u_owner)) == -1)
650139013Sdavidxu		return (EFAULT);
651139013Sdavidxu
652139013Sdavidxu	if ((owner & ~UMTX_CONTESTED) != id)
653139013Sdavidxu		return (EPERM);
654139013Sdavidxu
655139013Sdavidxu	if ((error = umtxq_queue_me(td, uaddr, &uq)) != 0)
656139013Sdavidxu		return (error);
657139013Sdavidxu
658139013Sdavidxu	old = casuptr((intptr_t *)&umtx->u_owner, id, UMTX_UNOWNED);
659139013Sdavidxu	if (old == -1) {
660139013Sdavidxu		umtxq_lock(&uq.uq_key);
661139013Sdavidxu		umtxq_remove(&uq);
662139013Sdavidxu		umtxq_unlock(&uq.uq_key);
663139013Sdavidxu		umtx_key_release(&uq.uq_key);
664139013Sdavidxu		return (EFAULT);
665139013Sdavidxu	}
666139013Sdavidxu	if (old != id) {
667139013Sdavidxu		error = do_unlock(td, umtx, id);
668139013Sdavidxu		if (error) {
669139013Sdavidxu			umtxq_lock(&uq.uq_key);
670139013Sdavidxu			umtxq_remove(&uq);
671139013Sdavidxu			umtxq_unlock(&uq.uq_key);
672139013Sdavidxu			umtx_key_release(&uq.uq_key);
673139013Sdavidxu			return (error);
674139013Sdavidxu		}
675139013Sdavidxu	}
676139013Sdavidxu	if (abstime == NULL) {
677139013Sdavidxu		umtxq_lock(&uq.uq_key);
678139013Sdavidxu		if (td->td_flags & TDF_UMTXQ)
679139013Sdavidxu			error = umtxq_sleep(td, &uq.uq_key,
680139013Sdavidxu			       td->td_priority | PCATCH, "ucond", 0);
681139013Sdavidxu		umtxq_remove(&uq);
682139013Sdavidxu		umtxq_unlock(&uq.uq_key);
683139013Sdavidxu		if (error == ERESTART)
684139013Sdavidxu			error = EINTR;
685139013Sdavidxu	} else {
686139013Sdavidxu		for (;;) {
687139013Sdavidxu			ts1 = *abstime;
688139013Sdavidxu			getnanotime(&ts2);
689139013Sdavidxu			timespecsub(&ts1, &ts2);
690139013Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
691139013Sdavidxu			if (tv.tv_sec < 0) {
692139013Sdavidxu				error = ETIMEDOUT;
693139013Sdavidxu				break;
694139013Sdavidxu			}
695139013Sdavidxu			timo = tvtohz(&tv);
696139013Sdavidxu			umtxq_lock(&uq.uq_key);
697139013Sdavidxu			if (td->td_flags & TDF_UMTXQ) {
698139013Sdavidxu				error = umtxq_sleep(td, &uq.uq_key,
699139013Sdavidxu						td->td_priority | PCATCH,
700139013Sdavidxu						"ucond", timo);
701139013Sdavidxu				if (!(td->td_flags & TDF_UMTXQ)) {
702139013Sdavidxu					umtxq_unlock(&uq.uq_key);
703139013Sdavidxu					error = 0;
704139013Sdavidxu					break;
705139013Sdavidxu				}
706139013Sdavidxu				if (error != 0 && error != ETIMEDOUT) {
707139013Sdavidxu					umtxq_unlock(&uq.uq_key);
708139013Sdavidxu					if (error == ERESTART)
709139013Sdavidxu						error = EINTR;
710139013Sdavidxu					break;
711139013Sdavidxu				}
712139013Sdavidxu			} else {
713139013Sdavidxu				umtxq_unlock(&uq.uq_key);
714139013Sdavidxu				error = 0;
715139013Sdavidxu				break;
716139013Sdavidxu			}
717139013Sdavidxu		}
718139013Sdavidxu		if (td->td_flags & TDF_UMTXQ) {
719139013Sdavidxu			umtxq_lock(&uq.uq_key);
720139013Sdavidxu			umtxq_remove(&uq);
721139013Sdavidxu			umtxq_unlock(&uq.uq_key);
722139013Sdavidxu		}
723139013Sdavidxu	}
724139013Sdavidxu	umtx_key_release(&uq.uq_key);
725139013Sdavidxu	return (error);
726139013Sdavidxu}
727139013Sdavidxu
728139013Sdavidxustatic int
729139013Sdavidxudo_wake(struct thread *td, void *uaddr, int broadcast)
730139013Sdavidxu{
731139013Sdavidxu	struct umtx_key key;
732139013Sdavidxu	int error;
733139013Sdavidxu
734139013Sdavidxu	if ((error = umtx_key_get(td, uaddr, &key)) != 0)
735139013Sdavidxu		return (error);
736139013Sdavidxu	if (!broadcast)
737139013Sdavidxu		umtxq_signal(&key);
738139013Sdavidxu	else
739139013Sdavidxu		umtxq_broadcast(&key);
740139013Sdavidxu	umtx_key_release(&key);
741139013Sdavidxu	return (0);
742139013Sdavidxu}
743139013Sdavidxu
744139013Sdavidxuint
745139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
746139013Sdavidxu    /* struct umtx *umtx */
747139013Sdavidxu{
748139013Sdavidxu	return _do_lock(td, uap->umtx, td->td_tid, 0);
749139013Sdavidxu}
750139013Sdavidxu
751139013Sdavidxuint
752139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
753139013Sdavidxu    /* struct umtx *umtx */
754139013Sdavidxu{
755139013Sdavidxu	return do_unlock(td, uap->umtx, td->td_tid);
756139013Sdavidxu}
757139013Sdavidxu
758139013Sdavidxuint
759139013Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
760139013Sdavidxu{
761139013Sdavidxu	struct timespec abstime;
762139013Sdavidxu	struct timespec *ts;
763139013Sdavidxu	int error;
764139013Sdavidxu
765139013Sdavidxu	switch(uap->op) {
766139013Sdavidxu	case UMTX_OP_LOCK:
767139013Sdavidxu		/* Allow a null timespec (wait forever). */
768139013Sdavidxu		if (uap->abstime == NULL)
769139013Sdavidxu			ts = NULL;
770139013Sdavidxu		else {
771139013Sdavidxu			error = copyin(uap->abstime, &abstime, sizeof(abstime));
772139013Sdavidxu			if (error != 0)
773139013Sdavidxu				return (error);
774139013Sdavidxu			if (abstime.tv_nsec >= 1000000000 ||
775139013Sdavidxu			    abstime.tv_nsec < 0)
776139013Sdavidxu				return (EINVAL);
777139013Sdavidxu			ts = &abstime;
778139013Sdavidxu		}
779139013Sdavidxu		return do_lock(td, uap->umtx, uap->id, ts);
780139013Sdavidxu	case UMTX_OP_UNLOCK:
781139013Sdavidxu		return do_unlock(td, uap->umtx, uap->id);
782139013Sdavidxu	case UMTX_OP_UNLOCK_AND_WAIT:
783139013Sdavidxu		/* Allow a null timespec (wait forever). */
784139013Sdavidxu		if (uap->abstime == NULL)
785139013Sdavidxu			ts = NULL;
786139013Sdavidxu		else {
787139013Sdavidxu			error = copyin(uap->abstime, &abstime, sizeof(abstime));
788139013Sdavidxu			if (error != 0)
789139013Sdavidxu				return (error);
790139013Sdavidxu			if (abstime.tv_nsec >= 1000000000 ||
791139013Sdavidxu			    abstime.tv_nsec < 0)
792139013Sdavidxu				return (EINVAL);
793139013Sdavidxu			ts = &abstime;
794139013Sdavidxu		}
795139013Sdavidxu		return do_unlock_and_wait(td, uap->umtx, uap->id,
796139013Sdavidxu					  uap->uaddr, ts);
797139013Sdavidxu	case UMTX_OP_WAKE:
798139013Sdavidxu		return do_wake(td, uap->uaddr, uap->id);
799139013Sdavidxu	default:
800139013Sdavidxu		return (EINVAL);
801139013Sdavidxu	}
802139013Sdavidxu}
803