/*-
 * Copyright (c) 2015 Nuxi, https://nuxi.nl/
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/compat/cloudabi/cloudabi_futex.c 328127 2018-01-18 13:43:09Z ed $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/systm.h>
#include <sys/umtx.h>

#include <contrib/cloudabi/cloudabi_types_common.h>

#include <compat/cloudabi/cloudabi_proto.h>
#include <compat/cloudabi/cloudabi_util.h>

/*
 * Futexes for CloudABI.
 *
 * On most systems, futexes are implemented as objects of a single type
 * on which a set of operations can be performed. CloudABI makes a clear
 * distinction between locks and condition variables. A lock may have
 * zero or more associated condition variables. A condition variable is
 * always associated with exactly one lock. There is a strict topology.
 * This approach has two advantages:
 *
 * - This topology is guaranteed to be acyclic. Requeueing of threads
 *   only happens in one direction (from condition variables to locks).
 *   This eases locking.
 * - A futex object for a lock can exist even while the lock itself is
 *   unlocked, as long as threads are waiting on its associated
 *   condition variables. Threads can be requeued to a lock even if the
 *   thread performing the wakeup does not have the lock mapped in its
 *   address space.
 *
 * This futex implementation only implements a single lock type, namely
 * a read-write lock. A regular mutex type would not be necessary, as
 * the read-write lock is as efficient as a mutex if used as such.
 * Userspace futex locks are 32 bits in size:
 *
 * - 1 bit: has threads waiting in kernel-space.
 * - 1 bit: is write-locked.
 * - 30 bits:
 *   - if write-locked: thread ID of owner.
 *   - if not write-locked: number of read locks held.
 *
 * Condition variables are also 32 bits in size. Their values are
 * modified by the kernel exclusively. Zero indicates that the condition
 * variable has no waiting threads. Non-zero indicates the opposite.
 *
 * This implementation is optimal, in the sense that it only wakes up
 * threads if they can actually continue execution. It does not suffer
 * from the thundering herd problem. If multiple threads waiting on a
 * condition variable need to be woken up, only a single thread is
 * scheduled. All other threads are 'donated' to this thread. After the
 * thread manages to reacquire the lock, it requeues its donated threads
 * to the lock.
 *
 * TODO(ed): Integrate this functionality into kern_umtx.c instead.
 * TODO(ed): Store futex objects in a hash table.
 * TODO(ed): Add actual priority inheritance.
 * TODO(ed): Let futex_queue also take priorities into account.
 * TODO(ed): Make locking fine-grained.
 * TODO(ed): Perform sleeps until an actual absolute point in time,
 *           instead of converting the timestamp to a relative value.
 */
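
/*
 * Illustrative sketch (not part of the implementation): given a 32-bit
 * lock value "v" as read from userspace, the encoding described above
 * decomposes as follows, using the CloudABI constants already used
 * throughout this file:
 *
 *	bool kernel_managed = (v & CLOUDABI_LOCK_KERNEL_MANAGED) != 0;
 *	bool write_locked = (v & CLOUDABI_LOCK_WRLOCKED) != 0;
 *	uint32_t payload = v &
 *	    ~(CLOUDABI_LOCK_WRLOCKED | CLOUDABI_LOCK_KERNEL_MANAGED);
 *
 * If write_locked is set, payload holds the thread ID of the owner;
 * otherwise it holds the number of read locks currently held. A value
 * equal to CLOUDABI_LOCK_UNLOCKED means the lock is completely idle.
 */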

struct futex_address;
struct futex_condvar;
struct futex_lock;
struct futex_queue;
struct futex_waiter;

/* Identifier of a location in memory. */
struct futex_address {
	struct umtx_key			fa_key;
};

/* A set of waiting threads. */
struct futex_queue {
	STAILQ_HEAD(, futex_waiter)	fq_list;
	unsigned int			fq_count;
};

/* Condition variables. */
struct futex_condvar {
	/* Address of the condition variable. */
	struct futex_address		fc_address;

	/* The lock the waiters should be moved to when signalled. */
	struct futex_lock *		fc_lock;

	/* Threads waiting on the condition variable. */
	struct futex_queue		fc_waiters;
	/*
	 * Number of threads blocked on this condition variable, or
	 * being blocked on the lock after being requeued.
	 */
	unsigned int			fc_waitcount;

	/* Global list pointers. */
	LIST_ENTRY(futex_condvar)	fc_next;
};

/* Read-write locks. */
struct futex_lock {
	/* Address of the lock. */
	struct futex_address		fl_address;

	/*
	 * Current owner of the lock. LOCK_UNMANAGED if the lock is
	 * currently not owned by the kernel. LOCK_OWNER_UNKNOWN in case
	 * the owner is not known (e.g., when the lock is read-locked).
	 */
	cloudabi_tid_t			fl_owner;
#define LOCK_UNMANAGED 0x0
#define LOCK_OWNER_UNKNOWN 0x1

	/* Writers blocked on the lock. */
	struct futex_queue		fl_writers;
	/* Readers blocked on the lock. */
	struct futex_queue		fl_readers;
	/* Number of threads blocked on this lock + condition variables. */
	unsigned int			fl_waitcount;

	/* Global list pointers. */
	LIST_ENTRY(futex_lock)		fl_next;
};

/* Information associated with a thread blocked on an object. */
struct futex_waiter {
	/* Thread ID. */
	cloudabi_tid_t			fw_tid;
	/* Condition variable used for waiting. */
	struct cv			fw_wait;

	/* Queue this waiter is currently placed in. */
	struct futex_queue *		fw_queue;
	/* List pointers of fw_queue. */
	STAILQ_ENTRY(futex_waiter)	fw_next;

	/* Lock has been acquired. */
	bool				fw_locked;
	/* If not locked, threads that should block after acquiring. */
	struct futex_queue		fw_donated;
};

/* Global data structures. */
static MALLOC_DEFINE(M_FUTEX, "futex", "CloudABI futex");

static struct sx futex_global_lock;
SX_SYSINIT(futex_global_lock, &futex_global_lock, "CloudABI futex global lock");

static LIST_HEAD(, futex_lock) futex_lock_list =
    LIST_HEAD_INITIALIZER(&futex_lock_list);
static LIST_HEAD(, futex_condvar) futex_condvar_list =
    LIST_HEAD_INITIALIZER(&futex_condvar_list);

/* Utility functions. */
static void futex_lock_assert(const struct futex_lock *);
static struct futex_lock *futex_lock_lookup_locked(struct futex_address *);
static void futex_lock_release(struct futex_lock *);
static int futex_lock_tryrdlock(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_unmanage(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_update_owner(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_wake_up_next(struct futex_lock *, cloudabi_lock_t *);
static unsigned int futex_queue_count(const struct futex_queue *);
static void futex_queue_init(struct futex_queue *);
static void futex_queue_requeue(struct futex_queue *, struct futex_queue *,
    unsigned int);
static int futex_queue_sleep(struct futex_queue *, struct futex_lock *,
    struct futex_waiter *, struct thread *, cloudabi_clockid_t,
    cloudabi_timestamp_t, cloudabi_timestamp_t, bool);
static cloudabi_tid_t futex_queue_tid_best(const struct futex_queue *);
static void futex_queue_wake_up_all(struct futex_queue *);
static void futex_queue_wake_up_best(struct futex_queue *);
static void futex_queue_wake_up_donate(struct futex_queue *, unsigned int);
static int futex_user_load(uint32_t *, uint32_t *);
static int futex_user_store(uint32_t *, uint32_t);
static int futex_user_cmpxchg(uint32_t *, uint32_t, uint32_t *, uint32_t);

/*
 * futex_address operations.
 */

static int
futex_address_create(struct futex_address *fa, struct thread *td,
    const void *object, cloudabi_scope_t scope)
{

	KASSERT(td == curthread,
	    ("Can only create umtx keys for the current thread"));
	switch (scope) {
	case CLOUDABI_SCOPE_PRIVATE:
		return (umtx_key_get(object, TYPE_FUTEX, THREAD_SHARE,
		    &fa->fa_key));
	case CLOUDABI_SCOPE_SHARED:
		return (umtx_key_get(object, TYPE_FUTEX, AUTO_SHARE,
		    &fa->fa_key));
	default:
		return (EINVAL);
	}
}

static void
futex_address_free(struct futex_address *fa)
{

	umtx_key_release(&fa->fa_key);
}

static bool
futex_address_match(const struct futex_address *fa1,
    const struct futex_address *fa2)
{

	return (umtx_key_match(&fa1->fa_key, &fa2->fa_key));
}

/*
 * futex_condvar operations.
 */

static void
futex_condvar_assert(const struct futex_condvar *fc)
{

	KASSERT(fc->fc_waitcount >= futex_queue_count(&fc->fc_waiters),
	    ("Total number of waiters cannot be smaller than the wait queue"));
	futex_lock_assert(fc->fc_lock);
}

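/*
 * Looks up the futex_condvar object belonging to a userspace condition
 * variable. Returns ENOENT if the condition variable currently has no
 * kernel state (i.e., no waiters). On success, the global futex lock is
 * left held; it is dropped again by futex_condvar_release().
 */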
static int
futex_condvar_lookup(struct thread *td, const cloudabi_condvar_t *address,
    cloudabi_scope_t scope, struct futex_condvar **fcret)
{
	struct futex_address fa_condvar;
	struct futex_condvar *fc;
	int error;

	error = futex_address_create(&fa_condvar, td, address, scope);
	if (error != 0)
		return (error);

	sx_xlock(&futex_global_lock);
	LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
		if (futex_address_match(&fc->fc_address, &fa_condvar)) {
			/* Found matching condition variable object. */
			futex_address_free(&fa_condvar);
			futex_condvar_assert(fc);
			*fcret = fc;
			return (0);
		}
	}
	sx_xunlock(&futex_global_lock);
	futex_address_free(&fa_condvar);
	return (ENOENT);
}

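/*
 * Looks up the futex_condvar object for a userspace condition variable,
 * creating it (and, through futex_lock_lookup_locked(), its associated
 * lock object) if it does not exist yet. Fails with EINVAL if the
 * condition variable is already bound to a different lock. On success,
 * the global futex lock is left held.
 */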
static int
futex_condvar_lookup_or_create(struct thread *td,
    const cloudabi_condvar_t *condvar, cloudabi_scope_t condvar_scope,
    const cloudabi_lock_t *lock, cloudabi_scope_t lock_scope,
    struct futex_condvar **fcret)
{
	struct futex_address fa_condvar, fa_lock;
	struct futex_condvar *fc;
	struct futex_lock *fl;
	int error;

	error = futex_address_create(&fa_condvar, td, condvar, condvar_scope);
	if (error != 0)
		return (error);
	error = futex_address_create(&fa_lock, td, lock, lock_scope);
	if (error != 0) {
		futex_address_free(&fa_condvar);
		return (error);
	}

	sx_xlock(&futex_global_lock);
	LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
		if (!futex_address_match(&fc->fc_address, &fa_condvar))
			continue;
		fl = fc->fc_lock;
		if (!futex_address_match(&fl->fl_address, &fa_lock)) {
			/* Condition variable is owned by a different lock. */
			futex_address_free(&fa_condvar);
			futex_address_free(&fa_lock);
			sx_xunlock(&futex_global_lock);
			return (EINVAL);
		}

		/* Found fully matching condition variable. */
		futex_address_free(&fa_condvar);
		futex_address_free(&fa_lock);
		futex_condvar_assert(fc);
		*fcret = fc;
		return (0);
	}

	/* None found. Create new condition variable object. */
	fc = malloc(sizeof(*fc), M_FUTEX, M_WAITOK);
	fc->fc_address = fa_condvar;
	fc->fc_lock = futex_lock_lookup_locked(&fa_lock);
	futex_queue_init(&fc->fc_waiters);
	fc->fc_waitcount = 0;
	LIST_INSERT_HEAD(&futex_condvar_list, fc, fc_next);
	*fcret = fc;
	return (0);
}

static void
futex_condvar_release(struct futex_condvar *fc)
{
	struct futex_lock *fl;

	futex_condvar_assert(fc);
	fl = fc->fc_lock;
	if (fc->fc_waitcount == 0) {
		/* Condition variable has no waiters. Deallocate it. */
		futex_address_free(&fc->fc_address);
		LIST_REMOVE(fc, fc_next);
		free(fc, M_FUTEX);
	}
	futex_lock_release(fl);
}

static int
futex_condvar_unmanage(struct futex_condvar *fc,
    cloudabi_condvar_t *condvar)
{

	if (futex_queue_count(&fc->fc_waiters) != 0)
		return (0);
	return (futex_user_store(condvar, CLOUDABI_CONDVAR_HAS_NO_WAITERS));
}

/*
 * futex_lock operations.
 */

static void
futex_lock_assert(const struct futex_lock *fl)
{

	/*
	 * A futex lock can only be kernel-managed if it has waiters.
	 * Vice versa: if a futex lock has waiters, it must be
	 * kernel-managed.
	 */
	KASSERT((fl->fl_owner == LOCK_UNMANAGED) ==
	    (futex_queue_count(&fl->fl_readers) == 0 &&
	    futex_queue_count(&fl->fl_writers) == 0),
	    ("Managed locks must have waiting threads"));
	KASSERT(fl->fl_waitcount != 0 || fl->fl_owner == LOCK_UNMANAGED,
	    ("Lock with no waiters must be unmanaged"));
}

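/*
 * Looks up (or creates) the futex_lock object for a userspace lock. On
 * success, the global futex lock is left held; it is dropped again by
 * futex_lock_release().
 */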
static int
futex_lock_lookup(struct thread *td, const cloudabi_lock_t *address,
    cloudabi_scope_t scope, struct futex_lock **flret)
{
	struct futex_address fa;
	int error;

	error = futex_address_create(&fa, td, address, scope);
	if (error != 0)
		return (error);

	sx_xlock(&futex_global_lock);
	*flret = futex_lock_lookup_locked(&fa);
	return (0);
}

static struct futex_lock *
futex_lock_lookup_locked(struct futex_address *fa)
{
	struct futex_lock *fl;

	LIST_FOREACH(fl, &futex_lock_list, fl_next) {
		if (futex_address_match(&fl->fl_address, fa)) {
			/* Found matching lock object. */
			futex_address_free(fa);
			futex_lock_assert(fl);
			return (fl);
		}
	}

	/* None found. Create new lock object. */
	fl = malloc(sizeof(*fl), M_FUTEX, M_WAITOK);
	fl->fl_address = *fa;
	fl->fl_owner = LOCK_UNMANAGED;
	futex_queue_init(&fl->fl_readers);
	futex_queue_init(&fl->fl_writers);
	fl->fl_waitcount = 0;
	LIST_INSERT_HEAD(&futex_lock_list, fl, fl_next);
	return (fl);
}

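/*
 * Acquires a read lock on behalf of the calling thread, sleeping on the
 * lock's reader queue if the lock cannot be acquired immediately.
 */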
static int
futex_lock_rdlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_waiter fw;
	int error;

	error = futex_lock_tryrdlock(fl, lock);
	if (error == EBUSY) {
		/* Suspend execution. */
		KASSERT(fl->fl_owner != LOCK_UNMANAGED,
		    ("Attempted to sleep on an unmanaged lock"));
		error = futex_queue_sleep(&fl->fl_readers, fl, &fw, td,
		    clock_id, timeout, precision, abstime);
		KASSERT((error == 0) == fw.fw_locked,
		    ("Should have acquired the read lock on success"));
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Lock functions cannot receive threads"));
	}
	if (error != 0)
		futex_lock_unmanage(fl, lock);
	return (error);
}

static void
futex_lock_release(struct futex_lock *fl)
{

	futex_lock_assert(fl);
	if (fl->fl_waitcount == 0) {
		/* Lock object is unreferenced. Deallocate it. */
		KASSERT(fl->fl_owner == LOCK_UNMANAGED,
		    ("Attempted to free a managed lock"));
		futex_address_free(&fl->fl_address);
		LIST_REMOVE(fl, fl_next);
		free(fl, M_FUTEX);
	}
	sx_xunlock(&futex_global_lock);
}

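/*
 * Marks the lock unmanaged and clears the kernel-managed bit of the
 * userspace lock value, but only if no threads are blocked on the lock
 * anymore.
 */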
static int
futex_lock_unmanage(struct futex_lock *fl, cloudabi_lock_t *lock)
{
	cloudabi_lock_t cmp, old;
	int error;

	if (futex_queue_count(&fl->fl_readers) == 0 &&
	    futex_queue_count(&fl->fl_writers) == 0) {
		/* Lock should be unmanaged. */
		fl->fl_owner = LOCK_UNMANAGED;

		/* Clear kernel-managed bit. */
		error = futex_user_load(lock, &old);
		if (error != 0)
			return (error);
		for (;;) {
			cmp = old;
			error = futex_user_cmpxchg(lock, cmp, &old,
			    cmp & ~CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);
			if (old == cmp)
				break;
		}
	}
	return (0);
}

/* Sets the owner of a lock, based on its userspace lock value. */
static void
futex_lock_set_owner(struct futex_lock *fl, cloudabi_lock_t lock)
{

	/* Lock has no explicit owner. */
	if ((lock & ~CLOUDABI_LOCK_WRLOCKED) == 0) {
		fl->fl_owner = LOCK_OWNER_UNKNOWN;
		return;
	}
	lock &= ~(CLOUDABI_LOCK_WRLOCKED | CLOUDABI_LOCK_KERNEL_MANAGED);

	/* Don't allow userspace to silently unlock. */
	if (lock == LOCK_UNMANAGED) {
		fl->fl_owner = LOCK_OWNER_UNKNOWN;
		return;
	}
	fl->fl_owner = lock;
}

static int
futex_lock_unlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock)
{
	int error;

	/* Validate that this thread is allowed to unlock. */
	error = futex_lock_update_owner(fl, lock);
	if (error != 0)
		return (error);
	if (fl->fl_owner != LOCK_UNMANAGED && fl->fl_owner != td->td_tid)
		return (EPERM);
	return (futex_lock_wake_up_next(fl, lock));
}

/* Syncs in the owner of the lock from userspace if needed. */
static int
futex_lock_update_owner(struct futex_lock *fl, cloudabi_lock_t *address)
{
	cloudabi_lock_t lock;
	int error;

	if (fl->fl_owner == LOCK_OWNER_UNKNOWN) {
		error = futex_user_load(address, &lock);
		if (error != 0)
			return (error);
		futex_lock_set_owner(fl, lock);
	}
	return (0);
}

static int
futex_lock_tryrdlock(struct futex_lock *fl, cloudabi_lock_t *address)
{
	cloudabi_lock_t old, cmp;
	int error;

	if (fl->fl_owner != LOCK_UNMANAGED) {
		/* Lock is already acquired. */
		return (EBUSY);
	}

	old = CLOUDABI_LOCK_UNLOCKED;
	for (;;) {
		if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
			/*
			 * Userspace claims the lock is kernel-managed,
			 * even though the kernel disagrees.
			 */
			return (EINVAL);
		}

		if ((old & CLOUDABI_LOCK_WRLOCKED) == 0) {
			/*
			 * Lock is not write-locked. Attempt to acquire
			 * it by increasing the read count.
			 */
			cmp = old;
			error = futex_user_cmpxchg(address, cmp, &old, cmp + 1);
			if (error != 0)
				return (error);
			if (old == cmp) {
				/* Success. */
				return (0);
			}
		} else {
			/* Lock is write-locked. Make it kernel-managed. */
			cmp = old;
			error = futex_user_cmpxchg(address, cmp, &old,
			    cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);
			if (old == cmp) {
				/* Success. */
				futex_lock_set_owner(fl, cmp);
				return (EBUSY);
			}
		}
	}
}

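/*
 * Attempts to write-lock the userspace lock on behalf of thread "tid".
 * Returns 0 if the lock was acquired, EBUSY if it is contended (after
 * marking it kernel-managed in userspace) and EDEADLK on recursive
 * acquisition.
 */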
static int
futex_lock_trywrlock(struct futex_lock *fl, cloudabi_lock_t *address,
    cloudabi_tid_t tid, bool force_kernel_managed)
{
	cloudabi_lock_t old, new, cmp;
	int error;

	if (fl->fl_owner == tid) {
		/* Attempted to acquire lock recursively. */
		return (EDEADLK);
	}
	if (fl->fl_owner != LOCK_UNMANAGED) {
		/* Lock is already acquired. */
		return (EBUSY);
	}

	old = CLOUDABI_LOCK_UNLOCKED;
	for (;;) {
		if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
			/*
			 * Userspace claims the lock is kernel-managed,
			 * even though the kernel disagrees.
			 */
			return (EINVAL);
		}
		if (old == (tid | CLOUDABI_LOCK_WRLOCKED)) {
			/* Attempted to acquire lock recursively. */
			return (EDEADLK);
		}

		if (old == CLOUDABI_LOCK_UNLOCKED) {
			/* Lock is unlocked. Attempt to acquire it. */
			new = tid | CLOUDABI_LOCK_WRLOCKED;
			if (force_kernel_managed)
				new |= CLOUDABI_LOCK_KERNEL_MANAGED;
			error = futex_user_cmpxchg(address,
			    CLOUDABI_LOCK_UNLOCKED, &old, new);
			if (error != 0)
				return (error);
			if (old == CLOUDABI_LOCK_UNLOCKED) {
				/* Success. */
				if (force_kernel_managed)
					fl->fl_owner = tid;
				return (0);
			}
		} else {
			/* Lock is still locked. Make it kernel-managed. */
			cmp = old;
			error = futex_user_cmpxchg(address, cmp, &old,
			    cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);
			if (old == cmp) {
				/* Success. */
				futex_lock_set_owner(fl, cmp);
				return (EBUSY);
			}
		}
	}
}

static int
futex_lock_wake_up_next(struct futex_lock *fl, cloudabi_lock_t *lock)
{
	cloudabi_tid_t tid;
	int error;

	/*
	 * Determine which thread(s) to wake up. Prefer waking up
	 * writers over readers to prevent write starvation.
	 */
	if (futex_queue_count(&fl->fl_writers) > 0) {
		/* Transfer ownership to a single write-locker. */
		if (futex_queue_count(&fl->fl_writers) > 1 ||
		    futex_queue_count(&fl->fl_readers) > 0) {
			/* Lock should remain managed afterwards. */
			tid = futex_queue_tid_best(&fl->fl_writers);
			error = futex_user_store(lock,
			    tid | CLOUDABI_LOCK_WRLOCKED |
			    CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);

			futex_queue_wake_up_best(&fl->fl_writers);
			fl->fl_owner = tid;
		} else {
			/* Lock can become unmanaged afterwards. */
			error = futex_user_store(lock,
			    futex_queue_tid_best(&fl->fl_writers) |
			    CLOUDABI_LOCK_WRLOCKED);
			if (error != 0)
				return (error);

			futex_queue_wake_up_best(&fl->fl_writers);
			fl->fl_owner = LOCK_UNMANAGED;
		}
	} else {
		/* Transfer ownership to all read-lockers (if any). */
		error = futex_user_store(lock,
		    futex_queue_count(&fl->fl_readers));
		if (error != 0)
			return (error);

		/* Wake up all threads. */
		futex_queue_wake_up_all(&fl->fl_readers);
		fl->fl_owner = LOCK_UNMANAGED;
	}
	return (0);
}

static int
futex_lock_wrlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime,
    struct futex_queue *donated)
{
	struct futex_waiter fw;
	int error;

	error = futex_lock_trywrlock(fl, lock, td->td_tid,
	    futex_queue_count(donated) > 0);

	if (error == 0 || error == EBUSY) {
		/* Put donated threads in queue before suspending. */
		KASSERT(futex_queue_count(donated) == 0 ||
		    fl->fl_owner != LOCK_UNMANAGED,
		    ("Lock should be managed if we are going to donate"));
		futex_queue_requeue(donated, &fl->fl_writers, UINT_MAX);
	} else {
		/*
		 * This thread cannot deal with the donated threads.
		 * Wake up the next thread and let it try it by itself.
		 */
		futex_queue_wake_up_donate(donated, UINT_MAX);
	}

	if (error == EBUSY) {
		/* Suspend execution if the lock was busy. */
		KASSERT(fl->fl_owner != LOCK_UNMANAGED,
		    ("Attempted to sleep on an unmanaged lock"));
		error = futex_queue_sleep(&fl->fl_writers, fl, &fw, td,
		    clock_id, timeout, precision, abstime);
		KASSERT((error == 0) == fw.fw_locked,
		    ("Should have locked write lock on success"));
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Lock functions cannot receive threads"));
	}
	if (error != 0)
		futex_lock_unmanage(fl, lock);
	return (error);
}

/*
 * futex_queue operations.
 */

static cloudabi_tid_t
futex_queue_tid_best(const struct futex_queue *fq)
{

	return (STAILQ_FIRST(&fq->fq_list)->fw_tid);
}

static unsigned int
futex_queue_count(const struct futex_queue *fq)
{

	return (fq->fq_count);
}

static void
futex_queue_init(struct futex_queue *fq)
{

	STAILQ_INIT(&fq->fq_list);
	fq->fq_count = 0;
}

/* Converts a relative timestamp to an sbintime. */
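/*
 * sbintime_t is a 32.32 fixed-point format: the upper 32 bits hold
 * whole seconds and the lower 32 bits hold the fraction. The seconds
 * are therefore shifted left by 32 and the nanoseconds are scaled by
 * 2^32 / 10^9. For example (purely illustrative), a relative timestamp
 * of 1500000000 ns yields s = 1 and ns = 500000000, which converts to
 * (1 << 32) + 0x80000000, i.e. one and a half seconds.
 */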
static sbintime_t
futex_queue_convert_timestamp_relative(cloudabi_timestamp_t ts)
{
	cloudabi_timestamp_t s, ns;

	s = ts / 1000000000;
	ns = ts % 1000000000;
	if (s > INT32_MAX)
		return (INT64_MAX);
	return ((s << 32) + (ns << 32) / 1000000000);
}

/*
 * Converts a timeout and precision to a pair of sbintime values,
 * converting an absolute timestamp to a relative one first if needed.
 */
static int
futex_queue_convert_timestamp(struct thread *td, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision,
    sbintime_t *sbttimeout, sbintime_t *sbtprecision, bool abstime)
{
	cloudabi_timestamp_t now;
	int error;

	if (abstime) {
		/* Make the time relative. */
		error = cloudabi_clock_time_get(td, clock_id, &now);
		if (error != 0)
			return (error);
		timeout = timeout < now ? 0 : timeout - now;
	}

	*sbttimeout = futex_queue_convert_timestamp_relative(timeout);
	*sbtprecision = futex_queue_convert_timestamp_relative(precision);
	return (0);
}

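/*
 * Blocks the calling thread on the given wait queue until it is woken
 * up or the (optional) timeout expires. The global futex lock must be
 * held on entry; it is dropped while sleeping and reacquired before
 * returning.
 */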
static int
futex_queue_sleep(struct futex_queue *fq, struct futex_lock *fl,
    struct futex_waiter *fw, struct thread *td, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	sbintime_t sbttimeout, sbtprecision;
	int error;

	/* Initialize futex_waiter object. */
	fw->fw_tid = td->td_tid;
	fw->fw_locked = false;
	futex_queue_init(&fw->fw_donated);

	if (timeout != UINT64_MAX) {
		/* Convert timeout duration. */
		error = futex_queue_convert_timestamp(td, clock_id, timeout,
		    precision, &sbttimeout, &sbtprecision, abstime);
		if (error != 0)
			return (error);
	}

	/* Place object in the queue. */
	fw->fw_queue = fq;
	STAILQ_INSERT_TAIL(&fq->fq_list, fw, fw_next);
	++fq->fq_count;

	cv_init(&fw->fw_wait, "futex");
	++fl->fl_waitcount;

	futex_lock_assert(fl);
	if (timeout == UINT64_MAX) {
		/* Wait without a timeout. */
		error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
	} else {
		/* Wait respecting the timeout. */
		error = cv_timedwait_sig_sbt(&fw->fw_wait, &futex_global_lock,
		    sbttimeout, sbtprecision, 0);
		futex_lock_assert(fl);
		if (error == EWOULDBLOCK &&
		    fw->fw_queue != NULL && fw->fw_queue != fq) {
			/*
			 * We got signalled on a condition variable, but
			 * observed a timeout while waiting to reacquire
			 * the lock. In other words, we didn't actually
			 * time out. Go back to sleep and wait for the
			 * lock to be reacquired.
			 */
			error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
		}
	}
	futex_lock_assert(fl);

	--fl->fl_waitcount;
	cv_destroy(&fw->fw_wait);

	fq = fw->fw_queue;
	if (fq == NULL) {
		/* Thread got dequeued, so we've slept successfully. */
		return (0);
	}

	/* Thread is still enqueued. Remove it. */
	KASSERT(error != 0, ("Woken up thread is still enqueued"));
	STAILQ_REMOVE(&fq->fq_list, fw, futex_waiter, fw_next);
	--fq->fq_count;
	return (error == EWOULDBLOCK ? ETIMEDOUT : error);
}

/* Moves up to nwaiters waiters from one queue to another. */
static void
futex_queue_requeue(struct futex_queue *fqfrom, struct futex_queue *fqto,
    unsigned int nwaiters)
{
	struct futex_waiter *fw;

	/* Move waiters to the target queue. */
	while (nwaiters-- > 0 && !STAILQ_EMPTY(&fqfrom->fq_list)) {
		fw = STAILQ_FIRST(&fqfrom->fq_list);
		STAILQ_REMOVE_HEAD(&fqfrom->fq_list, fw_next);
		--fqfrom->fq_count;

		fw->fw_queue = fqto;
		STAILQ_INSERT_TAIL(&fqto->fq_list, fw, fw_next);
		++fqto->fq_count;
	}
}

/* Wakes up all waiters in a queue. */
static void
futex_queue_wake_up_all(struct futex_queue *fq)
{
	struct futex_waiter *fw;

	STAILQ_FOREACH(fw, &fq->fq_list, fw_next) {
		fw->fw_locked = true;
		fw->fw_queue = NULL;
		cv_signal(&fw->fw_wait);
	}

	STAILQ_INIT(&fq->fq_list);
	fq->fq_count = 0;
}

/*
 * Wakes up the best waiter (i.e., the waiter having the highest
 * priority) in a queue.
 */
static void
futex_queue_wake_up_best(struct futex_queue *fq)
{
	struct futex_waiter *fw;

	fw = STAILQ_FIRST(&fq->fq_list);
	fw->fw_locked = true;
	fw->fw_queue = NULL;
	cv_signal(&fw->fw_wait);

	STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
	--fq->fq_count;
}

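/*
 * Wakes up the first waiter in the queue without handing it the lock
 * and donates up to nwaiters of the remaining waiters to it, so that it
 * can requeue them once it has acquired the lock itself.
 */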
static void
futex_queue_wake_up_donate(struct futex_queue *fq, unsigned int nwaiters)
{
	struct futex_waiter *fw;

	fw = STAILQ_FIRST(&fq->fq_list);
	if (fw == NULL)
		return;
	fw->fw_locked = false;
	fw->fw_queue = NULL;
	cv_signal(&fw->fw_wait);

	STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
	--fq->fq_count;
	futex_queue_requeue(fq, &fw->fw_donated, nwaiters);
}

/*
 * futex_user operations. Used to adjust values in userspace.
 */

static int
futex_user_load(uint32_t *obj, uint32_t *val)
{

	return (fueword32(obj, val) != 0 ? EFAULT : 0);
}

static int
futex_user_store(uint32_t *obj, uint32_t val)
{

	return (suword32(obj, val) != 0 ? EFAULT : 0);
}

static int
futex_user_cmpxchg(uint32_t *obj, uint32_t cmp, uint32_t *old, uint32_t new)
{

	return (casueword32(obj, cmp, old, new) != 0 ? EFAULT : 0);
}

/*
 * Blocking calls: acquiring locks, waiting on condition variables.
 */

int
cloudabi_futex_condvar_wait(struct thread *td, cloudabi_condvar_t *condvar,
    cloudabi_scope_t condvar_scope, cloudabi_lock_t *lock,
    cloudabi_scope_t lock_scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_condvar *fc;
	struct futex_lock *fl;
	struct futex_waiter fw;
	int error, error2;

	/* Lookup condition variable object. */
	error = futex_condvar_lookup_or_create(td, condvar, condvar_scope, lock,
	    lock_scope, &fc);
	if (error != 0)
		return (error);
	fl = fc->fc_lock;

	/*
	 * Set the condition variable to something other than
	 * CLOUDABI_CONDVAR_HAS_NO_WAITERS to make userspace threads
	 * call into the kernel to perform wakeups.
	 */
	error = futex_user_store(condvar, ~CLOUDABI_CONDVAR_HAS_NO_WAITERS);
	if (error != 0) {
		futex_condvar_release(fc);
		return (error);
	}

	/* Drop the lock. */
	error = futex_lock_unlock(fl, td, lock);
	if (error != 0) {
		futex_condvar_unmanage(fc, condvar);
		futex_condvar_release(fc);
		return (error);
	}

	/* Go to sleep. */
	++fc->fc_waitcount;
	error = futex_queue_sleep(&fc->fc_waiters, fc->fc_lock, &fw, td,
	    clock_id, timeout, precision, abstime);
	if (fw.fw_locked) {
		/* Waited and got the lock assigned to us. */
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Received threads while being locked"));
	} else if (error == 0 || error == ETIMEDOUT) {
		if (error != 0)
			futex_condvar_unmanage(fc, condvar);
		/*
		 * Got woken up without having the lock assigned to us.
		 * This can happen in two cases:
		 *
		 * 1. We observed a timeout on a condition variable.
		 * 2. We got signalled on a condition variable while the
		 *    associated lock is unlocked. We are the first
		 *    thread that gets woken up. This thread is
		 *    responsible for reacquiring the userspace lock.
		 */
		error2 = futex_lock_wrlock(fl, td, lock,
		    CLOUDABI_CLOCK_MONOTONIC, UINT64_MAX, 0, abstime,
		    &fw.fw_donated);
		if (error2 != 0)
			error = error2;
	} else {
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Received threads on error"));
		futex_condvar_unmanage(fc, condvar);
		futex_lock_unmanage(fl, lock);
	}
	--fc->fc_waitcount;
	futex_condvar_release(fc);
	return (error);
}

int
cloudabi_futex_lock_rdlock(struct thread *td, cloudabi_lock_t *lock,
    cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_lock *fl;
	int error;

	/* Look up lock object. */
	error = futex_lock_lookup(td, lock, scope, &fl);
	if (error != 0)
		return (error);

	error = futex_lock_rdlock(fl, td, lock, clock_id, timeout,
	    precision, abstime);
	futex_lock_release(fl);
	return (error);
}

int
cloudabi_futex_lock_wrlock(struct thread *td, cloudabi_lock_t *lock,
    cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_lock *fl;
	struct futex_queue fq;
	int error;

	/* Look up lock object. */
	error = futex_lock_lookup(td, lock, scope, &fl);
	if (error != 0)
		return (error);

	futex_queue_init(&fq);
	error = futex_lock_wrlock(fl, td, lock, clock_id, timeout,
	    precision, abstime, &fq);
	futex_lock_release(fl);
	return (error);
}

/*
 * Non-blocking calls: releasing locks, signalling condition variables.
 */

int
cloudabi_sys_condvar_signal(struct thread *td,
    struct cloudabi_sys_condvar_signal_args *uap)
{
	struct futex_condvar *fc;
	struct futex_lock *fl;
	cloudabi_nthreads_t nwaiters;
	int error;

	nwaiters = uap->nwaiters;
	if (nwaiters == 0) {
		/* No threads to wake up. */
		return (0);
	}

	/* Look up futex object. */
	error = futex_condvar_lookup(td, uap->condvar, uap->scope, &fc);
	if (error != 0) {
		/* Race condition: condition variable with no waiters. */
		return (error == ENOENT ? 0 : error);
	}
	fl = fc->fc_lock;

	if (fl->fl_owner == LOCK_UNMANAGED) {
		/*
		 * The lock is currently not managed by the kernel,
		 * meaning we must attempt to acquire the userspace lock
		 * first. We cannot requeue threads to an unmanaged lock,
		 * as these threads will then never be scheduled.
		 *
		 * Unfortunately, the memory address of the lock is
		 * unknown from this context, meaning that we cannot
		 * acquire the lock on behalf of the first thread to be
		 * scheduled. The lock may not even be mapped within the
		 * address space of the current thread.
		 *
		 * To solve this, wake up a single waiter that will
		 * attempt to acquire the lock. Donate all of the other
		 * waiters that need to be woken up to this waiter, so
		 * it can requeue them after acquiring the lock.
		 */
		futex_queue_wake_up_donate(&fc->fc_waiters, nwaiters - 1);
	} else {
		/*
		 * Lock is already managed by the kernel. This makes it
		 * easy, as we can requeue the threads from the
		 * condition variable directly to the associated lock.
		 */
		futex_queue_requeue(&fc->fc_waiters, &fl->fl_writers, nwaiters);
	}

	/* Clear userspace condition variable if all waiters are gone. */
	error = futex_condvar_unmanage(fc, uap->condvar);
	futex_condvar_release(fc);
	return (error);
}

int
cloudabi_sys_lock_unlock(struct thread *td,
    struct cloudabi_sys_lock_unlock_args *uap)
{
	struct futex_lock *fl;
	int error;

	error = futex_lock_lookup(td, uap->lock, uap->scope, &fl);
	if (error != 0)
		return (error);
	error = futex_lock_unlock(fl, td, uap->lock);
	futex_lock_release(fl);
	return (error);
}