1/*- 2 * Copyright (c) 2015 Nuxi, https://nuxi.nl/ 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: stable/11/sys/compat/cloudabi/cloudabi_futex.c 328127 2018-01-18 13:43:09Z ed $"); 28 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/limits.h> 32#include <sys/lock.h> 33#include <sys/malloc.h> 34#include <sys/mutex.h> 35#include <sys/proc.h> 36#include <sys/sx.h> 37#include <sys/systm.h> 38#include <sys/umtx.h> 39 40#include <contrib/cloudabi/cloudabi_types_common.h> 41 42#include <compat/cloudabi/cloudabi_proto.h> 43#include <compat/cloudabi/cloudabi_util.h> 44 45/* 46 * Futexes for CloudABI. 
 *
 * On most systems, futexes are implemented as objects of a single type
 * on which a set of operations can be performed. CloudABI makes a clear
 * distinction between locks and condition variables. A lock may have
 * zero or more associated condition variables. A condition variable is
 * always associated with exactly one lock. There is a strict topology.
 * This approach has two advantages:
 *
 * - This topology is guaranteed to be acyclic. Requeueing of threads
 *   only happens in one direction (from condition variables to locks).
 *   This eases locking.
 * - It means that a futex object for a lock exists when it is unlocked,
 *   but has threads waiting on associated condition variables. Threads
 *   can be requeued to a lock even if the thread performing the wakeup
 *   does not have the lock mapped in its address space.
 *
 * This futex implementation only implements a single lock type, namely
 * a read-write lock. A regular mutex type would not be necessary, as
 * the read-write lock is as efficient as a mutex if used as such.
 * Userspace futex locks are 32 bits in size:
 *
 * - 1 bit: has threads waiting in kernel-space.
 * - 1 bit: is write-locked.
 * - 30 bits:
 *   - if write-locked: thread ID of owner.
 *   - if not write-locked: number of read locks held.
 *
 * Condition variables are also 32 bits in size. Its value is modified
 * by kernel-space exclusively. Zero indicates that it has no waiting
 * threads. Non-zero indicates the opposite.
 *
 * This implementation is optimal, in the sense that it only wakes up
 * threads if they can actually continue execution. It does not suffer
 * from the thundering herd problem. If multiple threads waiting on a
 * condition variable need to be woken up, only a single thread is
 * scheduled. All other threads are 'donated' to this thread. After the
 * thread manages to reacquire the lock, it requeues its donated threads
 * to the lock.
 *
 * TODO(ed): Integrate this functionality into kern_umtx.c instead.
 * TODO(ed): Store futex objects in a hash table.
 * TODO(ed): Add actual priority inheritance.
 * TODO(ed): Let futex_queue also take priorities into account.
 * TODO(ed): Make locking fine-grained.
 * TODO(ed): Perform sleeps until an actual absolute point in time,
 *           instead of converting the timestamp to a relative value.
 */

struct futex_address;
struct futex_condvar;
struct futex_lock;
struct futex_queue;
struct futex_waiter;

/* Identifier of a location in memory. */
struct futex_address {
	struct umtx_key			fa_key;
};

/* A set of waiting threads. */
struct futex_queue {
	STAILQ_HEAD(, futex_waiter)	fq_list;
	/* Number of waiters in fq_list; kept in sync manually. */
	unsigned int			fq_count;
};

/* Condition variables. */
struct futex_condvar {
	/* Address of the condition variable. */
	struct futex_address		fc_address;

	/* The lock the waiters should be moved to when signalled. */
	struct futex_lock *		fc_lock;

	/* Threads waiting on the condition variable. */
	struct futex_queue		fc_waiters;
	/*
	 * Number of threads blocked on this condition variable, or
	 * being blocked on the lock after being requeued.
	 */
	unsigned int			fc_waitcount;

	/* Global list pointers. */
	LIST_ENTRY(futex_condvar)	fc_next;
};

/* Read-write locks. */
struct futex_lock {
	/* Address of the lock. */
	struct futex_address		fl_address;

	/*
	 * Current owner of the lock. LOCK_UNMANAGED if the lock is
	 * currently not owned by the kernel. LOCK_OWNER_UNKNOWN in case
	 * the owner is not known (e.g., when the lock is read-locked).
	 */
	cloudabi_tid_t			fl_owner;
#define LOCK_UNMANAGED 0x0
#define LOCK_OWNER_UNKNOWN 0x1

	/* Writers blocked on the lock. */
	struct futex_queue		fl_writers;
	/* Readers blocked on the lock. */
	struct futex_queue		fl_readers;
	/* Number of threads blocked on this lock + condition variables. */
	unsigned int			fl_waitcount;

	/* Global list pointers. */
	LIST_ENTRY(futex_lock)		fl_next;
};

/* Information associated with a thread blocked on an object. */
struct futex_waiter {
	/* Thread ID. */
	cloudabi_tid_t			fw_tid;
	/* Condition variable used for waiting. */
	struct cv			fw_wait;

	/* Queue this waiter is currently placed in. */
	struct futex_queue *		fw_queue;
	/* List pointers of fw_queue. */
	STAILQ_ENTRY(futex_waiter)	fw_next;

	/* Lock has been acquired. */
	bool				fw_locked;
	/* If not locked, threads that should block after acquiring. */
	struct futex_queue		fw_donated;
};

/* Global data structures. */
static MALLOC_DEFINE(M_FUTEX, "futex", "CloudABI futex");

/*
 * Single global lock serializing all futex operations and protecting
 * the two lists below (see the TODO above about fine-grained locking).
 */
static struct sx futex_global_lock;
SX_SYSINIT(futex_global_lock, &futex_global_lock, "CloudABI futex global lock");

static LIST_HEAD(, futex_lock) futex_lock_list =
    LIST_HEAD_INITIALIZER(&futex_lock_list);
static LIST_HEAD(, futex_condvar) futex_condvar_list =
    LIST_HEAD_INITIALIZER(&futex_condvar_list);

/* Utility functions.
 */
static void futex_lock_assert(const struct futex_lock *);
static struct futex_lock *futex_lock_lookup_locked(struct futex_address *);
static void futex_lock_release(struct futex_lock *);
static int futex_lock_tryrdlock(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_unmanage(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_update_owner(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_wake_up_next(struct futex_lock *, cloudabi_lock_t *);
static unsigned int futex_queue_count(const struct futex_queue *);
static void futex_queue_init(struct futex_queue *);
static void futex_queue_requeue(struct futex_queue *, struct futex_queue *,
    unsigned int);
static int futex_queue_sleep(struct futex_queue *, struct futex_lock *,
    struct futex_waiter *, struct thread *, cloudabi_clockid_t,
    cloudabi_timestamp_t, cloudabi_timestamp_t, bool);
static cloudabi_tid_t futex_queue_tid_best(const struct futex_queue *);
static void futex_queue_wake_up_all(struct futex_queue *);
static void futex_queue_wake_up_best(struct futex_queue *);
static void futex_queue_wake_up_donate(struct futex_queue *, unsigned int);
static int futex_user_load(uint32_t *, uint32_t *);
static int futex_user_store(uint32_t *, uint32_t);
static int futex_user_cmpxchg(uint32_t *, uint32_t, uint32_t *, uint32_t);

/*
 * futex_address operations.
 */

/*
 * Initializes fa so it uniquely identifies the userspace object,
 * either per-process (PRIVATE) or across processes (SHARED).
 */
static int
futex_address_create(struct futex_address *fa, struct thread *td,
    const void *object, cloudabi_scope_t scope)
{

	KASSERT(td == curthread,
	    ("Can only create umtx keys for the current thread"));
	switch (scope) {
	case CLOUDABI_SCOPE_PRIVATE:
		return (umtx_key_get(object, TYPE_FUTEX, THREAD_SHARE,
		    &fa->fa_key));
	case CLOUDABI_SCOPE_SHARED:
		return (umtx_key_get(object, TYPE_FUTEX, AUTO_SHARE,
		    &fa->fa_key));
	default:
		return (EINVAL);
	}
}

static void
futex_address_free(struct futex_address *fa)
{

	umtx_key_release(&fa->fa_key);
}

static bool
futex_address_match(const struct futex_address *fa1,
    const struct futex_address *fa2)
{

	return (umtx_key_match(&fa1->fa_key, &fa2->fa_key));
}

/*
 * futex_condvar operations.
 */

static void
futex_condvar_assert(const struct futex_condvar *fc)
{

	KASSERT(fc->fc_waitcount >= futex_queue_count(&fc->fc_waiters),
	    ("Total number of waiters cannot be smaller than the wait queue"));
	futex_lock_assert(fc->fc_lock);
}

/*
 * Looks up an existing condition variable object.  On success, returns
 * with futex_global_lock held; the caller drops it again through
 * futex_condvar_release().  Returns ENOENT (lock dropped) if no
 * matching object exists.
 */
static int
futex_condvar_lookup(struct thread *td, const cloudabi_condvar_t *address,
    cloudabi_scope_t scope, struct futex_condvar **fcret)
{
	struct futex_address fa_condvar;
	struct futex_condvar *fc;
	int error;

	error = futex_address_create(&fa_condvar, td, address, scope);
	if (error != 0)
		return (error);

	sx_xlock(&futex_global_lock);
	LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
		if (futex_address_match(&fc->fc_address, &fa_condvar)) {
			/* Found matching lock object. */
			futex_address_free(&fa_condvar);
			futex_condvar_assert(fc);
			*fcret = fc;
			return (0);
		}
	}
	sx_xunlock(&futex_global_lock);
	futex_address_free(&fa_condvar);
	return (ENOENT);
}

/*
 * Like futex_condvar_lookup(), but creates the condition variable (and,
 * if needed, its associated lock object) when none exists.  On success,
 * returns with futex_global_lock held.  Fails with EINVAL if the
 * condition variable already exists but is bound to a different lock.
 */
static int
futex_condvar_lookup_or_create(struct thread *td,
    const cloudabi_condvar_t *condvar, cloudabi_scope_t condvar_scope,
    const cloudabi_lock_t *lock, cloudabi_scope_t lock_scope,
    struct futex_condvar **fcret)
{
	struct futex_address fa_condvar, fa_lock;
	struct futex_condvar *fc;
	struct futex_lock *fl;
	int error;

	error = futex_address_create(&fa_condvar, td, condvar, condvar_scope);
	if (error != 0)
		return (error);
	error = futex_address_create(&fa_lock, td, lock, lock_scope);
	if (error != 0) {
		futex_address_free(&fa_condvar);
		return (error);
	}

	sx_xlock(&futex_global_lock);
	LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
		if (!futex_address_match(&fc->fc_address, &fa_condvar))
			continue;
		fl = fc->fc_lock;
		if (!futex_address_match(&fl->fl_address, &fa_lock)) {
			/* Condition variable is owned by a different lock. */
			futex_address_free(&fa_condvar);
			futex_address_free(&fa_lock);
			sx_xunlock(&futex_global_lock);
			return (EINVAL);
		}

		/* Found fully matching condition variable. */
		futex_address_free(&fa_condvar);
		futex_address_free(&fa_lock);
		futex_condvar_assert(fc);
		*fcret = fc;
		return (0);
	}

	/* None found. Create new condition variable object. */
	fc = malloc(sizeof(*fc), M_FUTEX, M_WAITOK);
	/* Ownership of fa_condvar moves into the new object. */
	fc->fc_address = fa_condvar;
	fc->fc_lock = futex_lock_lookup_locked(&fa_lock);
	futex_queue_init(&fc->fc_waiters);
	fc->fc_waitcount = 0;
	LIST_INSERT_HEAD(&futex_condvar_list, fc, fc_next);
	*fcret = fc;
	return (0);
}

/*
 * Drops the reference obtained by a condvar lookup: deallocates the
 * object if it has no waiters left, then releases the lock object
 * (which also drops futex_global_lock).
 */
static void
futex_condvar_release(struct futex_condvar *fc)
{
	struct futex_lock *fl;

	futex_condvar_assert(fc);
	fl = fc->fc_lock;
	if (fc->fc_waitcount == 0) {
		/* Condition variable has no waiters. Deallocate it. */
		futex_address_free(&fc->fc_address);
		LIST_REMOVE(fc, fc_next);
		free(fc, M_FUTEX);
	}
	futex_lock_release(fl);
}

/*
 * Writes CLOUDABI_CONDVAR_HAS_NO_WAITERS back to userspace once the
 * kernel wait queue has drained, so userspace can skip kernel wakeups.
 */
static int
futex_condvar_unmanage(struct futex_condvar *fc,
    cloudabi_condvar_t *condvar)
{

	if (futex_queue_count(&fc->fc_waiters) != 0)
		return (0);
	return (futex_user_store(condvar, CLOUDABI_CONDVAR_HAS_NO_WAITERS));
}

/*
 * futex_lock operations.
 */

static void
futex_lock_assert(const struct futex_lock *fl)
{

	/*
	 * A futex lock can only be kernel-managed if it has waiters.
	 * Vice versa: if a futex lock has waiters, it must be
	 * kernel-managed.
	 */
	KASSERT((fl->fl_owner == LOCK_UNMANAGED) ==
	    (futex_queue_count(&fl->fl_readers) == 0 &&
	    futex_queue_count(&fl->fl_writers) == 0),
	    ("Managed locks must have waiting threads"));
	KASSERT(fl->fl_waitcount != 0 || fl->fl_owner == LOCK_UNMANAGED,
	    ("Lock with no waiters must be unmanaged"));
}

/*
 * Looks up (or creates) the lock object for a userspace address.  On
 * success, returns with futex_global_lock held; the caller drops it
 * through futex_lock_release().
 */
static int
futex_lock_lookup(struct thread *td, const cloudabi_lock_t *address,
    cloudabi_scope_t scope, struct futex_lock **flret)
{
	struct futex_address fa;
	int error;

	error = futex_address_create(&fa, td, address, scope);
	if (error != 0)
		return (error);

	sx_xlock(&futex_global_lock);
	*flret = futex_lock_lookup_locked(&fa);
	return (0);
}

/*
 * Lookup helper; caller holds futex_global_lock.  Consumes *fa: it is
 * freed on a match, or its ownership moves into a newly created lock.
 */
static struct futex_lock *
futex_lock_lookup_locked(struct futex_address *fa)
{
	struct futex_lock *fl;

	LIST_FOREACH(fl, &futex_lock_list, fl_next) {
		if (futex_address_match(&fl->fl_address, fa)) {
			/* Found matching lock object. */
			futex_address_free(fa);
			futex_lock_assert(fl);
			return (fl);
		}
	}

	/* None found. Create new lock object. */
	fl = malloc(sizeof(*fl), M_FUTEX, M_WAITOK);
	fl->fl_address = *fa;
	fl->fl_owner = LOCK_UNMANAGED;
	futex_queue_init(&fl->fl_readers);
	futex_queue_init(&fl->fl_writers);
	fl->fl_waitcount = 0;
	LIST_INSERT_HEAD(&futex_lock_list, fl, fl_next);
	return (fl);
}

/*
 * Acquires a read lock, sleeping on the reader queue if the lock is
 * currently write-locked.
 */
static int
futex_lock_rdlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_waiter fw;
	int error;

	error = futex_lock_tryrdlock(fl, lock);
	if (error == EBUSY) {
		/* Suspend execution. */
		KASSERT(fl->fl_owner != LOCK_UNMANAGED,
		    ("Attempted to sleep on an unmanaged lock"));
		error = futex_queue_sleep(&fl->fl_readers, fl, &fw, td,
		    clock_id, timeout, precision, abstime);
		KASSERT((error == 0) == fw.fw_locked,
		    ("Should have locked write lock on success"));
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Lock functions cannot receive threads"));
	}
	if (error != 0)
		futex_lock_unmanage(fl, lock);
	return (error);
}

/*
 * Drops the reference obtained by a lock lookup: deallocates the lock
 * object if it is unreferenced, then drops futex_global_lock.
 */
static void
futex_lock_release(struct futex_lock *fl)
{

	futex_lock_assert(fl);
	if (fl->fl_waitcount == 0) {
		/* Lock object is unreferenced. Deallocate it. */
		KASSERT(fl->fl_owner == LOCK_UNMANAGED,
		    ("Attempted to free a managed lock"));
		futex_address_free(&fl->fl_address);
		LIST_REMOVE(fl, fl_next);
		free(fl, M_FUTEX);
	}
	sx_xunlock(&futex_global_lock);
}

/*
 * If no threads are waiting anymore, marks the lock unmanaged and
 * clears the kernel-managed bit in the userspace word via a cmpxchg
 * retry loop.
 */
static int
futex_lock_unmanage(struct futex_lock *fl, cloudabi_lock_t *lock)
{
	cloudabi_lock_t cmp, old;
	int error;

	if (futex_queue_count(&fl->fl_readers) == 0 &&
	    futex_queue_count(&fl->fl_writers) == 0) {
		/* Lock should be unmanaged. */
		fl->fl_owner = LOCK_UNMANAGED;

		/* Clear kernel-managed bit. */
		error = futex_user_load(lock, &old);
		if (error != 0)
			return (error);
		for (;;) {
			cmp = old;
			error = futex_user_cmpxchg(lock, cmp, &old,
			    cmp & ~CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);
			if (old == cmp)
				break;
		}
	}
	return (0);
}

/* Sets an owner of a lock, based on a userspace lock value. */
static void
futex_lock_set_owner(struct futex_lock *fl, cloudabi_lock_t lock)
{

	/* Lock has no explicit owner. */
	if ((lock & ~CLOUDABI_LOCK_WRLOCKED) == 0) {
		fl->fl_owner = LOCK_OWNER_UNKNOWN;
		return;
	}
	lock &= ~(CLOUDABI_LOCK_WRLOCKED | CLOUDABI_LOCK_KERNEL_MANAGED);

	/* Don't allow userspace to silently unlock. */
	if (lock == LOCK_UNMANAGED) {
		fl->fl_owner = LOCK_OWNER_UNKNOWN;
		return;
	}
	fl->fl_owner = lock;
}

/*
 * Releases the lock held by td and wakes up/requeues the next
 * waiter(s).  Fails with EPERM if td is not the known owner.
 */
static int
futex_lock_unlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock)
{
	int error;

	/* Validate that this thread is allowed to unlock. */
	error = futex_lock_update_owner(fl, lock);
	if (error != 0)
		return (error);
	if (fl->fl_owner != LOCK_UNMANAGED && fl->fl_owner != td->td_tid)
		return (EPERM);
	return (futex_lock_wake_up_next(fl, lock));
}

/* Syncs in the owner of the lock from userspace if needed. */
static int
futex_lock_update_owner(struct futex_lock *fl, cloudabi_lock_t *address)
{
	cloudabi_lock_t lock;
	int error;

	if (fl->fl_owner == LOCK_OWNER_UNKNOWN) {
		error = futex_user_load(address, &lock);
		if (error != 0)
			return (error);
		futex_lock_set_owner(fl, lock);
	}
	return (0);
}

/*
 * Attempts to read-lock the userspace word.  Returns 0 on success,
 * EBUSY if the caller must sleep (marking the lock kernel-managed
 * first), or an error on fault/inconsistency.
 */
static int
futex_lock_tryrdlock(struct futex_lock *fl, cloudabi_lock_t *address)
{
	cloudabi_lock_t old, cmp;
	int error;

	if (fl->fl_owner != LOCK_UNMANAGED) {
		/* Lock is already acquired. */
		return (EBUSY);
	}

	old = CLOUDABI_LOCK_UNLOCKED;
	for (;;) {
		if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
			/*
			 * Userspace lock is kernel-managed, even though
			 * the kernel disagrees.
			 */
			return (EINVAL);
		}

		if ((old & CLOUDABI_LOCK_WRLOCKED) == 0) {
			/*
			 * Lock is not write-locked. Attempt to acquire
			 * it by increasing the read count.
			 */
			cmp = old;
			error = futex_user_cmpxchg(address, cmp, &old, cmp + 1);
			if (error != 0)
				return (error);
			if (old == cmp) {
				/* Success. */
				return (0);
			}
		} else {
			/* Lock is write-locked. Make it kernel-managed. */
			cmp = old;
			error = futex_user_cmpxchg(address, cmp, &old,
			    cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);
			if (old == cmp) {
				/* Success. */
				futex_lock_set_owner(fl, cmp);
				return (EBUSY);
			}
		}
	}
}

/*
 * Attempts to write-lock the userspace word for thread tid.  Returns 0
 * on success, EBUSY if the caller must sleep, EDEADLK on recursive
 * acquisition, or an error on fault/inconsistency.  When
 * force_kernel_managed is set, the lock is made kernel-managed even on
 * success (needed when threads are donated to the acquirer).
 */
static int
futex_lock_trywrlock(struct futex_lock *fl, cloudabi_lock_t *address,
    cloudabi_tid_t tid, bool force_kernel_managed)
{
	cloudabi_lock_t old, new, cmp;
	int error;

	if (fl->fl_owner == tid) {
		/* Attempted to acquire lock recursively. */
		return (EDEADLK);
	}
	if (fl->fl_owner != LOCK_UNMANAGED) {
		/* Lock is already acquired. */
		return (EBUSY);
	}

	old = CLOUDABI_LOCK_UNLOCKED;
	for (;;) {
		if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
			/*
			 * Userspace lock is kernel-managed, even though
			 * the kernel disagrees.
			 */
			return (EINVAL);
		}
		if (old == (tid | CLOUDABI_LOCK_WRLOCKED)) {
			/* Attempted to acquire lock recursively. */
			return (EDEADLK);
		}

		if (old == CLOUDABI_LOCK_UNLOCKED) {
			/* Lock is unlocked. Attempt to acquire it. */
			new = tid | CLOUDABI_LOCK_WRLOCKED;
			if (force_kernel_managed)
				new |= CLOUDABI_LOCK_KERNEL_MANAGED;
			error = futex_user_cmpxchg(address,
			    CLOUDABI_LOCK_UNLOCKED, &old, new);
			if (error != 0)
				return (error);
			if (old == CLOUDABI_LOCK_UNLOCKED) {
				/* Success. */
				if (force_kernel_managed)
					fl->fl_owner = tid;
				return (0);
			}
		} else {
			/* Lock is still locked. Make it kernel-managed. */
			cmp = old;
			error = futex_user_cmpxchg(address, cmp, &old,
			    cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);
			if (old == cmp) {
				/* Success. */
				futex_lock_set_owner(fl, cmp);
				return (EBUSY);
			}
		}
	}
}

/*
 * Transfers ownership of an unlocked lock to the next waiter(s),
 * updating the userspace word accordingly.
 */
static int
futex_lock_wake_up_next(struct futex_lock *fl, cloudabi_lock_t *lock)
{
	cloudabi_tid_t tid;
	int error;

	/*
	 * Determine which thread(s) to wake up. Prefer waking up
	 * writers over readers to prevent write starvation.
	 */
	if (futex_queue_count(&fl->fl_writers) > 0) {
		/* Transfer ownership to a single write-locker. */
		if (futex_queue_count(&fl->fl_writers) > 1 ||
		    futex_queue_count(&fl->fl_readers) > 0) {
			/* Lock should remain managed afterwards. */
			tid = futex_queue_tid_best(&fl->fl_writers);
			error = futex_user_store(lock,
			    tid | CLOUDABI_LOCK_WRLOCKED |
			    CLOUDABI_LOCK_KERNEL_MANAGED);
			if (error != 0)
				return (error);

			futex_queue_wake_up_best(&fl->fl_writers);
			fl->fl_owner = tid;
		} else {
			/* Lock can become unmanaged afterwards. */
			error = futex_user_store(lock,
			    futex_queue_tid_best(&fl->fl_writers) |
			    CLOUDABI_LOCK_WRLOCKED);
			if (error != 0)
				return (error);

			futex_queue_wake_up_best(&fl->fl_writers);
			fl->fl_owner = LOCK_UNMANAGED;
		}
	} else {
		/* Transfer ownership to all read-lockers (if any). */
		error = futex_user_store(lock,
		    futex_queue_count(&fl->fl_readers));
		if (error != 0)
			return (error);

		/* Wake up all threads. */
		futex_queue_wake_up_all(&fl->fl_readers);
		fl->fl_owner = LOCK_UNMANAGED;
	}
	return (0);
}

/*
 * Acquires a write lock, sleeping on the writer queue if busy.  Any
 * threads in *donated are parked on the writer queue (if this thread
 * can take responsibility for them) or handed to another waiter.
 */
static int
futex_lock_wrlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime,
    struct futex_queue *donated)
{
	struct futex_waiter fw;
	int error;

	error = futex_lock_trywrlock(fl, lock, td->td_tid,
	    futex_queue_count(donated) > 0);

	if (error == 0 || error == EBUSY) {
		/* Put donated threads in queue before suspending. */
		KASSERT(futex_queue_count(donated) == 0 ||
		    fl->fl_owner != LOCK_UNMANAGED,
		    ("Lock should be managed if we are going to donate"));
		futex_queue_requeue(donated, &fl->fl_writers, UINT_MAX);
	} else {
		/*
		 * This thread cannot deal with the donated threads.
		 * Wake up the next thread and let it try it by itself.
		 */
		futex_queue_wake_up_donate(donated, UINT_MAX);
	}

	if (error == EBUSY) {
		/* Suspend execution if the lock was busy. */
		KASSERT(fl->fl_owner != LOCK_UNMANAGED,
		    ("Attempted to sleep on an unmanaged lock"));
		error = futex_queue_sleep(&fl->fl_writers, fl, &fw, td,
		    clock_id, timeout, precision, abstime);
		KASSERT((error == 0) == fw.fw_locked,
		    ("Should have locked write lock on success"));
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Lock functions cannot receive threads"));
	}
	if (error != 0)
		futex_lock_unmanage(fl, lock);
	return (error);
}

/*
 * futex_queue operations.
 */

static cloudabi_tid_t
futex_queue_tid_best(const struct futex_queue *fq)
{

	return (STAILQ_FIRST(&fq->fq_list)->fw_tid);
}

static unsigned int
futex_queue_count(const struct futex_queue *fq)
{

	return (fq->fq_count);
}

static void
futex_queue_init(struct futex_queue *fq)
{

	STAILQ_INIT(&fq->fq_list);
	fq->fq_count = 0;
}

/* Converts a relative timestamp to an sbintime. */
static sbintime_t
futex_queue_convert_timestamp_relative(cloudabi_timestamp_t ts)
{
	cloudabi_timestamp_t s, ns;

	s = ts / 1000000000;
	ns = ts % 1000000000;
	if (s > INT32_MAX)
		return (INT64_MAX);
	return ((s << 32) + (ns << 32) / 1000000000);
}

/* Converts an absolute timestamp and precision to a pair of sbintime values.
 */
static int
futex_queue_convert_timestamp(struct thread *td, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision,
    sbintime_t *sbttimeout, sbintime_t *sbtprecision, bool abstime)
{
	cloudabi_timestamp_t now;
	int error;

	if (abstime) {
		/* Make the time relative. */
		error = cloudabi_clock_time_get(td, clock_id, &now);
		if (error != 0)
			return (error);
		/* Clamp to zero if the deadline has already passed. */
		timeout = timeout < now ? 0 : timeout - now;
	}

	*sbttimeout = futex_queue_convert_timestamp_relative(timeout);
	*sbtprecision = futex_queue_convert_timestamp_relative(precision);
	return (0);
}

/*
 * Puts the calling thread to sleep on queue fq until it is woken up
 * (dequeued by one of the wakeup functions) or the timeout expires.
 * Called and returns with futex_global_lock held; the condition
 * variable wait drops it while asleep.  A timeout of UINT64_MAX means
 * wait forever.
 */
static int
futex_queue_sleep(struct futex_queue *fq, struct futex_lock *fl,
    struct futex_waiter *fw, struct thread *td, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	sbintime_t sbttimeout, sbtprecision;
	int error;

	/* Initialize futex_waiter object. */
	fw->fw_tid = td->td_tid;
	fw->fw_locked = false;
	futex_queue_init(&fw->fw_donated);

	if (timeout != UINT64_MAX) {
		/* Convert timeout duration. */
		error = futex_queue_convert_timestamp(td, clock_id, timeout,
		    precision, &sbttimeout, &sbtprecision, abstime);
		if (error != 0)
			return (error);
	}

	/* Place object in the queue. */
	fw->fw_queue = fq;
	STAILQ_INSERT_TAIL(&fq->fq_list, fw, fw_next);
	++fq->fq_count;

	cv_init(&fw->fw_wait, "futex");
	++fl->fl_waitcount;

	futex_lock_assert(fl);
	if (timeout == UINT64_MAX) {
		/* Wait without a timeout. */
		error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
	} else {
		/* Wait respecting the timeout. */
		error = cv_timedwait_sig_sbt(&fw->fw_wait, &futex_global_lock,
		    sbttimeout, sbtprecision, 0);
		futex_lock_assert(fl);
		if (error == EWOULDBLOCK &&
		    fw->fw_queue != NULL && fw->fw_queue != fq) {
			/*
			 * We got signalled on a condition variable, but
			 * observed a timeout while waiting to reacquire
			 * the lock. In other words, we didn't actually
			 * time out. Go back to sleep and wait for the
			 * lock to be reacquired.
			 */
			error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
		}
	}
	futex_lock_assert(fl);

	--fl->fl_waitcount;
	cv_destroy(&fw->fw_wait);

	fq = fw->fw_queue;
	if (fq == NULL) {
		/* Thread got dequeued, so we've slept successfully. */
		return (0);
	}

	/* Thread is still enqueued. Remove it. */
	KASSERT(error != 0, ("Woken up thread is still enqueued"));
	STAILQ_REMOVE(&fq->fq_list, fw, futex_waiter, fw_next);
	--fq->fq_count;
	return (error == EWOULDBLOCK ? ETIMEDOUT : error);
}

/* Moves up to nwaiters waiters from one queue to another. */
static void
futex_queue_requeue(struct futex_queue *fqfrom, struct futex_queue *fqto,
    unsigned int nwaiters)
{
	struct futex_waiter *fw;

	/* Move waiters to the target queue. */
	while (nwaiters-- > 0 && !STAILQ_EMPTY(&fqfrom->fq_list)) {
		fw = STAILQ_FIRST(&fqfrom->fq_list);
		STAILQ_REMOVE_HEAD(&fqfrom->fq_list, fw_next);
		--fqfrom->fq_count;

		fw->fw_queue = fqto;
		STAILQ_INSERT_TAIL(&fqto->fq_list, fw, fw_next);
		++fqto->fq_count;
	}
}

/* Wakes up all waiters in a queue.
*/ 898static void 899futex_queue_wake_up_all(struct futex_queue *fq) 900{ 901 struct futex_waiter *fw; 902 903 STAILQ_FOREACH(fw, &fq->fq_list, fw_next) { 904 fw->fw_locked = true; 905 fw->fw_queue = NULL; 906 cv_signal(&fw->fw_wait); 907 } 908 909 STAILQ_INIT(&fq->fq_list); 910 fq->fq_count = 0; 911} 912 913/* 914 * Wakes up the best waiter (i.e., the waiter having the highest 915 * priority) in a queue. 916 */ 917static void 918futex_queue_wake_up_best(struct futex_queue *fq) 919{ 920 struct futex_waiter *fw; 921 922 fw = STAILQ_FIRST(&fq->fq_list); 923 fw->fw_locked = true; 924 fw->fw_queue = NULL; 925 cv_signal(&fw->fw_wait); 926 927 STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next); 928 --fq->fq_count; 929} 930 931static void 932futex_queue_wake_up_donate(struct futex_queue *fq, unsigned int nwaiters) 933{ 934 struct futex_waiter *fw; 935 936 fw = STAILQ_FIRST(&fq->fq_list); 937 if (fw == NULL) 938 return; 939 fw->fw_locked = false; 940 fw->fw_queue = NULL; 941 cv_signal(&fw->fw_wait); 942 943 STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next); 944 --fq->fq_count; 945 futex_queue_requeue(fq, &fw->fw_donated, nwaiters); 946} 947 948/* 949 * futex_user operations. Used to adjust values in userspace. 950 */ 951 952static int 953futex_user_load(uint32_t *obj, uint32_t *val) 954{ 955 956 return (fueword32(obj, val) != 0 ? EFAULT : 0); 957} 958 959static int 960futex_user_store(uint32_t *obj, uint32_t val) 961{ 962 963 return (suword32(obj, val) != 0 ? EFAULT : 0); 964} 965 966static int 967futex_user_cmpxchg(uint32_t *obj, uint32_t cmp, uint32_t *old, uint32_t new) 968{ 969 970 return (casueword32(obj, cmp, old, new) != 0 ? EFAULT : 0); 971} 972 973/* 974 * Blocking calls: acquiring locks, waiting on condition variables. 
 */

/*
 * Atomically unlocks the userspace lock and waits on the condition
 * variable, reacquiring the lock before returning.  Returns with no
 * futex locks held.
 */
int
cloudabi_futex_condvar_wait(struct thread *td, cloudabi_condvar_t *condvar,
    cloudabi_scope_t condvar_scope, cloudabi_lock_t *lock,
    cloudabi_scope_t lock_scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_condvar *fc;
	struct futex_lock *fl;
	struct futex_waiter fw;
	int error, error2;

	/* Lookup condition variable object. */
	error = futex_condvar_lookup_or_create(td, condvar, condvar_scope, lock,
	    lock_scope, &fc);
	if (error != 0)
		return (error);
	fl = fc->fc_lock;

	/*
	 * Set the condition variable to something other than
	 * CLOUDABI_CONDVAR_HAS_NO_WAITERS to make userspace threads
	 * call into the kernel to perform wakeups.
	 */
	error = futex_user_store(condvar, ~CLOUDABI_CONDVAR_HAS_NO_WAITERS);
	if (error != 0) {
		futex_condvar_release(fc);
		return (error);
	}

	/* Drop the lock. */
	error = futex_lock_unlock(fl, td, lock);
	if (error != 0) {
		futex_condvar_unmanage(fc, condvar);
		futex_condvar_release(fc);
		return (error);
	}

	/* Go to sleep. */
	++fc->fc_waitcount;
	error = futex_queue_sleep(&fc->fc_waiters, fc->fc_lock, &fw, td,
	    clock_id, timeout, precision, abstime);
	if (fw.fw_locked) {
		/* Waited and got the lock assigned to us. */
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Received threads while being locked"));
	} else if (error == 0 || error == ETIMEDOUT) {
		if (error != 0)
			futex_condvar_unmanage(fc, condvar);
		/*
		 * Got woken up without having the lock assigned to us.
		 * This can happen in two cases:
		 *
		 * 1. We observed a timeout on a condition variable.
		 * 2. We got signalled on a condition variable while the
		 *    associated lock is unlocked. We are the first
		 *    thread that gets woken up. This thread is
		 *    responsible for reacquiring the userspace lock.
		 */
		error2 = futex_lock_wrlock(fl, td, lock,
		    CLOUDABI_CLOCK_MONOTONIC, UINT64_MAX, 0, abstime,
		    &fw.fw_donated);
		if (error2 != 0)
			error = error2;
	} else {
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Received threads on error"));
		futex_condvar_unmanage(fc, condvar);
		futex_lock_unmanage(fl, lock);
	}
	--fc->fc_waitcount;
	futex_condvar_release(fc);
	return (error);
}

int
cloudabi_futex_lock_rdlock(struct thread *td, cloudabi_lock_t *lock,
    cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_lock *fl;
	int error;

	/* Look up lock object. */
	error = futex_lock_lookup(td, lock, scope, &fl);
	if (error != 0)
		return (error);

	error = futex_lock_rdlock(fl, td, lock, clock_id, timeout,
	    precision, abstime);
	futex_lock_release(fl);
	return (error);
}

int
cloudabi_futex_lock_wrlock(struct thread *td, cloudabi_lock_t *lock,
    cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_lock *fl;
	struct futex_queue fq;
	int error;

	/* Look up lock object. */
	error = futex_lock_lookup(td, lock, scope, &fl);
	if (error != 0)
		return (error);

	/* No threads are donated when locking directly. */
	futex_queue_init(&fq);
	error = futex_lock_wrlock(fl, td, lock, clock_id, timeout,
	    precision, abstime, &fq);
	futex_lock_release(fl);
	return (error);
}

/*
 * Non-blocking calls: releasing locks, signalling condition variables.
 */

/*
 * Wakes up at most uap->nwaiters threads blocked on a condition
 * variable, requeueing the rest onto the associated lock when possible.
 */
int
cloudabi_sys_condvar_signal(struct thread *td,
    struct cloudabi_sys_condvar_signal_args *uap)
{
	struct futex_condvar *fc;
	struct futex_lock *fl;
	cloudabi_nthreads_t nwaiters;
	int error;

	nwaiters = uap->nwaiters;
	if (nwaiters == 0) {
		/* No threads to wake up. */
		return (0);
	}

	/* Look up futex object. */
	error = futex_condvar_lookup(td, uap->condvar, uap->scope, &fc);
	if (error != 0) {
		/* Race condition: condition variable with no waiters. */
		return (error == ENOENT ? 0 : error);
	}
	fl = fc->fc_lock;

	if (fl->fl_owner == LOCK_UNMANAGED) {
		/*
		 * The lock is currently not managed by the kernel,
		 * meaning we must attempt to acquire the userspace lock
		 * first. We cannot requeue threads to an unmanaged lock,
		 * as these threads will then never be scheduled.
		 *
		 * Unfortunately, the memory address of the lock is
		 * unknown from this context, meaning that we cannot
		 * acquire the lock on behalf of the first thread to be
		 * scheduled. The lock may even not be mapped within the
		 * address space of the current thread.
		 *
		 * To solve this, wake up a single waiter that will
		 * attempt to acquire the lock. Donate all of the other
		 * waiters that need to be woken up to this waiter, so
		 * it can requeue them after acquiring the lock.
		 */
		futex_queue_wake_up_donate(&fc->fc_waiters, nwaiters - 1);
	} else {
		/*
		 * Lock is already managed by the kernel. This makes it
		 * easy, as we can requeue the threads from the
		 * condition variable directly to the associated lock.
		 */
		futex_queue_requeue(&fc->fc_waiters, &fl->fl_writers, nwaiters);
	}

	/* Clear userspace condition variable if all waiters are gone. */
	error = futex_condvar_unmanage(fc, uap->condvar);
	futex_condvar_release(fc);
	return (error);
}

int
cloudabi_sys_lock_unlock(struct thread *td,
    struct cloudabi_sys_lock_unlock_args *uap)
{
	struct futex_lock *fl;
	int error;

	error = futex_lock_lookup(td, uap->lock, uap->scope, &fl);
	if (error != 0)
		return (error);
	error = futex_lock_unlock(fl, td, uap->lock);
	futex_lock_release(fl);
	return (error);
}