kern_umtx.c revision 179970
1/*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 179970 2008-06-24 07:32:12Z davidxu $"); 30 31#include "opt_compat.h" 32#include <sys/param.h> 33#include <sys/kernel.h> 34#include <sys/limits.h> 35#include <sys/lock.h> 36#include <sys/malloc.h> 37#include <sys/mutex.h> 38#include <sys/priv.h> 39#include <sys/proc.h> 40#include <sys/sched.h> 41#include <sys/smp.h> 42#include <sys/sysctl.h> 43#include <sys/sysent.h> 44#include <sys/systm.h> 45#include <sys/sysproto.h> 46#include <sys/eventhandler.h> 47#include <sys/umtx.h> 48 49#include <vm/vm.h> 50#include <vm/vm_param.h> 51#include <vm/pmap.h> 52#include <vm/vm_map.h> 53#include <vm/vm_object.h> 54 55#include <machine/cpu.h> 56 57#ifdef COMPAT_IA32 58#include <compat/freebsd32/freebsd32_proto.h> 59#endif 60 61#define TYPE_SIMPLE_WAIT 0 62#define TYPE_CV 1 63#define TYPE_SIMPLE_LOCK 2 64#define TYPE_NORMAL_UMUTEX 3 65#define TYPE_PI_UMUTEX 4 66#define TYPE_PP_UMUTEX 5 67#define TYPE_RWLOCK 6 68 69#define _UMUTEX_TRY 1 70#define _UMUTEX_WAIT 2 71 72/* Key to represent a unique userland synchronous object */ 73struct umtx_key { 74 int hash; 75 int type; 76 int shared; 77 union { 78 struct { 79 vm_object_t object; 80 uintptr_t offset; 81 } shared; 82 struct { 83 struct vmspace *vs; 84 uintptr_t addr; 85 } private; 86 struct { 87 void *a; 88 uintptr_t b; 89 } both; 90 } info; 91}; 92 93/* Priority inheritance mutex info. */ 94struct umtx_pi { 95 /* Owner thread */ 96 struct thread *pi_owner; 97 98 /* Reference count */ 99 int pi_refcount; 100 101 /* List entry to link umtx holding by thread */ 102 TAILQ_ENTRY(umtx_pi) pi_link; 103 104 /* List entry in hash */ 105 TAILQ_ENTRY(umtx_pi) pi_hashlink; 106 107 /* List for waiters */ 108 TAILQ_HEAD(,umtx_q) pi_blocked; 109 110 /* Identify a userland lock object */ 111 struct umtx_key pi_key; 112}; 113 114/* A userland synchronous object user. */ 115struct umtx_q { 116 /* Linked list for the hash. 
*/ 117 TAILQ_ENTRY(umtx_q) uq_link; 118 119 /* Umtx key. */ 120 struct umtx_key uq_key; 121 122 /* Umtx flags. */ 123 int uq_flags; 124#define UQF_UMTXQ 0x0001 125 126 /* The thread waits on. */ 127 struct thread *uq_thread; 128 129 /* 130 * Blocked on PI mutex. read can use chain lock 131 * or umtx_lock, write must have both chain lock and 132 * umtx_lock being hold. 133 */ 134 struct umtx_pi *uq_pi_blocked; 135 136 /* On blocked list */ 137 TAILQ_ENTRY(umtx_q) uq_lockq; 138 139 /* Thread contending with us */ 140 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 141 142 /* Inherited priority from PP mutex */ 143 u_char uq_inherited_pri; 144}; 145 146TAILQ_HEAD(umtxq_head, umtx_q); 147 148/* Userland lock object's wait-queue chain */ 149struct umtxq_chain { 150 /* Lock for this chain. */ 151 struct mtx uc_lock; 152 153 /* List of sleep queues. */ 154 struct umtxq_head uc_queue[2]; 155#define UMTX_SHARED_QUEUE 0 156#define UMTX_EXCLUSIVE_QUEUE 1 157 158 /* Busy flag */ 159 char uc_busy; 160 161 /* Chain lock waiters */ 162 int uc_waiters; 163 164 /* All PI in the list */ 165 TAILQ_HEAD(,umtx_pi) uc_pi_list; 166}; 167 168#define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 169 170/* 171 * Don't propagate time-sharing priority, there is a security reason, 172 * a user can simply introduce PI-mutex, let thread A lock the mutex, 173 * and let another thread B block on the mutex, because B is 174 * sleeping, its priority will be boosted, this causes A's priority to 175 * be boosted via priority propagating too and will never be lowered even 176 * if it is using 100%CPU, this is unfair to other processes. 
177 */ 178 179#define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 180 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 181 PRI_MAX_TIMESHARE : (td)->td_user_pri) 182 183#define GOLDEN_RATIO_PRIME 2654404609U 184#define UMTX_CHAINS 128 185#define UMTX_SHIFTS (__WORD_BIT - 7) 186 187#define THREAD_SHARE 0 188#define PROCESS_SHARE 1 189#define AUTO_SHARE 2 190 191#define GET_SHARE(flags) \ 192 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 193 194#define BUSY_SPINS 200 195 196static uma_zone_t umtx_pi_zone; 197static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 198static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 199static int umtx_pi_allocated; 200 201SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 202SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 203 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 204 205static void umtxq_sysinit(void *); 206static void umtxq_hash(struct umtx_key *key); 207static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 208static void umtxq_lock(struct umtx_key *key); 209static void umtxq_unlock(struct umtx_key *key); 210static void umtxq_busy(struct umtx_key *key); 211static void umtxq_unbusy(struct umtx_key *key); 212static void umtxq_insert_queue(struct umtx_q *uq, int q); 213static void umtxq_remove_queue(struct umtx_q *uq, int q); 214static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo); 215static int umtxq_count(struct umtx_key *key); 216static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); 217static int umtx_key_get(void *addr, int type, int share, 218 struct umtx_key *key); 219static void umtx_key_release(struct umtx_key *key); 220static struct umtx_pi *umtx_pi_alloc(int); 221static void umtx_pi_free(struct umtx_pi *pi); 222static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri); 223static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 224static void 
umtx_thread_cleanup(struct thread *td); 225static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 226 struct image_params *imgp __unused); 227SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 228 229#define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 230#define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 231#define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 232 233static struct mtx umtx_lock; 234 235static void 236umtxq_sysinit(void *arg __unused) 237{ 238 int i, j; 239 240 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 241 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 242 for (i = 0; i < 2; ++i) { 243 for (j = 0; j < UMTX_CHAINS; ++j) { 244 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 245 MTX_DEF | MTX_DUPOK); 246 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]); 247 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]); 248 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 249 umtxq_chains[i][j].uc_busy = 0; 250 umtxq_chains[i][j].uc_waiters = 0; 251 } 252 } 253 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 254 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 255 EVENTHANDLER_PRI_ANY); 256} 257 258struct umtx_q * 259umtxq_alloc(void) 260{ 261 struct umtx_q *uq; 262 263 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 264 TAILQ_INIT(&uq->uq_pi_contested); 265 uq->uq_inherited_pri = PRI_MAX; 266 return (uq); 267} 268 269void 270umtxq_free(struct umtx_q *uq) 271{ 272 free(uq, M_UMTX); 273} 274 275static inline void 276umtxq_hash(struct umtx_key *key) 277{ 278 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 279 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 280} 281 282static inline int 283umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) 284{ 285 return (k1->type == k2->type && 286 k1->info.both.a == k2->info.both.a && 287 k1->info.both.b == k2->info.both.b); 288} 289 
290static inline struct umtxq_chain * 291umtxq_getchain(struct umtx_key *key) 292{ 293 if (key->type <= TYPE_CV) 294 return (&umtxq_chains[1][key->hash]); 295 return (&umtxq_chains[0][key->hash]); 296} 297 298/* 299 * Lock a chain. 300 */ 301static inline void 302umtxq_lock(struct umtx_key *key) 303{ 304 struct umtxq_chain *uc; 305 306 uc = umtxq_getchain(key); 307 mtx_lock(&uc->uc_lock); 308} 309 310/* 311 * Unlock a chain. 312 */ 313static inline void 314umtxq_unlock(struct umtx_key *key) 315{ 316 struct umtxq_chain *uc; 317 318 uc = umtxq_getchain(key); 319 mtx_unlock(&uc->uc_lock); 320} 321 322/* 323 * Set chain to busy state when following operation 324 * may be blocked (kernel mutex can not be used). 325 */ 326static inline void 327umtxq_busy(struct umtx_key *key) 328{ 329 struct umtxq_chain *uc; 330 331 uc = umtxq_getchain(key); 332 mtx_assert(&uc->uc_lock, MA_OWNED); 333 if (uc->uc_busy) { 334#ifdef SMP 335 if (smp_cpus > 1) { 336 int count = BUSY_SPINS; 337 if (count > 0) { 338 umtxq_unlock(key); 339 while (uc->uc_busy && --count > 0) 340 cpu_spinwait(); 341 umtxq_lock(key); 342 } 343 } 344#endif 345 while (uc->uc_busy) { 346 uc->uc_waiters++; 347 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 348 uc->uc_waiters--; 349 } 350 } 351 uc->uc_busy = 1; 352} 353 354/* 355 * Unbusy a chain. 
356 */ 357static inline void 358umtxq_unbusy(struct umtx_key *key) 359{ 360 struct umtxq_chain *uc; 361 362 uc = umtxq_getchain(key); 363 mtx_assert(&uc->uc_lock, MA_OWNED); 364 KASSERT(uc->uc_busy != 0, ("not busy")); 365 uc->uc_busy = 0; 366 if (uc->uc_waiters) 367 wakeup_one(uc); 368} 369 370static inline void 371umtxq_insert_queue(struct umtx_q *uq, int q) 372{ 373 struct umtxq_chain *uc; 374 375 uc = umtxq_getchain(&uq->uq_key); 376 UMTXQ_LOCKED_ASSERT(uc); 377 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link); 378 uq->uq_flags |= UQF_UMTXQ; 379} 380 381static inline void 382umtxq_remove_queue(struct umtx_q *uq, int q) 383{ 384 struct umtxq_chain *uc; 385 386 uc = umtxq_getchain(&uq->uq_key); 387 UMTXQ_LOCKED_ASSERT(uc); 388 if (uq->uq_flags & UQF_UMTXQ) { 389 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link); 390 uq->uq_flags &= ~UQF_UMTXQ; 391 } 392} 393 394/* 395 * Check if there are multiple waiters 396 */ 397static int 398umtxq_count(struct umtx_key *key) 399{ 400 struct umtxq_chain *uc; 401 struct umtx_q *uq; 402 int count = 0; 403 404 uc = umtxq_getchain(key); 405 UMTXQ_LOCKED_ASSERT(uc); 406 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { 407 if (umtx_key_match(&uq->uq_key, key)) { 408 if (++count > 1) 409 break; 410 } 411 } 412 return (count); 413} 414 415/* 416 * Check if there are multiple PI waiters and returns first 417 * waiter. 418 */ 419static int 420umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 421{ 422 struct umtxq_chain *uc; 423 struct umtx_q *uq; 424 int count = 0; 425 426 *first = NULL; 427 uc = umtxq_getchain(key); 428 UMTXQ_LOCKED_ASSERT(uc); 429 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { 430 if (umtx_key_match(&uq->uq_key, key)) { 431 if (++count > 1) 432 break; 433 *first = uq; 434 } 435 } 436 return (count); 437} 438 439/* 440 * Wake up threads waiting on an userland object. 
441 */ 442 443static int 444umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 445{ 446 struct umtxq_chain *uc; 447 struct umtx_q *uq, *next; 448 int ret; 449 450 ret = 0; 451 uc = umtxq_getchain(key); 452 UMTXQ_LOCKED_ASSERT(uc); 453 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) { 454 if (umtx_key_match(&uq->uq_key, key)) { 455 umtxq_remove_queue(uq, q); 456 wakeup(uq); 457 if (++ret >= n_wake) 458 break; 459 } 460 } 461 return (ret); 462} 463 464 465/* 466 * Wake up specified thread. 467 */ 468static inline void 469umtxq_signal_thread(struct umtx_q *uq) 470{ 471 struct umtxq_chain *uc; 472 473 uc = umtxq_getchain(&uq->uq_key); 474 UMTXQ_LOCKED_ASSERT(uc); 475 umtxq_remove(uq); 476 wakeup(uq); 477} 478 479/* 480 * Put thread into sleep state, before sleeping, check if 481 * thread was removed from umtx queue. 482 */ 483static inline int 484umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) 485{ 486 struct umtxq_chain *uc; 487 int error; 488 489 uc = umtxq_getchain(&uq->uq_key); 490 UMTXQ_LOCKED_ASSERT(uc); 491 if (!(uq->uq_flags & UQF_UMTXQ)) 492 return (0); 493 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 494 if (error == EWOULDBLOCK) 495 error = ETIMEDOUT; 496 return (error); 497} 498 499/* 500 * Convert userspace address into unique logical address. 
501 */ 502static int 503umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 504{ 505 struct thread *td = curthread; 506 vm_map_t map; 507 vm_map_entry_t entry; 508 vm_pindex_t pindex; 509 vm_prot_t prot; 510 boolean_t wired; 511 512 key->type = type; 513 if (share == THREAD_SHARE) { 514 key->shared = 0; 515 key->info.private.vs = td->td_proc->p_vmspace; 516 key->info.private.addr = (uintptr_t)addr; 517 } else { 518 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 519 map = &td->td_proc->p_vmspace->vm_map; 520 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 521 &entry, &key->info.shared.object, &pindex, &prot, 522 &wired) != KERN_SUCCESS) { 523 return EFAULT; 524 } 525 526 if ((share == PROCESS_SHARE) || 527 (share == AUTO_SHARE && 528 VM_INHERIT_SHARE == entry->inheritance)) { 529 key->shared = 1; 530 key->info.shared.offset = entry->offset + entry->start - 531 (vm_offset_t)addr; 532 vm_object_reference(key->info.shared.object); 533 } else { 534 key->shared = 0; 535 key->info.private.vs = td->td_proc->p_vmspace; 536 key->info.private.addr = (uintptr_t)addr; 537 } 538 vm_map_lookup_done(map, entry); 539 } 540 541 umtxq_hash(key); 542 return (0); 543} 544 545/* 546 * Release key. 547 */ 548static inline void 549umtx_key_release(struct umtx_key *key) 550{ 551 if (key->shared) 552 vm_object_deallocate(key->info.shared.object); 553} 554 555/* 556 * Lock a umtx object. 557 */ 558static int 559_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) 560{ 561 struct umtx_q *uq; 562 u_long owner; 563 u_long old; 564 int error = 0; 565 566 uq = td->td_umtxq; 567 568 /* 569 * Care must be exercised when dealing with umtx structure. It 570 * can fault on any access. 571 */ 572 for (;;) { 573 /* 574 * Try the uncontested case. This should be done in userland. 575 */ 576 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 577 578 /* The acquire succeeded. 
*/ 579 if (owner == UMTX_UNOWNED) 580 return (0); 581 582 /* The address was invalid. */ 583 if (owner == -1) 584 return (EFAULT); 585 586 /* If no one owns it but it is contested try to acquire it. */ 587 if (owner == UMTX_CONTESTED) { 588 owner = casuword(&umtx->u_owner, 589 UMTX_CONTESTED, id | UMTX_CONTESTED); 590 591 if (owner == UMTX_CONTESTED) 592 return (0); 593 594 /* The address was invalid. */ 595 if (owner == -1) 596 return (EFAULT); 597 598 /* If this failed the lock has changed, restart. */ 599 continue; 600 } 601 602 /* 603 * If we caught a signal, we have retried and now 604 * exit immediately. 605 */ 606 if (error != 0) 607 return (error); 608 609 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 610 AUTO_SHARE, &uq->uq_key)) != 0) 611 return (error); 612 613 umtxq_lock(&uq->uq_key); 614 umtxq_busy(&uq->uq_key); 615 umtxq_insert(uq); 616 umtxq_unbusy(&uq->uq_key); 617 umtxq_unlock(&uq->uq_key); 618 619 /* 620 * Set the contested bit so that a release in user space 621 * knows to use the system call for unlock. If this fails 622 * either some one else has acquired the lock or it has been 623 * released. 624 */ 625 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 626 627 /* The address was invalid. */ 628 if (old == -1) { 629 umtxq_lock(&uq->uq_key); 630 umtxq_remove(uq); 631 umtxq_unlock(&uq->uq_key); 632 umtx_key_release(&uq->uq_key); 633 return (EFAULT); 634 } 635 636 /* 637 * We set the contested bit, sleep. Otherwise the lock changed 638 * and we need to retry or we lost a race to the thread 639 * unlocking the umtx. 640 */ 641 umtxq_lock(&uq->uq_key); 642 if (old == owner) 643 error = umtxq_sleep(uq, "umtx", timo); 644 umtxq_remove(uq); 645 umtxq_unlock(&uq->uq_key); 646 umtx_key_release(&uq->uq_key); 647 } 648 649 return (0); 650} 651 652/* 653 * Lock a umtx object. 
654 */ 655static int 656do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 657 struct timespec *timeout) 658{ 659 struct timespec ts, ts2, ts3; 660 struct timeval tv; 661 int error; 662 663 if (timeout == NULL) { 664 error = _do_lock_umtx(td, umtx, id, 0); 665 /* Mutex locking is restarted if it is interrupted. */ 666 if (error == EINTR) 667 error = ERESTART; 668 } else { 669 getnanouptime(&ts); 670 timespecadd(&ts, timeout); 671 TIMESPEC_TO_TIMEVAL(&tv, timeout); 672 for (;;) { 673 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); 674 if (error != ETIMEDOUT) 675 break; 676 getnanouptime(&ts2); 677 if (timespeccmp(&ts2, &ts, >=)) { 678 error = ETIMEDOUT; 679 break; 680 } 681 ts3 = ts; 682 timespecsub(&ts3, &ts2); 683 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 684 } 685 /* Timed-locking is not restarted. */ 686 if (error == ERESTART) 687 error = EINTR; 688 } 689 return (error); 690} 691 692/* 693 * Unlock a umtx object. 694 */ 695static int 696do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 697{ 698 struct umtx_key key; 699 u_long owner; 700 u_long old; 701 int error; 702 int count; 703 704 /* 705 * Make sure we own this mtx. 706 */ 707 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 708 if (owner == -1) 709 return (EFAULT); 710 711 if ((owner & ~UMTX_CONTESTED) != id) 712 return (EPERM); 713 714 /* This should be done in userland */ 715 if ((owner & UMTX_CONTESTED) == 0) { 716 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 717 if (old == -1) 718 return (EFAULT); 719 if (old == owner) 720 return (0); 721 owner = old; 722 } 723 724 /* We should only ever be in here for contested locks */ 725 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 726 &key)) != 0) 727 return (error); 728 729 umtxq_lock(&key); 730 umtxq_busy(&key); 731 count = umtxq_count(&key); 732 umtxq_unlock(&key); 733 734 /* 735 * When unlocking the umtx, it must be marked as unowned if 736 * there is zero or one thread only waiting for it. 
737 * Otherwise, it must be marked as contested. 738 */ 739 old = casuword(&umtx->u_owner, owner, 740 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 741 umtxq_lock(&key); 742 umtxq_signal(&key,1); 743 umtxq_unbusy(&key); 744 umtxq_unlock(&key); 745 umtx_key_release(&key); 746 if (old == -1) 747 return (EFAULT); 748 if (old != owner) 749 return (EINVAL); 750 return (0); 751} 752 753#ifdef COMPAT_IA32 754 755/* 756 * Lock a umtx object. 757 */ 758static int 759_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) 760{ 761 struct umtx_q *uq; 762 uint32_t owner; 763 uint32_t old; 764 int error = 0; 765 766 uq = td->td_umtxq; 767 768 /* 769 * Care must be exercised when dealing with umtx structure. It 770 * can fault on any access. 771 */ 772 for (;;) { 773 /* 774 * Try the uncontested case. This should be done in userland. 775 */ 776 owner = casuword32(m, UMUTEX_UNOWNED, id); 777 778 /* The acquire succeeded. */ 779 if (owner == UMUTEX_UNOWNED) 780 return (0); 781 782 /* The address was invalid. */ 783 if (owner == -1) 784 return (EFAULT); 785 786 /* If no one owns it but it is contested try to acquire it. */ 787 if (owner == UMUTEX_CONTESTED) { 788 owner = casuword32(m, 789 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 790 if (owner == UMUTEX_CONTESTED) 791 return (0); 792 793 /* The address was invalid. */ 794 if (owner == -1) 795 return (EFAULT); 796 797 /* If this failed the lock has changed, restart. */ 798 continue; 799 } 800 801 /* 802 * If we caught a signal, we have retried and now 803 * exit immediately. 804 */ 805 if (error != 0) 806 return (error); 807 808 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 809 AUTO_SHARE, &uq->uq_key)) != 0) 810 return (error); 811 812 umtxq_lock(&uq->uq_key); 813 umtxq_busy(&uq->uq_key); 814 umtxq_insert(uq); 815 umtxq_unbusy(&uq->uq_key); 816 umtxq_unlock(&uq->uq_key); 817 818 /* 819 * Set the contested bit so that a release in user space 820 * knows to use the system call for unlock. 
If this fails 821 * either some one else has acquired the lock or it has been 822 * released. 823 */ 824 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 825 826 /* The address was invalid. */ 827 if (old == -1) { 828 umtxq_lock(&uq->uq_key); 829 umtxq_remove(uq); 830 umtxq_unlock(&uq->uq_key); 831 umtx_key_release(&uq->uq_key); 832 return (EFAULT); 833 } 834 835 /* 836 * We set the contested bit, sleep. Otherwise the lock changed 837 * and we need to retry or we lost a race to the thread 838 * unlocking the umtx. 839 */ 840 umtxq_lock(&uq->uq_key); 841 if (old == owner) 842 error = umtxq_sleep(uq, "umtx", timo); 843 umtxq_remove(uq); 844 umtxq_unlock(&uq->uq_key); 845 umtx_key_release(&uq->uq_key); 846 } 847 848 return (0); 849} 850 851/* 852 * Lock a umtx object. 853 */ 854static int 855do_lock_umtx32(struct thread *td, void *m, uint32_t id, 856 struct timespec *timeout) 857{ 858 struct timespec ts, ts2, ts3; 859 struct timeval tv; 860 int error; 861 862 if (timeout == NULL) { 863 error = _do_lock_umtx32(td, m, id, 0); 864 /* Mutex locking is restarted if it is interrupted. */ 865 if (error == EINTR) 866 error = ERESTART; 867 } else { 868 getnanouptime(&ts); 869 timespecadd(&ts, timeout); 870 TIMESPEC_TO_TIMEVAL(&tv, timeout); 871 for (;;) { 872 error = _do_lock_umtx32(td, m, id, tvtohz(&tv)); 873 if (error != ETIMEDOUT) 874 break; 875 getnanouptime(&ts2); 876 if (timespeccmp(&ts2, &ts, >=)) { 877 error = ETIMEDOUT; 878 break; 879 } 880 ts3 = ts; 881 timespecsub(&ts3, &ts2); 882 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 883 } 884 /* Timed-locking is not restarted. */ 885 if (error == ERESTART) 886 error = EINTR; 887 } 888 return (error); 889} 890 891/* 892 * Unlock a umtx object. 893 */ 894static int 895do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 896{ 897 struct umtx_key key; 898 uint32_t owner; 899 uint32_t old; 900 int error; 901 int count; 902 903 /* 904 * Make sure we own this mtx. 
905 */ 906 owner = fuword32(m); 907 if (owner == -1) 908 return (EFAULT); 909 910 if ((owner & ~UMUTEX_CONTESTED) != id) 911 return (EPERM); 912 913 /* This should be done in userland */ 914 if ((owner & UMUTEX_CONTESTED) == 0) { 915 old = casuword32(m, owner, UMUTEX_UNOWNED); 916 if (old == -1) 917 return (EFAULT); 918 if (old == owner) 919 return (0); 920 owner = old; 921 } 922 923 /* We should only ever be in here for contested locks */ 924 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 925 &key)) != 0) 926 return (error); 927 928 umtxq_lock(&key); 929 umtxq_busy(&key); 930 count = umtxq_count(&key); 931 umtxq_unlock(&key); 932 933 /* 934 * When unlocking the umtx, it must be marked as unowned if 935 * there is zero or one thread only waiting for it. 936 * Otherwise, it must be marked as contested. 937 */ 938 old = casuword32(m, owner, 939 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 940 umtxq_lock(&key); 941 umtxq_signal(&key,1); 942 umtxq_unbusy(&key); 943 umtxq_unlock(&key); 944 umtx_key_release(&key); 945 if (old == -1) 946 return (EFAULT); 947 if (old != owner) 948 return (EINVAL); 949 return (0); 950} 951#endif 952 953/* 954 * Fetch and compare value, sleep on the address if value is not changed. 955 */ 956static int 957do_wait(struct thread *td, void *addr, u_long id, 958 struct timespec *timeout, int compat32, int is_private) 959{ 960 struct umtx_q *uq; 961 struct timespec ts, ts2, ts3; 962 struct timeval tv; 963 u_long tmp; 964 int error = 0; 965 966 uq = td->td_umtxq; 967 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 968 is_private ? 
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 969 return (error); 970 971 umtxq_lock(&uq->uq_key); 972 umtxq_insert(uq); 973 umtxq_unlock(&uq->uq_key); 974 if (compat32 == 0) 975 tmp = fuword(addr); 976 else 977 tmp = fuword32(addr); 978 if (tmp != id) { 979 umtxq_lock(&uq->uq_key); 980 umtxq_remove(uq); 981 umtxq_unlock(&uq->uq_key); 982 } else if (timeout == NULL) { 983 umtxq_lock(&uq->uq_key); 984 error = umtxq_sleep(uq, "uwait", 0); 985 umtxq_remove(uq); 986 umtxq_unlock(&uq->uq_key); 987 } else { 988 getnanouptime(&ts); 989 timespecadd(&ts, timeout); 990 TIMESPEC_TO_TIMEVAL(&tv, timeout); 991 umtxq_lock(&uq->uq_key); 992 for (;;) { 993 error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); 994 if (!(uq->uq_flags & UQF_UMTXQ)) 995 break; 996 if (error != ETIMEDOUT) 997 break; 998 umtxq_unlock(&uq->uq_key); 999 getnanouptime(&ts2); 1000 if (timespeccmp(&ts2, &ts, >=)) { 1001 error = ETIMEDOUT; 1002 umtxq_lock(&uq->uq_key); 1003 break; 1004 } 1005 ts3 = ts; 1006 timespecsub(&ts3, &ts2); 1007 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 1008 umtxq_lock(&uq->uq_key); 1009 } 1010 umtxq_remove(uq); 1011 umtxq_unlock(&uq->uq_key); 1012 } 1013 umtx_key_release(&uq->uq_key); 1014 if (error == ERESTART) 1015 error = EINTR; 1016 return (error); 1017} 1018 1019/* 1020 * Wake up threads sleeping on the specified address. 1021 */ 1022int 1023kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1024{ 1025 struct umtx_key key; 1026 int ret; 1027 1028 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1029 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1030 return (ret); 1031 umtxq_lock(&key); 1032 ret = umtxq_signal(&key, n_wake); 1033 umtxq_unlock(&key); 1034 umtx_key_release(&key); 1035 return (0); 1036} 1037 1038/* 1039 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 
1040 */ 1041static int 1042_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1043 int mode) 1044{ 1045 struct umtx_q *uq; 1046 uint32_t owner, old, id; 1047 int error = 0; 1048 1049 id = td->td_tid; 1050 uq = td->td_umtxq; 1051 1052 /* 1053 * Care must be exercised when dealing with umtx structure. It 1054 * can fault on any access. 1055 */ 1056 for (;;) { 1057 owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); 1058 if (mode == _UMUTEX_WAIT) { 1059 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) 1060 return (0); 1061 } else { 1062 /* 1063 * Try the uncontested case. This should be done in userland. 1064 */ 1065 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1066 1067 /* The acquire succeeded. */ 1068 if (owner == UMUTEX_UNOWNED) 1069 return (0); 1070 1071 /* The address was invalid. */ 1072 if (owner == -1) 1073 return (EFAULT); 1074 1075 /* If no one owns it but it is contested try to acquire it. */ 1076 if (owner == UMUTEX_CONTESTED) { 1077 owner = casuword32(&m->m_owner, 1078 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1079 1080 if (owner == UMUTEX_CONTESTED) 1081 return (0); 1082 1083 /* The address was invalid. */ 1084 if (owner == -1) 1085 return (EFAULT); 1086 1087 /* If this failed the lock has changed, restart. */ 1088 continue; 1089 } 1090 } 1091 1092 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1093 (owner & ~UMUTEX_CONTESTED) == id) 1094 return (EDEADLK); 1095 1096 if (mode == _UMUTEX_TRY) 1097 return (EBUSY); 1098 1099 /* 1100 * If we caught a signal, we have retried and now 1101 * exit immediately. 1102 */ 1103 if (error != 0) 1104 return (error); 1105 1106 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1107 GET_SHARE(flags), &uq->uq_key)) != 0) 1108 return (error); 1109 1110 umtxq_lock(&uq->uq_key); 1111 umtxq_busy(&uq->uq_key); 1112 umtxq_insert(uq); 1113 umtxq_unlock(&uq->uq_key); 1114 1115 /* 1116 * Set the contested bit so that a release in user space 1117 * knows to use the system call for unlock. 
If this fails 1118 * either some one else has acquired the lock or it has been 1119 * released. 1120 */ 1121 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1122 1123 /* The address was invalid. */ 1124 if (old == -1) { 1125 umtxq_lock(&uq->uq_key); 1126 umtxq_remove(uq); 1127 umtxq_unbusy(&uq->uq_key); 1128 umtxq_unlock(&uq->uq_key); 1129 umtx_key_release(&uq->uq_key); 1130 return (EFAULT); 1131 } 1132 1133 /* 1134 * We set the contested bit, sleep. Otherwise the lock changed 1135 * and we need to retry or we lost a race to the thread 1136 * unlocking the umtx. 1137 */ 1138 umtxq_lock(&uq->uq_key); 1139 umtxq_unbusy(&uq->uq_key); 1140 if (old == owner) 1141 error = umtxq_sleep(uq, "umtxn", timo); 1142 umtxq_remove(uq); 1143 umtxq_unlock(&uq->uq_key); 1144 umtx_key_release(&uq->uq_key); 1145 } 1146 1147 return (0); 1148} 1149 1150/* 1151 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1152 */ 1153/* 1154 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1155 */ 1156static int 1157do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1158{ 1159 struct umtx_key key; 1160 uint32_t owner, old, id; 1161 int error; 1162 int count; 1163 1164 id = td->td_tid; 1165 /* 1166 * Make sure we own this mtx. 
1167 */ 1168 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1169 if (owner == -1) 1170 return (EFAULT); 1171 1172 if ((owner & ~UMUTEX_CONTESTED) != id) 1173 return (EPERM); 1174 1175 if ((owner & UMUTEX_CONTESTED) == 0) { 1176 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1177 if (old == -1) 1178 return (EFAULT); 1179 if (old == owner) 1180 return (0); 1181 owner = old; 1182 } 1183 1184 /* We should only ever be in here for contested locks */ 1185 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1186 &key)) != 0) 1187 return (error); 1188 1189 umtxq_lock(&key); 1190 umtxq_busy(&key); 1191 count = umtxq_count(&key); 1192 umtxq_unlock(&key); 1193 1194 /* 1195 * When unlocking the umtx, it must be marked as unowned if 1196 * there is zero or one thread only waiting for it. 1197 * Otherwise, it must be marked as contested. 1198 */ 1199 old = casuword32(&m->m_owner, owner, 1200 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1201 umtxq_lock(&key); 1202 umtxq_signal(&key,1); 1203 umtxq_unbusy(&key); 1204 umtxq_unlock(&key); 1205 umtx_key_release(&key); 1206 if (old == -1) 1207 return (EFAULT); 1208 if (old != owner) 1209 return (EINVAL); 1210 return (0); 1211} 1212 1213/* 1214 * Check if the mutex is available and wake up a waiter, 1215 * only for simple mutex. 
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	/* Read the owner word; fuword32() returns -1 on a bad address. */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Nothing to do if somebody still owns it (ignoring contested bit). */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * If at most one waiter remains, try to clear the contested bit;
	 * the CAS result tells us whether the mutex was still unowned.
	 */
	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Allocate a PI mutex bookkeeping structure (zeroed, with an empty
 * blocked-waiters queue) and account for it.
 */
static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

/*
 * Free a PI mutex bookkeeping structure and drop the allocation count.
 */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  Returns 0 if there is no PI state to adjust, 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the chain of lock owners, lending the blocked
 * thread's priority to each owner that runs at a lower urgency.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already runs at least this urgently; stop here. */
		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
1367 */ 1368static void 1369umtx_unpropagate_priority(struct umtx_pi *pi) 1370{ 1371 struct umtx_q *uq, *uq_owner; 1372 struct umtx_pi *pi2; 1373 int pri, oldpri; 1374 1375 mtx_assert(&umtx_lock, MA_OWNED); 1376 1377 while (pi != NULL && pi->pi_owner != NULL) { 1378 pri = PRI_MAX; 1379 uq_owner = pi->pi_owner->td_umtxq; 1380 1381 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1382 uq = TAILQ_FIRST(&pi2->pi_blocked); 1383 if (uq != NULL) { 1384 if (pri > UPRI(uq->uq_thread)) 1385 pri = UPRI(uq->uq_thread); 1386 } 1387 } 1388 1389 if (pri > uq_owner->uq_inherited_pri) 1390 pri = uq_owner->uq_inherited_pri; 1391 thread_lock(pi->pi_owner); 1392 oldpri = pi->pi_owner->td_user_pri; 1393 sched_unlend_user_prio(pi->pi_owner, pri); 1394 thread_unlock(pi->pi_owner); 1395 umtx_pi_adjust_locked(pi->pi_owner, oldpri); 1396 pi = uq_owner->uq_pi_blocked; 1397 } 1398} 1399 1400/* 1401 * Insert a PI mutex into owned list. 1402 */ 1403static void 1404umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1405{ 1406 struct umtx_q *uq_owner; 1407 1408 uq_owner = owner->td_umtxq; 1409 mtx_assert(&umtx_lock, MA_OWNED); 1410 if (pi->pi_owner != NULL) 1411 panic("pi_ower != NULL"); 1412 pi->pi_owner = owner; 1413 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1414} 1415 1416/* 1417 * Claim ownership of a PI mutex. 1418 */ 1419static int 1420umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1421{ 1422 struct umtx_q *uq, *uq_owner; 1423 1424 uq_owner = owner->td_umtxq; 1425 mtx_lock_spin(&umtx_lock); 1426 if (pi->pi_owner == owner) { 1427 mtx_unlock_spin(&umtx_lock); 1428 return (0); 1429 } 1430 1431 if (pi->pi_owner != NULL) { 1432 /* 1433 * userland may have already messed the mutex, sigh. 
1434 */ 1435 mtx_unlock_spin(&umtx_lock); 1436 return (EPERM); 1437 } 1438 umtx_pi_setowner(pi, owner); 1439 uq = TAILQ_FIRST(&pi->pi_blocked); 1440 if (uq != NULL) { 1441 int pri; 1442 1443 pri = UPRI(uq->uq_thread); 1444 thread_lock(owner); 1445 if (pri < UPRI(owner)) 1446 sched_lend_user_prio(owner, pri); 1447 thread_unlock(owner); 1448 } 1449 mtx_unlock_spin(&umtx_lock); 1450 return (0); 1451} 1452 1453static void 1454umtx_pi_adjust_locked(struct thread *td, u_char oldpri) 1455{ 1456 struct umtx_q *uq; 1457 struct umtx_pi *pi; 1458 1459 uq = td->td_umtxq; 1460 /* 1461 * Pick up the lock that td is blocked on. 1462 */ 1463 pi = uq->uq_pi_blocked; 1464 MPASS(pi != NULL); 1465 1466 /* Resort the turnstile on the list. */ 1467 if (!umtx_pi_adjust_thread(pi, td)) 1468 return; 1469 1470 /* 1471 * If our priority was lowered and we are at the head of the 1472 * turnstile, then propagate our new priority up the chain. 1473 */ 1474 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri) 1475 umtx_propagate_priority(td); 1476} 1477 1478/* 1479 * Adjust a thread's order position in its blocked PI mutex, 1480 * this may result new priority propagating process. 1481 */ 1482void 1483umtx_pi_adjust(struct thread *td, u_char oldpri) 1484{ 1485 struct umtx_q *uq; 1486 struct umtx_pi *pi; 1487 1488 uq = td->td_umtxq; 1489 mtx_lock_spin(&umtx_lock); 1490 /* 1491 * Pick up the lock that td is blocked on. 1492 */ 1493 pi = uq->uq_pi_blocked; 1494 if (pi != NULL) 1495 umtx_pi_adjust_locked(td, oldpri); 1496 mtx_unlock_spin(&umtx_lock); 1497} 1498 1499/* 1500 * Sleep on a PI mutex. 
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently, we only support process-private PI mutexes;
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&umtx_lock);
	}

	/* Keep pi_blocked sorted by priority: insert before first lower-pri. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	/* Lend our priority up the owner chain before sleeping. */
	mtx_lock_spin(&umtx_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		/* Still queued: we were interrupted/timed out, dequeue. */
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	/* Undo blocked state and withdraw any lent priority. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	/* Free outside the spin-locked section above. */
	if (free)
		umtx_pi_free(pi);
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/*
	 * Find or create the in-kernel PI state for this lock.  If the
	 * non-blocking allocation fails we must drop the chain lock for
	 * M_WAITOK, then re-lookup in case somebody raced us.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtxq_unlock(&uq->uq_key);
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
1812 */ 1813static int 1814do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1815{ 1816 struct umtx_key key; 1817 struct umtx_q *uq_first, *uq_first2, *uq_me; 1818 struct umtx_pi *pi, *pi2; 1819 uint32_t owner, old, id; 1820 int error; 1821 int count; 1822 int pri; 1823 1824 id = td->td_tid; 1825 /* 1826 * Make sure we own this mtx. 1827 */ 1828 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1829 if (owner == -1) 1830 return (EFAULT); 1831 1832 if ((owner & ~UMUTEX_CONTESTED) != id) 1833 return (EPERM); 1834 1835 /* This should be done in userland */ 1836 if ((owner & UMUTEX_CONTESTED) == 0) { 1837 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1838 if (old == -1) 1839 return (EFAULT); 1840 if (old == owner) 1841 return (0); 1842 owner = old; 1843 } 1844 1845 /* We should only ever be in here for contested locks */ 1846 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1847 &key)) != 0) 1848 return (error); 1849 1850 umtxq_lock(&key); 1851 umtxq_busy(&key); 1852 count = umtxq_count_pi(&key, &uq_first); 1853 if (uq_first != NULL) { 1854 pi = uq_first->uq_pi_blocked; 1855 if (pi->pi_owner != curthread) { 1856 umtxq_unbusy(&key); 1857 umtxq_unlock(&key); 1858 /* userland messed the mutex */ 1859 return (EPERM); 1860 } 1861 uq_me = curthread->td_umtxq; 1862 mtx_lock_spin(&umtx_lock); 1863 pi->pi_owner = NULL; 1864 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 1865 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1866 pri = PRI_MAX; 1867 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1868 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1869 if (uq_first2 != NULL) { 1870 if (pri > UPRI(uq_first2->uq_thread)) 1871 pri = UPRI(uq_first2->uq_thread); 1872 } 1873 } 1874 thread_lock(curthread); 1875 sched_unlend_user_prio(curthread, pri); 1876 thread_unlock(curthread); 1877 mtx_unlock_spin(&umtx_lock); 1878 } 1879 umtxq_unlock(&key); 1880 1881 /* 1882 * When unlocking the umtx, it must be marked as unowned if 1883 * there is zero 
or one thread only waiting for it. 1884 * Otherwise, it must be marked as contested. 1885 */ 1886 old = casuword32(&m->m_owner, owner, 1887 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1888 1889 umtxq_lock(&key); 1890 if (uq_first != NULL) 1891 umtxq_signal_thread(uq_first); 1892 umtxq_unbusy(&key); 1893 umtxq_unlock(&key); 1894 umtx_key_release(&key); 1895 if (old == -1) 1896 return (EFAULT); 1897 if (old != owner) 1898 return (EINVAL); 1899 return (0); 1900} 1901 1902/* 1903 * Lock a PP mutex. 1904 */ 1905static int 1906_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1907 int try) 1908{ 1909 struct umtx_q *uq, *uq2; 1910 struct umtx_pi *pi; 1911 uint32_t ceiling; 1912 uint32_t owner, id; 1913 int error, pri, old_inherited_pri, su; 1914 1915 id = td->td_tid; 1916 uq = td->td_umtxq; 1917 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1918 &uq->uq_key)) != 0) 1919 return (error); 1920 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1921 for (;;) { 1922 old_inherited_pri = uq->uq_inherited_pri; 1923 umtxq_lock(&uq->uq_key); 1924 umtxq_busy(&uq->uq_key); 1925 umtxq_unlock(&uq->uq_key); 1926 1927 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); 1928 if (ceiling > RTP_PRIO_MAX) { 1929 error = EINVAL; 1930 goto out; 1931 } 1932 1933 mtx_lock_spin(&umtx_lock); 1934 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1935 mtx_unlock_spin(&umtx_lock); 1936 error = EINVAL; 1937 goto out; 1938 } 1939 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1940 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1941 thread_lock(td); 1942 if (uq->uq_inherited_pri < UPRI(td)) 1943 sched_lend_user_prio(td, uq->uq_inherited_pri); 1944 thread_unlock(td); 1945 } 1946 mtx_unlock_spin(&umtx_lock); 1947 1948 owner = casuword32(&m->m_owner, 1949 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1950 1951 if (owner == UMUTEX_CONTESTED) { 1952 error = 0; 1953 break; 1954 } 1955 1956 /* The address was invalid. 
*/ 1957 if (owner == -1) { 1958 error = EFAULT; 1959 break; 1960 } 1961 1962 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1963 (owner & ~UMUTEX_CONTESTED) == id) { 1964 error = EDEADLK; 1965 break; 1966 } 1967 1968 if (try != 0) { 1969 error = EBUSY; 1970 break; 1971 } 1972 1973 /* 1974 * If we caught a signal, we have retried and now 1975 * exit immediately. 1976 */ 1977 if (error != 0) 1978 break; 1979 1980 umtxq_lock(&uq->uq_key); 1981 umtxq_insert(uq); 1982 umtxq_unbusy(&uq->uq_key); 1983 error = umtxq_sleep(uq, "umtxpp", timo); 1984 umtxq_remove(uq); 1985 umtxq_unlock(&uq->uq_key); 1986 1987 mtx_lock_spin(&umtx_lock); 1988 uq->uq_inherited_pri = old_inherited_pri; 1989 pri = PRI_MAX; 1990 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1991 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1992 if (uq2 != NULL) { 1993 if (pri > UPRI(uq2->uq_thread)) 1994 pri = UPRI(uq2->uq_thread); 1995 } 1996 } 1997 if (pri > uq->uq_inherited_pri) 1998 pri = uq->uq_inherited_pri; 1999 thread_lock(td); 2000 sched_unlend_user_prio(td, pri); 2001 thread_unlock(td); 2002 mtx_unlock_spin(&umtx_lock); 2003 } 2004 2005 if (error != 0) { 2006 mtx_lock_spin(&umtx_lock); 2007 uq->uq_inherited_pri = old_inherited_pri; 2008 pri = PRI_MAX; 2009 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2010 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2011 if (uq2 != NULL) { 2012 if (pri > UPRI(uq2->uq_thread)) 2013 pri = UPRI(uq2->uq_thread); 2014 } 2015 } 2016 if (pri > uq->uq_inherited_pri) 2017 pri = uq->uq_inherited_pri; 2018 thread_lock(td); 2019 sched_unlend_user_prio(td, pri); 2020 thread_unlock(td); 2021 mtx_unlock_spin(&umtx_lock); 2022 } 2023 2024out: 2025 umtxq_lock(&uq->uq_key); 2026 umtxq_unbusy(&uq->uq_key); 2027 umtxq_unlock(&uq->uq_key); 2028 umtx_key_release(&uq->uq_key); 2029 return (error); 2030} 2031 2032/* 2033 * Unlock a PP mutex. 
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* su: caller is privileged to use real-time priorities. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to restore on unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute our priority now that this PP mutex is dropped. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the priority ceiling of a PP mutex, returning the previous
 * ceiling through old_ceiling when requested.  The mutex must be
 * acquired (or already owned) while the ceilings are swapped.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to grab the (unowned-but-contested) mutex. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own it: update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Dispatch a lock request to the protocol-specific implementation
 * based on the mutex flags.
 */
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
    int mode)
{
	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, mode));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, mode));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, mode));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Convert the relative timeout to an absolute deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

/*
 * Wait on a userland condition variable, releasing mutex m while
 * waiting and observing the optional timeout and unparking check.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* A pending wakeup beats sleeping: report EINTR. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Absolute deadline, re-sleeping on early timeout. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
2368 */ 2369static int 2370do_cv_signal(struct thread *td, struct ucond *cv) 2371{ 2372 struct umtx_key key; 2373 int error, cnt, nwake; 2374 uint32_t flags; 2375 2376 flags = fuword32(&cv->c_flags); 2377 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2378 return (error); 2379 umtxq_lock(&key); 2380 umtxq_busy(&key); 2381 cnt = umtxq_count(&key); 2382 nwake = umtxq_signal(&key, 1); 2383 if (cnt <= nwake) { 2384 umtxq_unlock(&key); 2385 error = suword32( 2386 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2387 umtxq_lock(&key); 2388 } 2389 umtxq_unbusy(&key); 2390 umtxq_unlock(&key); 2391 umtx_key_release(&key); 2392 return (error); 2393} 2394 2395static int 2396do_cv_broadcast(struct thread *td, struct ucond *cv) 2397{ 2398 struct umtx_key key; 2399 int error; 2400 uint32_t flags; 2401 2402 flags = fuword32(&cv->c_flags); 2403 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2404 return (error); 2405 2406 umtxq_lock(&key); 2407 umtxq_busy(&key); 2408 umtxq_signal(&key, INT_MAX); 2409 umtxq_unlock(&key); 2410 2411 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2412 2413 umtxq_lock(&key); 2414 umtxq_unbusy(&key); 2415 umtxq_unlock(&key); 2416 2417 umtx_key_release(&key); 2418 return (error); 2419} 2420 2421static int 2422do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo) 2423{ 2424 struct umtx_q *uq; 2425 uint32_t flags, wrflags; 2426 int32_t state, oldstate; 2427 int32_t blocked_readers; 2428 int error; 2429 2430 uq = td->td_umtxq; 2431 flags = fuword32(&rwlock->rw_flags); 2432 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2433 if (error != 0) 2434 return (error); 2435 2436 wrflags = URWLOCK_WRITE_OWNER; 2437 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2438 wrflags |= URWLOCK_WRITE_WAITERS; 2439 2440 for (;;) { 2441 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2442 /* try to lock it */ 2443 while 
(!(state & wrflags)) { 2444 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2445 umtx_key_release(&uq->uq_key); 2446 return (EAGAIN); 2447 } 2448 oldstate = casuword32(&rwlock->rw_state, state, state + 1); 2449 if (oldstate == state) { 2450 umtx_key_release(&uq->uq_key); 2451 return (0); 2452 } 2453 state = oldstate; 2454 } 2455 2456 if (error) 2457 break; 2458 2459 /* grab monitor lock */ 2460 umtxq_lock(&uq->uq_key); 2461 umtxq_busy(&uq->uq_key); 2462 umtxq_unlock(&uq->uq_key); 2463 2464 /* set read contention bit */ 2465 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { 2466 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); 2467 if (oldstate == state) 2468 goto sleep; 2469 state = oldstate; 2470 } 2471 2472 /* state is changed while setting flags, restart */ 2473 if (!(state & wrflags)) { 2474 umtxq_lock(&uq->uq_key); 2475 umtxq_unbusy(&uq->uq_key); 2476 umtxq_unlock(&uq->uq_key); 2477 continue; 2478 } 2479 2480sleep: 2481 /* contention bit is set, before sleeping, increase read waiter count */ 2482 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2483 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2484 2485 while (state & wrflags) { 2486 umtxq_lock(&uq->uq_key); 2487 umtxq_insert(uq); 2488 umtxq_unbusy(&uq->uq_key); 2489 2490 error = umtxq_sleep(uq, "urdlck", timo); 2491 2492 umtxq_busy(&uq->uq_key); 2493 umtxq_remove(uq); 2494 umtxq_unlock(&uq->uq_key); 2495 if (error) 2496 break; 2497 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2498 } 2499 2500 /* decrease read waiter count, and may clear read contention bit */ 2501 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2502 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2503 if (blocked_readers == 1) { 2504 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2505 for (;;) { 2506 oldstate = casuword32(&rwlock->rw_state, state, 2507 state & ~URWLOCK_READ_WAITERS); 2508 if (oldstate 
== state) 2509 break; 2510 state = oldstate; 2511 } 2512 } 2513 2514 umtxq_lock(&uq->uq_key); 2515 umtxq_unbusy(&uq->uq_key); 2516 umtxq_unlock(&uq->uq_key); 2517 } 2518 umtx_key_release(&uq->uq_key); 2519 return (error); 2520} 2521 2522static int 2523do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout) 2524{ 2525 struct timespec ts, ts2, ts3; 2526 struct timeval tv; 2527 int error; 2528 2529 getnanouptime(&ts); 2530 timespecadd(&ts, timeout); 2531 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2532 for (;;) { 2533 error = do_rw_rdlock(td, obj, val, tvtohz(&tv)); 2534 if (error != ETIMEDOUT) 2535 break; 2536 getnanouptime(&ts2); 2537 if (timespeccmp(&ts2, &ts, >=)) { 2538 error = ETIMEDOUT; 2539 break; 2540 } 2541 ts3 = ts; 2542 timespecsub(&ts3, &ts2); 2543 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2544 } 2545 if (error == ERESTART) 2546 error = EINTR; 2547 return (error); 2548} 2549 2550static int 2551do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo) 2552{ 2553 struct umtx_q *uq; 2554 uint32_t flags; 2555 int32_t state, oldstate; 2556 int32_t blocked_writers; 2557 int error; 2558 2559 uq = td->td_umtxq; 2560 flags = fuword32(&rwlock->rw_flags); 2561 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2562 if (error != 0) 2563 return (error); 2564 2565 for (;;) { 2566 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2567 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2568 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); 2569 if (oldstate == state) { 2570 umtx_key_release(&uq->uq_key); 2571 return (0); 2572 } 2573 state = oldstate; 2574 } 2575 2576 if (error) 2577 break; 2578 2579 /* grab monitor lock */ 2580 umtxq_lock(&uq->uq_key); 2581 umtxq_busy(&uq->uq_key); 2582 umtxq_unlock(&uq->uq_key); 2583 2584 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && 2585 (state & URWLOCK_WRITE_WAITERS) == 0) { 2586 oldstate = 
casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* Lock became free while setting the flag; retry the fast path. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Contention bit is set; count ourselves as a blocked writer. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		/* Sleep on the exclusive queue until the lock is released. */
		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* Last blocked writer clears the write-waiters bit. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Write-lock a userland rwlock with an absolute deadline derived from
 * the relative 'timeout'; mirrors do_rw_rdlock2().
 */
static int
do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_wrlock(td, obj, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		/* Recompute the remaining time and sleep again. */
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a userland rwlock held by the caller (either the write owner
 * or one reader reference) and wake waiters as appropriate.  Returns
 * EPERM if the lock is not held in a way the caller could release.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Clear write ownership; fail if it vanished underneath us. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one reader reference. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Not locked at all. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Decide whom to wake: by default writers are preferred (one
	 * writer), otherwise all readers; URWLOCK_PREFER_READER flips
	 * the order.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/* Lock a simple umtx on behalf of the calling thread (legacy syscall). */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}

/* Unlock a simple umtx held by the calling thread (legacy syscall). */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}

/* UMTX_OP_LOCK: lock a umtx, with an optional relative timeout. */
static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

/* UMTX_OP_UNLOCK: unlock a umtx owned by thread id 'val'. */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

/* UMTX_OP_WAIT: wait while *obj == val (long-sized compare). */
static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
}

/* UMTX_OP_WAIT_UINT: as above but with a 32-bit compare. */
static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
}

/* UMTX_OP_WAIT_UINT_PRIVATE: 32-bit wait on a process-private word. */
static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
}

/* UMTX_OP_WAKE: wake up to 'val' threads waiting on a shared word. */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

/* UMTX_OP_WAKE_PRIVATE: as above, process-private variant. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}

/* UMTX_OP_MUTEX_LOCK: lock a umutex, optional relative timeout. */
static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return do_lock_umutex(td, uap->obj, ts, 0);
}

/* UMTX_OP_MUTEX_TRYLOCK: non-blocking umutex lock attempt. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
}

/* UMTX_OP_UMUTEX_WAIT: wait for a contested umutex without taking it. */
static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). 
*/
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
}

/* UMTX_OP_UMUTEX_WAKE: wake one waiter blocked on a umutex. */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_wake_umutex(td, uap->obj);
}

/* UMTX_OP_MUTEX_UNLOCK: unlock a umutex held by the caller. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}

/* UMTX_OP_SET_CEILING: set a PP umutex priority ceiling. */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}

/* UMTX_OP_CV_WAIT: wait on a condition variable with its umutex. */
static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

/* UMTX_OP_CV_SIGNAL: wake one condition-variable waiter. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}

/* UMTX_OP_CV_BROADCAST: wake all condition-variable waiters. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}

/* UMTX_OP_RW_RDLOCK: read-lock a urwlock, optional relative timeout. */
static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

/* UMTX_OP_RW_WRLOCK: write-lock a urwlock, optional relative timeout. */
static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}

		error = do_rw_wrlock2(td, uap->obj, &timeout);
	}
	return (error);
}

/* UMTX_OP_RW_UNLOCK: release a urwlock held by the caller. */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return do_rw_unlock(td, uap->obj);
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/* Dispatch table for _umtx_op(2); indexed by the UMTX_OP_* opcode. */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* 
UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};

/* _umtx_op(2) entry point: validate the opcode and dispatch. */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}

#ifdef COMPAT_IA32
/* 32-bit compat legacy umtx lock syscall. */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

/* 32-bit compat legacy umtx unlock syscall. */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

/* Layout of a struct timespec as seen by a 32-bit process. */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};

/*
 * Copy a 32-bit timespec in from userland and widen it to the native
 * struct timespec.
 */
static inline int
copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		tsp->tv_sec = ts32.tv_sec;
		tsp->tv_nsec = ts32.tv_nsec;
	}
	return (error);
}

/* 32-bit compat UMTX_OP_LOCK handler. */
static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

/* 32-bit compat UMTX_OP_UNLOCK handler. */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

/* 32-bit compat wait handler (serves UMTX_OP_WAIT and UMTX_OP_WAIT_UINT). */
static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
}

/* 32-bit compat UMTX_OP_MUTEX_LOCK handler. */
static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_lock_umutex(td, uap->obj, ts, 0);
}

/* 32-bit compat UMTX_OP_UMUTEX_WAIT handler. */
static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). 
*/ 3160 if (uap->uaddr2 == NULL) 3161 ts = NULL; 3162 else { 3163 error = copyin_timeout32(uap->uaddr2, &timeout); 3164 if (error != 0) 3165 return (error); 3166 if (timeout.tv_nsec >= 1000000000 || 3167 timeout.tv_nsec < 0) 3168 return (EINVAL); 3169 ts = &timeout; 3170 } 3171 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); 3172} 3173 3174static int 3175__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3176{ 3177 struct timespec *ts, timeout; 3178 int error; 3179 3180 /* Allow a null timespec (wait forever). */ 3181 if (uap->uaddr2 == NULL) 3182 ts = NULL; 3183 else { 3184 error = copyin_timeout32(uap->uaddr2, &timeout); 3185 if (error != 0) 3186 return (error); 3187 if (timeout.tv_nsec >= 1000000000 || 3188 timeout.tv_nsec < 0) 3189 return (EINVAL); 3190 ts = &timeout; 3191 } 3192 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3193} 3194 3195static int 3196__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3197{ 3198 struct timespec timeout; 3199 int error; 3200 3201 /* Allow a null timespec (wait forever). */ 3202 if (uap->uaddr2 == NULL) { 3203 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3204 } else { 3205 error = copyin(uap->uaddr2, &timeout, 3206 sizeof(timeout)); 3207 if (error != 0) 3208 return (error); 3209 if (timeout.tv_nsec >= 1000000000 || 3210 timeout.tv_nsec < 0) { 3211 return (EINVAL); 3212 } 3213 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); 3214 } 3215 return (error); 3216} 3217 3218static int 3219__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3220{ 3221 struct timespec timeout; 3222 int error; 3223 3224 /* Allow a null timespec (wait forever). 
*/ 3225 if (uap->uaddr2 == NULL) { 3226 error = do_rw_wrlock(td, uap->obj, 0); 3227 } else { 3228 error = copyin_timeout32(uap->uaddr2, &timeout); 3229 if (error != 0) 3230 return (error); 3231 if (timeout.tv_nsec >= 1000000000 || 3232 timeout.tv_nsec < 0) { 3233 return (EINVAL); 3234 } 3235 3236 error = do_rw_wrlock2(td, uap->obj, &timeout); 3237 } 3238 return (error); 3239} 3240 3241static int 3242__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3243{ 3244 struct timespec *ts, timeout; 3245 int error; 3246 3247 if (uap->uaddr2 == NULL) 3248 ts = NULL; 3249 else { 3250 error = copyin_timeout32(uap->uaddr2, &timeout); 3251 if (error != 0) 3252 return (error); 3253 if (timeout.tv_nsec >= 1000000000 || 3254 timeout.tv_nsec < 0) 3255 return (EINVAL); 3256 ts = &timeout; 3257 } 3258 return do_wait(td, uap->obj, uap->val, ts, 1, 1); 3259} 3260 3261static _umtx_op_func op_table_compat32[] = { 3262 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */ 3263 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ 3264 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3265 __umtx_op_wake, /* UMTX_OP_WAKE */ 3266 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3267 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_TRYLOCK */ 3268 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3269 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3270 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3271 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3272 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3273 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3274 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3275 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3276 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3277 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3278 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3279 __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */ 3280 __umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */ 
};

/* 32-bit compat _umtx_op(2) entry point: validate opcode and dispatch. */
int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table_compat32[uap->op])(td,
			(struct _umtx_op_args *)uap);
	return (EINVAL);
}
#endif

/* Allocate and attach a umtx queue entry to a newly created thread. */
void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

/* Free the umtx queue entry when the thread structure is destroyed. */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * It will be called when new thread is created, e.g fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	/* The recycled entry must carry no stale umtx state. */
	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	/*
	 * Disown any PI mutexes still contested by this thread and
	 * reset its inherited priority, all under the umtx spin lock.
	 */
	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	/* Drop any priority still borrowed from a PI waiter. */
	thread_lock(td);
	td->td_flags &= ~TDF_UBORROWING;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
}