kern_umtx.c revision 174701
1/*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 174701 2007-12-17 05:55:07Z davidxu $"); 30 31#include "opt_compat.h" 32#include <sys/param.h> 33#include <sys/kernel.h> 34#include <sys/limits.h> 35#include <sys/lock.h> 36#include <sys/malloc.h> 37#include <sys/mutex.h> 38#include <sys/priv.h> 39#include <sys/proc.h> 40#include <sys/sched.h> 41#include <sys/smp.h> 42#include <sys/sysctl.h> 43#include <sys/sysent.h> 44#include <sys/systm.h> 45#include <sys/sysproto.h> 46#include <sys/eventhandler.h> 47#include <sys/umtx.h> 48 49#include <vm/vm.h> 50#include <vm/vm_param.h> 51#include <vm/pmap.h> 52#include <vm/vm_map.h> 53#include <vm/vm_object.h> 54 55#include <machine/cpu.h> 56 57#ifdef COMPAT_IA32 58#include <compat/freebsd32/freebsd32_proto.h> 59#endif 60 61#define TYPE_SIMPLE_LOCK 0 62#define TYPE_SIMPLE_WAIT 1 63#define TYPE_NORMAL_UMUTEX 2 64#define TYPE_PI_UMUTEX 3 65#define TYPE_PP_UMUTEX 4 66#define TYPE_CV 5 67 68/* Key to represent a unique userland synchronous object */ 69struct umtx_key { 70 int hash; 71 int type; 72 int shared; 73 union { 74 struct { 75 vm_object_t object; 76 uintptr_t offset; 77 } shared; 78 struct { 79 struct vmspace *vs; 80 uintptr_t addr; 81 } private; 82 struct { 83 void *a; 84 uintptr_t b; 85 } both; 86 } info; 87}; 88 89/* Priority inheritance mutex info. */ 90struct umtx_pi { 91 /* Owner thread */ 92 struct thread *pi_owner; 93 94 /* Reference count */ 95 int pi_refcount; 96 97 /* List entry to link umtx holding by thread */ 98 TAILQ_ENTRY(umtx_pi) pi_link; 99 100 /* List entry in hash */ 101 TAILQ_ENTRY(umtx_pi) pi_hashlink; 102 103 /* List for waiters */ 104 TAILQ_HEAD(,umtx_q) pi_blocked; 105 106 /* Identify a userland lock object */ 107 struct umtx_key pi_key; 108}; 109 110/* A userland synchronous object user. */ 111struct umtx_q { 112 /* Linked list for the hash. */ 113 TAILQ_ENTRY(umtx_q) uq_link; 114 115 /* Umtx key. */ 116 struct umtx_key uq_key; 117 118 /* Umtx flags. 
*/ 119 int uq_flags; 120#define UQF_UMTXQ 0x0001 121 122 /* The thread waits on. */ 123 struct thread *uq_thread; 124 125 /* 126 * Blocked on PI mutex. read can use chain lock 127 * or umtx_lock, write must have both chain lock and 128 * umtx_lock being hold. 129 */ 130 struct umtx_pi *uq_pi_blocked; 131 132 /* On blocked list */ 133 TAILQ_ENTRY(umtx_q) uq_lockq; 134 135 /* Thread contending with us */ 136 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 137 138 /* Inherited priority from PP mutex */ 139 u_char uq_inherited_pri; 140}; 141 142TAILQ_HEAD(umtxq_head, umtx_q); 143 144/* Userland lock object's wait-queue chain */ 145struct umtxq_chain { 146 /* Lock for this chain. */ 147 struct mtx uc_lock; 148 149 /* List of sleep queues. */ 150 struct umtxq_head uc_queue; 151 152 /* Busy flag */ 153 char uc_busy; 154 155 /* Chain lock waiters */ 156 int uc_waiters; 157 158 /* All PI in the list */ 159 TAILQ_HEAD(,umtx_pi) uc_pi_list; 160}; 161 162#define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 163 164/* 165 * Don't propagate time-sharing priority, there is a security reason, 166 * a user can simply introduce PI-mutex, let thread A lock the mutex, 167 * and let another thread B block on the mutex, because B is 168 * sleeping, its priority will be boosted, this causes A's priority to 169 * be boosted via priority propagating too and will never be lowered even 170 * if it is using 100%CPU, this is unfair to other processes. 171 */ 172 173#define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 174 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 175 PRI_MAX_TIMESHARE : (td)->td_user_pri) 176 177#define GOLDEN_RATIO_PRIME 2654404609U 178#define UMTX_CHAINS 128 179#define UMTX_SHIFTS (__WORD_BIT - 7) 180 181#define THREAD_SHARE 0 182#define PROCESS_SHARE 1 183#define AUTO_SHARE 2 184 185#define GET_SHARE(flags) \ 186 (((flags) & USYNC_PROCESS_SHARED) == 0 ? 
THREAD_SHARE : PROCESS_SHARE) 187 188static uma_zone_t umtx_pi_zone; 189static struct umtxq_chain umtxq_chains[UMTX_CHAINS]; 190static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 191static int umtx_pi_allocated; 192 193SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 194SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 195 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 196 197static void umtxq_sysinit(void *); 198static void umtxq_hash(struct umtx_key *key); 199static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 200static void umtxq_lock(struct umtx_key *key); 201static void umtxq_unlock(struct umtx_key *key); 202static void umtxq_busy(struct umtx_key *key); 203static void umtxq_unbusy(struct umtx_key *key); 204static void umtxq_insert(struct umtx_q *uq); 205static void umtxq_remove(struct umtx_q *uq); 206static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo); 207static int umtxq_count(struct umtx_key *key); 208static int umtxq_signal(struct umtx_key *key, int nr_wakeup); 209static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); 210static int umtx_key_get(void *addr, int type, int share, 211 struct umtx_key *key); 212static void umtx_key_release(struct umtx_key *key); 213static struct umtx_pi *umtx_pi_alloc(int); 214static void umtx_pi_free(struct umtx_pi *pi); 215static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri); 216static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 217static void umtx_thread_cleanup(struct thread *td); 218static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 219 struct image_params *imgp __unused); 220SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 221 222static struct mtx umtx_lock; 223 224static void 225umtxq_sysinit(void *arg __unused) 226{ 227 int i; 228 229 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 230 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 
0); 231 for (i = 0; i < UMTX_CHAINS; ++i) { 232 mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL, 233 MTX_DEF | MTX_DUPOK); 234 TAILQ_INIT(&umtxq_chains[i].uc_queue); 235 TAILQ_INIT(&umtxq_chains[i].uc_pi_list); 236 umtxq_chains[i].uc_busy = 0; 237 umtxq_chains[i].uc_waiters = 0; 238 } 239 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 240 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 241 EVENTHANDLER_PRI_ANY); 242} 243 244struct umtx_q * 245umtxq_alloc(void) 246{ 247 struct umtx_q *uq; 248 249 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 250 TAILQ_INIT(&uq->uq_pi_contested); 251 uq->uq_inherited_pri = PRI_MAX; 252 return (uq); 253} 254 255void 256umtxq_free(struct umtx_q *uq) 257{ 258 free(uq, M_UMTX); 259} 260 261static inline void 262umtxq_hash(struct umtx_key *key) 263{ 264 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 265 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 266} 267 268static inline int 269umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) 270{ 271 return (k1->type == k2->type && 272 k1->info.both.a == k2->info.both.a && 273 k1->info.both.b == k2->info.both.b); 274} 275 276static inline struct umtxq_chain * 277umtxq_getchain(struct umtx_key *key) 278{ 279 return (&umtxq_chains[key->hash]); 280} 281 282/* 283 * Set chain to busy state when following operation 284 * may be blocked (kernel mutex can not be used). 285 */ 286static inline void 287umtxq_busy(struct umtx_key *key) 288{ 289 struct umtxq_chain *uc; 290 291 uc = umtxq_getchain(key); 292 mtx_assert(&uc->uc_lock, MA_OWNED); 293 while (uc->uc_busy != 0) { 294 uc->uc_waiters++; 295 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 296 uc->uc_waiters--; 297 } 298 uc->uc_busy = 1; 299} 300 301/* 302 * Unbusy a chain. 
303 */ 304static inline void 305umtxq_unbusy(struct umtx_key *key) 306{ 307 struct umtxq_chain *uc; 308 309 uc = umtxq_getchain(key); 310 mtx_assert(&uc->uc_lock, MA_OWNED); 311 KASSERT(uc->uc_busy != 0, ("not busy")); 312 uc->uc_busy = 0; 313 if (uc->uc_waiters) 314 wakeup_one(uc); 315} 316 317/* 318 * Lock a chain. 319 */ 320static inline void 321umtxq_lock(struct umtx_key *key) 322{ 323 struct umtxq_chain *uc; 324 325 uc = umtxq_getchain(key); 326 mtx_lock(&uc->uc_lock); 327} 328 329/* 330 * Unlock a chain. 331 */ 332static inline void 333umtxq_unlock(struct umtx_key *key) 334{ 335 struct umtxq_chain *uc; 336 337 uc = umtxq_getchain(key); 338 mtx_unlock(&uc->uc_lock); 339} 340 341/* 342 * Insert a thread onto the umtx queue. 343 */ 344static inline void 345umtxq_insert(struct umtx_q *uq) 346{ 347 struct umtxq_chain *uc; 348 349 uc = umtxq_getchain(&uq->uq_key); 350 UMTXQ_LOCKED_ASSERT(uc); 351 TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link); 352 uq->uq_flags |= UQF_UMTXQ; 353} 354 355/* 356 * Remove thread from the umtx queue. 357 */ 358static inline void 359umtxq_remove(struct umtx_q *uq) 360{ 361 struct umtxq_chain *uc; 362 363 uc = umtxq_getchain(&uq->uq_key); 364 UMTXQ_LOCKED_ASSERT(uc); 365 if (uq->uq_flags & UQF_UMTXQ) { 366 TAILQ_REMOVE(&uc->uc_queue, uq, uq_link); 367 uq->uq_flags &= ~UQF_UMTXQ; 368 } 369} 370 371/* 372 * Check if there are multiple waiters 373 */ 374static int 375umtxq_count(struct umtx_key *key) 376{ 377 struct umtxq_chain *uc; 378 struct umtx_q *uq; 379 int count = 0; 380 381 uc = umtxq_getchain(key); 382 UMTXQ_LOCKED_ASSERT(uc); 383 TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) { 384 if (umtx_key_match(&uq->uq_key, key)) { 385 if (++count > 1) 386 break; 387 } 388 } 389 return (count); 390} 391 392/* 393 * Check if there are multiple PI waiters and returns first 394 * waiter. 
395 */ 396static int 397umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 398{ 399 struct umtxq_chain *uc; 400 struct umtx_q *uq; 401 int count = 0; 402 403 *first = NULL; 404 uc = umtxq_getchain(key); 405 UMTXQ_LOCKED_ASSERT(uc); 406 TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) { 407 if (umtx_key_match(&uq->uq_key, key)) { 408 if (++count > 1) 409 break; 410 *first = uq; 411 } 412 } 413 return (count); 414} 415 416/* 417 * Wake up threads waiting on an userland object. 418 */ 419static int 420umtxq_signal(struct umtx_key *key, int n_wake) 421{ 422 struct umtxq_chain *uc; 423 struct umtx_q *uq, *next; 424 int ret; 425 426 ret = 0; 427 uc = umtxq_getchain(key); 428 UMTXQ_LOCKED_ASSERT(uc); 429 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) { 430 if (umtx_key_match(&uq->uq_key, key)) { 431 umtxq_remove(uq); 432 wakeup(uq); 433 if (++ret >= n_wake) 434 break; 435 } 436 } 437 return (ret); 438} 439 440/* 441 * Wake up specified thread. 442 */ 443static inline void 444umtxq_signal_thread(struct umtx_q *uq) 445{ 446 struct umtxq_chain *uc; 447 448 uc = umtxq_getchain(&uq->uq_key); 449 UMTXQ_LOCKED_ASSERT(uc); 450 umtxq_remove(uq); 451 wakeup(uq); 452} 453 454/* 455 * Put thread into sleep state, before sleeping, check if 456 * thread was removed from umtx queue. 457 */ 458static inline int 459umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) 460{ 461 struct umtxq_chain *uc; 462 int error; 463 464 uc = umtxq_getchain(&uq->uq_key); 465 UMTXQ_LOCKED_ASSERT(uc); 466 if (!(uq->uq_flags & UQF_UMTXQ)) 467 return (0); 468 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 469 if (error == EWOULDBLOCK) 470 error = ETIMEDOUT; 471 return (error); 472} 473 474/* 475 * Convert userspace address into unique logical address. 
476 */ 477static int 478umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 479{ 480 struct thread *td = curthread; 481 vm_map_t map; 482 vm_map_entry_t entry; 483 vm_pindex_t pindex; 484 vm_prot_t prot; 485 boolean_t wired; 486 487 key->type = type; 488 if (share == THREAD_SHARE) { 489 key->shared = 0; 490 key->info.private.vs = td->td_proc->p_vmspace; 491 key->info.private.addr = (uintptr_t)addr; 492 } else { 493 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 494 map = &td->td_proc->p_vmspace->vm_map; 495 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 496 &entry, &key->info.shared.object, &pindex, &prot, 497 &wired) != KERN_SUCCESS) { 498 return EFAULT; 499 } 500 501 if ((share == PROCESS_SHARE) || 502 (share == AUTO_SHARE && 503 VM_INHERIT_SHARE == entry->inheritance)) { 504 key->shared = 1; 505 key->info.shared.offset = entry->offset + entry->start - 506 (vm_offset_t)addr; 507 vm_object_reference(key->info.shared.object); 508 } else { 509 key->shared = 0; 510 key->info.private.vs = td->td_proc->p_vmspace; 511 key->info.private.addr = (uintptr_t)addr; 512 } 513 vm_map_lookup_done(map, entry); 514 } 515 516 umtxq_hash(key); 517 return (0); 518} 519 520/* 521 * Release key. 522 */ 523static inline void 524umtx_key_release(struct umtx_key *key) 525{ 526 if (key->shared) 527 vm_object_deallocate(key->info.shared.object); 528} 529 530/* 531 * Lock a umtx object. 532 */ 533static int 534_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) 535{ 536 struct umtx_q *uq; 537 u_long owner; 538 u_long old; 539 int error = 0; 540 541 uq = td->td_umtxq; 542 543 /* 544 * Care must be exercised when dealing with umtx structure. It 545 * can fault on any access. 546 */ 547 for (;;) { 548 /* 549 * Try the uncontested case. This should be done in userland. 550 */ 551 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 552 553 /* The acquire succeeded. 
*/ 554 if (owner == UMTX_UNOWNED) 555 return (0); 556 557 /* The address was invalid. */ 558 if (owner == -1) 559 return (EFAULT); 560 561 /* If no one owns it but it is contested try to acquire it. */ 562 if (owner == UMTX_CONTESTED) { 563 owner = casuword(&umtx->u_owner, 564 UMTX_CONTESTED, id | UMTX_CONTESTED); 565 566 if (owner == UMTX_CONTESTED) 567 return (0); 568 569 /* The address was invalid. */ 570 if (owner == -1) 571 return (EFAULT); 572 573 /* If this failed the lock has changed, restart. */ 574 continue; 575 } 576 577 /* 578 * If we caught a signal, we have retried and now 579 * exit immediately. 580 */ 581 if (error != 0) 582 return (error); 583 584 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 585 AUTO_SHARE, &uq->uq_key)) != 0) 586 return (error); 587 588 umtxq_lock(&uq->uq_key); 589 umtxq_busy(&uq->uq_key); 590 umtxq_insert(uq); 591 umtxq_unbusy(&uq->uq_key); 592 umtxq_unlock(&uq->uq_key); 593 594 /* 595 * Set the contested bit so that a release in user space 596 * knows to use the system call for unlock. If this fails 597 * either some one else has acquired the lock or it has been 598 * released. 599 */ 600 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 601 602 /* The address was invalid. */ 603 if (old == -1) { 604 umtxq_lock(&uq->uq_key); 605 umtxq_remove(uq); 606 umtxq_unlock(&uq->uq_key); 607 umtx_key_release(&uq->uq_key); 608 return (EFAULT); 609 } 610 611 /* 612 * We set the contested bit, sleep. Otherwise the lock changed 613 * and we need to retry or we lost a race to the thread 614 * unlocking the umtx. 615 */ 616 umtxq_lock(&uq->uq_key); 617 if (old == owner) 618 error = umtxq_sleep(uq, "umtx", timo); 619 umtxq_remove(uq); 620 umtxq_unlock(&uq->uq_key); 621 umtx_key_release(&uq->uq_key); 622 } 623 624 return (0); 625} 626 627/* 628 * Lock a umtx object. 
629 */ 630static int 631do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 632 struct timespec *timeout) 633{ 634 struct timespec ts, ts2, ts3; 635 struct timeval tv; 636 int error; 637 638 if (timeout == NULL) { 639 error = _do_lock_umtx(td, umtx, id, 0); 640 /* Mutex locking is restarted if it is interrupted. */ 641 if (error == EINTR) 642 error = ERESTART; 643 } else { 644 getnanouptime(&ts); 645 timespecadd(&ts, timeout); 646 TIMESPEC_TO_TIMEVAL(&tv, timeout); 647 for (;;) { 648 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); 649 if (error != ETIMEDOUT) 650 break; 651 getnanouptime(&ts2); 652 if (timespeccmp(&ts2, &ts, >=)) { 653 error = ETIMEDOUT; 654 break; 655 } 656 ts3 = ts; 657 timespecsub(&ts3, &ts2); 658 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 659 } 660 /* Timed-locking is not restarted. */ 661 if (error == ERESTART) 662 error = EINTR; 663 } 664 return (error); 665} 666 667/* 668 * Unlock a umtx object. 669 */ 670static int 671do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 672{ 673 struct umtx_key key; 674 u_long owner; 675 u_long old; 676 int error; 677 int count; 678 679 /* 680 * Make sure we own this mtx. 681 */ 682 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 683 if (owner == -1) 684 return (EFAULT); 685 686 if ((owner & ~UMTX_CONTESTED) != id) 687 return (EPERM); 688 689 /* This should be done in userland */ 690 if ((owner & UMTX_CONTESTED) == 0) { 691 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 692 if (old == -1) 693 return (EFAULT); 694 if (old == owner) 695 return (0); 696 owner = old; 697 } 698 699 /* We should only ever be in here for contested locks */ 700 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 701 &key)) != 0) 702 return (error); 703 704 umtxq_lock(&key); 705 umtxq_busy(&key); 706 count = umtxq_count(&key); 707 umtxq_unlock(&key); 708 709 /* 710 * When unlocking the umtx, it must be marked as unowned if 711 * there is zero or one thread only waiting for it. 
712 * Otherwise, it must be marked as contested. 713 */ 714 old = casuword(&umtx->u_owner, owner, 715 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 716 umtxq_lock(&key); 717 umtxq_signal(&key,1); 718 umtxq_unbusy(&key); 719 umtxq_unlock(&key); 720 umtx_key_release(&key); 721 if (old == -1) 722 return (EFAULT); 723 if (old != owner) 724 return (EINVAL); 725 return (0); 726} 727 728#ifdef COMPAT_IA32 729 730/* 731 * Lock a umtx object. 732 */ 733static int 734_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) 735{ 736 struct umtx_q *uq; 737 uint32_t owner; 738 uint32_t old; 739 int error = 0; 740 741 uq = td->td_umtxq; 742 743 /* 744 * Care must be exercised when dealing with umtx structure. It 745 * can fault on any access. 746 */ 747 for (;;) { 748 /* 749 * Try the uncontested case. This should be done in userland. 750 */ 751 owner = casuword32(m, UMUTEX_UNOWNED, id); 752 753 /* The acquire succeeded. */ 754 if (owner == UMUTEX_UNOWNED) 755 return (0); 756 757 /* The address was invalid. */ 758 if (owner == -1) 759 return (EFAULT); 760 761 /* If no one owns it but it is contested try to acquire it. */ 762 if (owner == UMUTEX_CONTESTED) { 763 owner = casuword32(m, 764 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 765 if (owner == UMUTEX_CONTESTED) 766 return (0); 767 768 /* The address was invalid. */ 769 if (owner == -1) 770 return (EFAULT); 771 772 /* If this failed the lock has changed, restart. */ 773 continue; 774 } 775 776 /* 777 * If we caught a signal, we have retried and now 778 * exit immediately. 779 */ 780 if (error != 0) 781 return (error); 782 783 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 784 AUTO_SHARE, &uq->uq_key)) != 0) 785 return (error); 786 787 umtxq_lock(&uq->uq_key); 788 umtxq_busy(&uq->uq_key); 789 umtxq_insert(uq); 790 umtxq_unbusy(&uq->uq_key); 791 umtxq_unlock(&uq->uq_key); 792 793 /* 794 * Set the contested bit so that a release in user space 795 * knows to use the system call for unlock. 
If this fails 796 * either some one else has acquired the lock or it has been 797 * released. 798 */ 799 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 800 801 /* The address was invalid. */ 802 if (old == -1) { 803 umtxq_lock(&uq->uq_key); 804 umtxq_remove(uq); 805 umtxq_unlock(&uq->uq_key); 806 umtx_key_release(&uq->uq_key); 807 return (EFAULT); 808 } 809 810 /* 811 * We set the contested bit, sleep. Otherwise the lock changed 812 * and we need to retry or we lost a race to the thread 813 * unlocking the umtx. 814 */ 815 umtxq_lock(&uq->uq_key); 816 if (old == owner) 817 error = umtxq_sleep(uq, "umtx", timo); 818 umtxq_remove(uq); 819 umtxq_unlock(&uq->uq_key); 820 umtx_key_release(&uq->uq_key); 821 } 822 823 return (0); 824} 825 826/* 827 * Lock a umtx object. 828 */ 829static int 830do_lock_umtx32(struct thread *td, void *m, uint32_t id, 831 struct timespec *timeout) 832{ 833 struct timespec ts, ts2, ts3; 834 struct timeval tv; 835 int error; 836 837 if (timeout == NULL) { 838 error = _do_lock_umtx32(td, m, id, 0); 839 /* Mutex locking is restarted if it is interrupted. */ 840 if (error == EINTR) 841 error = ERESTART; 842 } else { 843 getnanouptime(&ts); 844 timespecadd(&ts, timeout); 845 TIMESPEC_TO_TIMEVAL(&tv, timeout); 846 for (;;) { 847 error = _do_lock_umtx32(td, m, id, tvtohz(&tv)); 848 if (error != ETIMEDOUT) 849 break; 850 getnanouptime(&ts2); 851 if (timespeccmp(&ts2, &ts, >=)) { 852 error = ETIMEDOUT; 853 break; 854 } 855 ts3 = ts; 856 timespecsub(&ts3, &ts2); 857 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 858 } 859 /* Timed-locking is not restarted. */ 860 if (error == ERESTART) 861 error = EINTR; 862 } 863 return (error); 864} 865 866/* 867 * Unlock a umtx object. 868 */ 869static int 870do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 871{ 872 struct umtx_key key; 873 uint32_t owner; 874 uint32_t old; 875 int error; 876 int count; 877 878 /* 879 * Make sure we own this mtx. 
880 */ 881 owner = fuword32(m); 882 if (owner == -1) 883 return (EFAULT); 884 885 if ((owner & ~UMUTEX_CONTESTED) != id) 886 return (EPERM); 887 888 /* This should be done in userland */ 889 if ((owner & UMUTEX_CONTESTED) == 0) { 890 old = casuword32(m, owner, UMUTEX_UNOWNED); 891 if (old == -1) 892 return (EFAULT); 893 if (old == owner) 894 return (0); 895 owner = old; 896 } 897 898 /* We should only ever be in here for contested locks */ 899 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 900 &key)) != 0) 901 return (error); 902 903 umtxq_lock(&key); 904 umtxq_busy(&key); 905 count = umtxq_count(&key); 906 umtxq_unlock(&key); 907 908 /* 909 * When unlocking the umtx, it must be marked as unowned if 910 * there is zero or one thread only waiting for it. 911 * Otherwise, it must be marked as contested. 912 */ 913 old = casuword32(m, owner, 914 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 915 umtxq_lock(&key); 916 umtxq_signal(&key,1); 917 umtxq_unbusy(&key); 918 umtxq_unlock(&key); 919 umtx_key_release(&key); 920 if (old == -1) 921 return (EFAULT); 922 if (old != owner) 923 return (EINVAL); 924 return (0); 925} 926#endif 927 928/* 929 * Fetch and compare value, sleep on the address if value is not changed. 
930 */ 931static int 932do_wait(struct thread *td, void *addr, u_long id, 933 struct timespec *timeout, int compat32) 934{ 935 struct umtx_q *uq; 936 struct timespec ts, ts2, ts3; 937 struct timeval tv; 938 u_long tmp; 939 int error = 0; 940 941 uq = td->td_umtxq; 942 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE, 943 &uq->uq_key)) != 0) 944 return (error); 945 946 umtxq_lock(&uq->uq_key); 947 umtxq_insert(uq); 948 umtxq_unlock(&uq->uq_key); 949 if (compat32 == 0) 950 tmp = fuword(addr); 951 else 952 tmp = fuword32(addr); 953 if (tmp != id) { 954 umtxq_lock(&uq->uq_key); 955 umtxq_remove(uq); 956 umtxq_unlock(&uq->uq_key); 957 } else if (timeout == NULL) { 958 umtxq_lock(&uq->uq_key); 959 error = umtxq_sleep(uq, "uwait", 0); 960 umtxq_remove(uq); 961 umtxq_unlock(&uq->uq_key); 962 } else { 963 getnanouptime(&ts); 964 timespecadd(&ts, timeout); 965 TIMESPEC_TO_TIMEVAL(&tv, timeout); 966 umtxq_lock(&uq->uq_key); 967 for (;;) { 968 error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); 969 if (!(uq->uq_flags & UQF_UMTXQ)) 970 break; 971 if (error != ETIMEDOUT) 972 break; 973 umtxq_unlock(&uq->uq_key); 974 getnanouptime(&ts2); 975 if (timespeccmp(&ts2, &ts, >=)) { 976 error = ETIMEDOUT; 977 umtxq_lock(&uq->uq_key); 978 break; 979 } 980 ts3 = ts; 981 timespecsub(&ts3, &ts2); 982 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 983 umtxq_lock(&uq->uq_key); 984 } 985 umtxq_remove(uq); 986 umtxq_unlock(&uq->uq_key); 987 } 988 umtx_key_release(&uq->uq_key); 989 if (error == ERESTART) 990 error = EINTR; 991 return (error); 992} 993 994/* 995 * Wake up threads sleeping on the specified address. 
996 */ 997int 998kern_umtx_wake(struct thread *td, void *uaddr, int n_wake) 999{ 1000 struct umtx_key key; 1001 int ret; 1002 1003 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, 1004 &key)) != 0) 1005 return (ret); 1006 umtxq_lock(&key); 1007 ret = umtxq_signal(&key, n_wake); 1008 umtxq_unlock(&key); 1009 umtx_key_release(&key); 1010 return (0); 1011} 1012 1013/* 1014 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1015 */ 1016static int 1017_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1018 int try) 1019{ 1020 struct umtx_q *uq; 1021 uint32_t owner, old, id; 1022 int error = 0; 1023 1024 id = td->td_tid; 1025 uq = td->td_umtxq; 1026 1027 /* 1028 * Care must be exercised when dealing with umtx structure. It 1029 * can fault on any access. 1030 */ 1031 for (;;) { 1032 /* 1033 * Try the uncontested case. This should be done in userland. 1034 */ 1035 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1036 1037 /* The acquire succeeded. */ 1038 if (owner == UMUTEX_UNOWNED) 1039 return (0); 1040 1041 /* The address was invalid. */ 1042 if (owner == -1) 1043 return (EFAULT); 1044 1045 /* If no one owns it but it is contested try to acquire it. */ 1046 if (owner == UMUTEX_CONTESTED) { 1047 owner = casuword32(&m->m_owner, 1048 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1049 1050 if (owner == UMUTEX_CONTESTED) 1051 return (0); 1052 1053 /* The address was invalid. */ 1054 if (owner == -1) 1055 return (EFAULT); 1056 1057 /* If this failed the lock has changed, restart. */ 1058 continue; 1059 } 1060 1061 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1062 (owner & ~UMUTEX_CONTESTED) == id) 1063 return (EDEADLK); 1064 1065 if (try != 0) 1066 return (EBUSY); 1067 1068 /* 1069 * If we caught a signal, we have retried and now 1070 * exit immediately. 
1071 */ 1072 if (error != 0) 1073 return (error); 1074 1075 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1076 GET_SHARE(flags), &uq->uq_key)) != 0) 1077 return (error); 1078 1079 umtxq_lock(&uq->uq_key); 1080 umtxq_busy(&uq->uq_key); 1081 umtxq_insert(uq); 1082 umtxq_unbusy(&uq->uq_key); 1083 umtxq_unlock(&uq->uq_key); 1084 1085 /* 1086 * Set the contested bit so that a release in user space 1087 * knows to use the system call for unlock. If this fails 1088 * either some one else has acquired the lock or it has been 1089 * released. 1090 */ 1091 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1092 1093 /* The address was invalid. */ 1094 if (old == -1) { 1095 umtxq_lock(&uq->uq_key); 1096 umtxq_remove(uq); 1097 umtxq_unlock(&uq->uq_key); 1098 umtx_key_release(&uq->uq_key); 1099 return (EFAULT); 1100 } 1101 1102 /* 1103 * We set the contested bit, sleep. Otherwise the lock changed 1104 * and we need to retry or we lost a race to the thread 1105 * unlocking the umtx. 1106 */ 1107 umtxq_lock(&uq->uq_key); 1108 if (old == owner) 1109 error = umtxq_sleep(uq, "umtxn", timo); 1110 umtxq_remove(uq); 1111 umtxq_unlock(&uq->uq_key); 1112 umtx_key_release(&uq->uq_key); 1113 } 1114 1115 return (0); 1116} 1117 1118/* 1119 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1120 */ 1121/* 1122 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1123 */ 1124static int 1125do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1126{ 1127 struct umtx_key key; 1128 uint32_t owner, old, id; 1129 int error; 1130 int count; 1131 1132 id = td->td_tid; 1133 /* 1134 * Make sure we own this mtx. 
1135 */ 1136 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1137 if (owner == -1) 1138 return (EFAULT); 1139 1140 if ((owner & ~UMUTEX_CONTESTED) != id) 1141 return (EPERM); 1142 1143 /* This should be done in userland */ 1144 if ((owner & UMUTEX_CONTESTED) == 0) { 1145 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1146 if (old == -1) 1147 return (EFAULT); 1148 if (old == owner) 1149 return (0); 1150 owner = old; 1151 } 1152 1153 /* We should only ever be in here for contested locks */ 1154 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1155 &key)) != 0) 1156 return (error); 1157 1158 umtxq_lock(&key); 1159 umtxq_busy(&key); 1160 count = umtxq_count(&key); 1161 umtxq_unlock(&key); 1162 1163 /* 1164 * When unlocking the umtx, it must be marked as unowned if 1165 * there is zero or one thread only waiting for it. 1166 * Otherwise, it must be marked as contested. 1167 */ 1168 old = casuword32(&m->m_owner, owner, 1169 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1170 umtxq_lock(&key); 1171 umtxq_signal(&key,1); 1172 umtxq_unbusy(&key); 1173 umtxq_unlock(&key); 1174 umtx_key_release(&key); 1175 if (old == -1) 1176 return (EFAULT); 1177 if (old != owner) 1178 return (EINVAL); 1179 return (0); 1180} 1181 1182static inline struct umtx_pi * 1183umtx_pi_alloc(int flags) 1184{ 1185 struct umtx_pi *pi; 1186 1187 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1188 TAILQ_INIT(&pi->pi_blocked); 1189 atomic_add_int(&umtx_pi_allocated, 1); 1190 return (pi); 1191} 1192 1193static inline void 1194umtx_pi_free(struct umtx_pi *pi) 1195{ 1196 uma_zfree(umtx_pi_zone, pi); 1197 atomic_add_int(&umtx_pi_allocated, -1); 1198} 1199 1200/* 1201 * Adjust the thread's position on a pi_state after its priority has been 1202 * changed. 
1203 */ 1204static int 1205umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1206{ 1207 struct umtx_q *uq, *uq1, *uq2; 1208 struct thread *td1; 1209 1210 mtx_assert(&umtx_lock, MA_OWNED); 1211 if (pi == NULL) 1212 return (0); 1213 1214 uq = td->td_umtxq; 1215 1216 /* 1217 * Check if the thread needs to be moved on the blocked chain. 1218 * It needs to be moved if either its priority is lower than 1219 * the previous thread or higher than the next thread. 1220 */ 1221 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1222 uq2 = TAILQ_NEXT(uq, uq_lockq); 1223 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1224 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1225 /* 1226 * Remove thread from blocked chain and determine where 1227 * it should be moved to. 1228 */ 1229 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1230 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1231 td1 = uq1->uq_thread; 1232 MPASS(td1->td_proc->p_magic == P_MAGIC); 1233 if (UPRI(td1) > UPRI(td)) 1234 break; 1235 } 1236 1237 if (uq1 == NULL) 1238 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1239 else 1240 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1241 } 1242 return (1); 1243} 1244 1245/* 1246 * Propagate priority when a thread is blocked on POSIX 1247 * PI mutex. 1248 */ 1249static void 1250umtx_propagate_priority(struct thread *td) 1251{ 1252 struct umtx_q *uq; 1253 struct umtx_pi *pi; 1254 int pri; 1255 1256 mtx_assert(&umtx_lock, MA_OWNED); 1257 pri = UPRI(td); 1258 uq = td->td_umtxq; 1259 pi = uq->uq_pi_blocked; 1260 if (pi == NULL) 1261 return; 1262 1263 for (;;) { 1264 td = pi->pi_owner; 1265 if (td == NULL) 1266 return; 1267 1268 MPASS(td->td_proc != NULL); 1269 MPASS(td->td_proc->p_magic == P_MAGIC); 1270 1271 if (UPRI(td) <= pri) 1272 return; 1273 1274 thread_lock(td); 1275 sched_lend_user_prio(td, pri); 1276 thread_unlock(td); 1277 1278 /* 1279 * Pick up the lock that td is blocked on. 
1280 */ 1281 uq = td->td_umtxq; 1282 pi = uq->uq_pi_blocked; 1283 /* Resort td on the list if needed. */ 1284 if (!umtx_pi_adjust_thread(pi, td)) 1285 break; 1286 } 1287} 1288 1289/* 1290 * Unpropagate priority for a PI mutex when a thread blocked on 1291 * it is interrupted by signal or resumed by others. 1292 */ 1293static void 1294umtx_unpropagate_priority(struct umtx_pi *pi) 1295{ 1296 struct umtx_q *uq, *uq_owner; 1297 struct umtx_pi *pi2; 1298 int pri, oldpri; 1299 1300 mtx_assert(&umtx_lock, MA_OWNED); 1301 1302 while (pi != NULL && pi->pi_owner != NULL) { 1303 pri = PRI_MAX; 1304 uq_owner = pi->pi_owner->td_umtxq; 1305 1306 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1307 uq = TAILQ_FIRST(&pi2->pi_blocked); 1308 if (uq != NULL) { 1309 if (pri > UPRI(uq->uq_thread)) 1310 pri = UPRI(uq->uq_thread); 1311 } 1312 } 1313 1314 if (pri > uq_owner->uq_inherited_pri) 1315 pri = uq_owner->uq_inherited_pri; 1316 thread_lock(pi->pi_owner); 1317 oldpri = pi->pi_owner->td_user_pri; 1318 sched_unlend_user_prio(pi->pi_owner, pri); 1319 thread_unlock(pi->pi_owner); 1320 umtx_pi_adjust_locked(pi->pi_owner, oldpri); 1321 pi = uq_owner->uq_pi_blocked; 1322 } 1323} 1324 1325/* 1326 * Insert a PI mutex into owned list. 1327 */ 1328static void 1329umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1330{ 1331 struct umtx_q *uq_owner; 1332 1333 uq_owner = owner->td_umtxq; 1334 mtx_assert(&umtx_lock, MA_OWNED); 1335 if (pi->pi_owner != NULL) 1336 panic("pi_ower != NULL"); 1337 pi->pi_owner = owner; 1338 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1339} 1340 1341/* 1342 * Claim ownership of a PI mutex. 
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/* Already ours: nothing to do. */
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		/* Lend the top waiter's priority to the new owner. */
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

/*
 * Re-sort 'td' on the blocked queue of the PI mutex it sleeps on
 * after its priority changed; umtx_lock spin mutex must be held.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result new priority propagating process.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	mtx_lock_spin(&umtx_lock);
	umtx_pi_adjust_locked(td, oldpri);
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Current, We only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		/* Re-check under umtx_lock: someone may have claimed it. */
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&umtx_lock);
	}

	/* Enqueue ourselves on the blocked list in priority order. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	/* Lend our priority up the chain of lock owners. */
	mtx_lock_spin(&umtx_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		/* Still queued: woken by signal/timeout, dequeue ourselves. */
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	/* Undo the blocked state and take back any priority we lent. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		/* Detach from the owner's contested list, if any. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	if (free)
		umtx_pi_free(pi);
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			/*
			 * Drop the chain lock for a sleeping allocation,
			 * then look up again: another thread may have
			 * inserted the record while we slept.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtxq_unlock(&uq->uq_key);
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&umtx_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		/*
		 * Recompute our own lent priority from the remaining
		 * PI mutexes we still hold contested.
		 */
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* 'su' is set when the thread may use real-time priorities. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		/* Our priority must not exceed the ceiling. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/* Roll back the ceiling boost taken for this attempt. */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	/* On failure, restore the pre-attempt inherited priority. */
	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* rceiling == -1 means "no saved ceiling to restore". */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		/* Recompute our priority from remaining contested mutexes. */
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the priority ceiling of a PP mutex; the old ceiling is
 * copied out to 'old_ceiling' when it is non-NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* A PP mutex reads UMUTEX_CONTESTED when unlocked. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own the mutex; just store the new ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Dispatch a mutex lock request by protocol type.
 */
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
	int try)
{
	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, try));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, try));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, try));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Convert the relative timeout into an absolute deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

/*
 * Wait on a userland condition variable, releasing mutex 'm' only
 * after this thread is queued so a wakeup cannot be missed.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause supurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows supurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* No waiters remain; clear the userland waiter flag. */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Wake all waiters of a userland condition variable.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}

static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 0);
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1);
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return do_lock_umutex(td, uap->obj, ts, 0);
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, 1);
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/* Dispatch table, indexed by UMTX_OP_* opcode. */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint		/* UMTX_OP_WAIT_UINT */
};

int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}

#ifdef COMPAT_IA32
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

/* timespec layout used by 32-bit processes. */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};

/*
 * Copy a 32-bit timespec in from userland and widen it to the
 * native struct timespec.
 */
static inline int
copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		tsp->tv_sec = ts32.tv_sec;
		tsp->tv_nsec = ts32.tv_nsec;
	}
	return (error);
}

static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1);
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever).
*/ 2615 if (uap->uaddr2 == NULL) 2616 ts = NULL; 2617 else { 2618 error = copyin_timeout32(uap->uaddr2, &timeout); 2619 if (error != 0) 2620 return (error); 2621 if (timeout.tv_nsec >= 1000000000 || 2622 timeout.tv_nsec < 0) 2623 return (EINVAL); 2624 ts = &timeout; 2625 } 2626 return do_lock_umutex(td, uap->obj, ts, 0); 2627} 2628 2629static int 2630__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 2631{ 2632 struct timespec *ts, timeout; 2633 int error; 2634 2635 /* Allow a null timespec (wait forever). */ 2636 if (uap->uaddr2 == NULL) 2637 ts = NULL; 2638 else { 2639 error = copyin_timeout32(uap->uaddr2, &timeout); 2640 if (error != 0) 2641 return (error); 2642 if (timeout.tv_nsec >= 1000000000 || 2643 timeout.tv_nsec < 0) 2644 return (EINVAL); 2645 ts = &timeout; 2646 } 2647 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 2648} 2649 2650static _umtx_op_func op_table_compat32[] = { 2651 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */ 2652 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ 2653 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 2654 __umtx_op_wake, /* UMTX_OP_WAKE */ 2655 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_LOCK */ 2656 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_TRYLOCK */ 2657 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 2658 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 2659 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 2660 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 2661 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 2662 __umtx_op_wait_compat32 /* UMTX_OP_WAIT_UINT */ 2663}; 2664 2665int 2666freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 2667{ 2668 if ((unsigned)uap->op < UMTX_OP_MAX) 2669 return (*op_table_compat32[uap->op])(td, 2670 (struct _umtx_op_args *)uap); 2671 return (EINVAL); 2672} 2673#endif 2674 2675void 2676umtx_thread_init(struct thread *td) 2677{ 2678 td->td_umtxq = umtxq_alloc(); 2679 td->td_umtxq->uq_thread = td; 2680} 2681 2682void 
2683umtx_thread_fini(struct thread *td) 2684{ 2685 umtxq_free(td->td_umtxq); 2686} 2687 2688/* 2689 * It will be called when new thread is created, e.g fork(). 2690 */ 2691void 2692umtx_thread_alloc(struct thread *td) 2693{ 2694 struct umtx_q *uq; 2695 2696 uq = td->td_umtxq; 2697 uq->uq_inherited_pri = PRI_MAX; 2698 2699 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 2700 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 2701 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 2702 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 2703} 2704 2705/* 2706 * exec() hook. 2707 */ 2708static void 2709umtx_exec_hook(void *arg __unused, struct proc *p __unused, 2710 struct image_params *imgp __unused) 2711{ 2712 umtx_thread_cleanup(curthread); 2713} 2714 2715/* 2716 * thread_exit() hook. 2717 */ 2718void 2719umtx_thread_exit(struct thread *td) 2720{ 2721 umtx_thread_cleanup(td); 2722} 2723 2724/* 2725 * clean up umtx data. 2726 */ 2727static void 2728umtx_thread_cleanup(struct thread *td) 2729{ 2730 struct umtx_q *uq; 2731 struct umtx_pi *pi; 2732 2733 if ((uq = td->td_umtxq) == NULL) 2734 return; 2735 2736 mtx_lock_spin(&umtx_lock); 2737 uq->uq_inherited_pri = PRI_MAX; 2738 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 2739 pi->pi_owner = NULL; 2740 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 2741 } 2742 thread_lock(td); 2743 td->td_flags &= ~TDF_UBORROWING; 2744 thread_unlock(td); 2745 mtx_unlock_spin(&umtx_lock); 2746} 2747