kern_umtx.c revision 162030
1/*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 162030 2006-09-05 12:01:09Z davidxu $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

/* Types of userland synchronization objects tracked by a umtx_key. */
#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5

/* Key to represent a unique userland synchronous object */
struct umtx_key {
	int	hash;		/* Chain index, computed by umtxq_hash(). */
	int	type;		/* One of the TYPE_* constants above. */
	int	shared;		/* Non-zero: keyed by VM object (process shared). */
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		/* Type-erased view used for hashing and comparison. */
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx holding by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread waits on. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. A read can use the chain lock
	 * or sched_lock, a write must hold both the chain lock and
	 * sched_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Thread contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */
#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)

/* Multiplicative hash constant and chain-count parameters. */
#define GOLDEN_RATIO_PRIME	2654404609U
#define UMTX_CHAINS		128
#define UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes for umtx_key_get(). */
#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(void);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/*
 * One-time initialization: create the PI-mutex UMA zone, initialize
 * every wait-queue chain, and hook process exec for cleanup.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		/* MTX_DUPOK: two chain locks may be held at once. */
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
			 MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

/*
 * Allocate a zeroed per-thread umtx queue entry; caller owns it and
 * releases it with umtxq_free().
 */
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

/*
 * Release a queue entry obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/*
 * Hash the key's (a, b) pair into a chain index (multiplicative hash).
 */
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

/*
 * Compare two keys for identity (same type and same object/address pair).
 */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

/*
 * Map a key to its wait-queue chain.
 */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}

/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.  Chain lock must be held.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove thread from the umtx queue.  Harmless if the entry was
 * already removed (UQF_UMTXQ cleared).  Chain lock must be held.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Check if there are multiple waiters.  Stops counting at two since
 * callers only need to distinguish "zero or one" from "more".
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Check if there are multiple PI waiters and returns first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}

/*
 * Wake up threads waiting on an userland object.  Returns the number
 * of threads actually awakened (at most n_wake).
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq, *next;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			umtxq_remove(uq);
			wakeup(uq);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Wake up specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);	/* Already signalled; no need to sleep. */
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		/* Process-private: key on (vmspace, virtual address). */
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else if (share == PROCESS_SHARE || share == AUTO_SHARE) {
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			/*
			 * Shared mapping: key on (VM object, offset) so
			 * all processes mapping it derive the same key.
			 * The object reference is dropped by
			 * umtx_key_release().
			 */
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, uintptr_t id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, uintptr_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Absolute deadline = now + relative timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, uintptr_t id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	owner = fuword(&umtx->u_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
			UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, uintptr_t id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uintptr_t tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	/*
	 * Enqueue first, then re-read the word: if it changed we would
	 * otherwise miss a wakeup between the check and the sleep.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "ucond", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	/* NOTE(review): the wake count in 'ret' is not returned to the
	 * caller; this matches the existing contract (always 0 on success). */
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Error-checking mutex: detect self-deadlock. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Allocate a zeroed PI-mutex record and bump the allocation counter.
 */
static inline struct umtx_pi *
umtx_pi_alloc(void)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | M_WAITOK);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

/*
 * Release a PI-mutex record allocated by umtx_pi_alloc().
 */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  Returns 0 when pi is NULL, 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the owner chain, lending priority to each
 * owner whose effective priority is lower than ours.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
1077 */ 1078static void 1079umtx_unpropagate_priority(struct umtx_pi *pi) 1080{ 1081 struct umtx_q *uq, *uq_owner; 1082 struct umtx_pi *pi2; 1083 int pri; 1084 1085 mtx_assert(&sched_lock, MA_OWNED); 1086 1087 while (pi != NULL && pi->pi_owner != NULL) { 1088 pri = PRI_MAX; 1089 uq_owner = pi->pi_owner->td_umtxq; 1090 1091 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1092 uq = TAILQ_FIRST(&pi2->pi_blocked); 1093 if (uq != NULL) { 1094 if (pri > UPRI(uq->uq_thread)) 1095 pri = UPRI(uq->uq_thread); 1096 } 1097 } 1098 1099 if (pri > uq_owner->uq_inherited_pri) 1100 pri = uq_owner->uq_inherited_pri; 1101 sched_unlend_user_prio(pi->pi_owner, pri); 1102 pi = uq_owner->uq_pi_blocked; 1103 } 1104} 1105 1106/* 1107 * Insert a PI mutex into owned list. 1108 */ 1109static void 1110umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1111{ 1112 struct umtx_q *uq_owner; 1113 1114 uq_owner = owner->td_umtxq; 1115 mtx_assert(&sched_lock, MA_OWNED); 1116 if (pi->pi_owner != NULL) 1117 panic("pi_ower != NULL"); 1118 pi->pi_owner = owner; 1119 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1120} 1121 1122/* 1123 * Claim ownership of a PI mutex. 1124 */ 1125static int 1126umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1127{ 1128 struct umtx_q *uq, *uq_owner; 1129 1130 uq_owner = owner->td_umtxq; 1131 mtx_lock_spin(&sched_lock); 1132 if (pi->pi_owner == owner) { 1133 mtx_unlock_spin(&sched_lock); 1134 return (0); 1135 } 1136 1137 if (pi->pi_owner != NULL) { 1138 /* 1139 * userland may have already messed the mutex, sigh. 
1140 */ 1141 mtx_unlock_spin(&sched_lock); 1142 return (EPERM); 1143 } 1144 umtx_pi_setowner(pi, owner); 1145 uq = TAILQ_FIRST(&pi->pi_blocked); 1146 if (uq != NULL) { 1147 int pri; 1148 1149 pri = UPRI(uq->uq_thread); 1150 if (pri < UPRI(owner)) 1151 sched_lend_user_prio(owner, pri); 1152 } 1153 mtx_unlock_spin(&sched_lock); 1154 return (0); 1155} 1156 1157/* 1158 * Adjust a thread's order position in its blocked PI mutex, 1159 * this may result new priority propagating process. 1160 */ 1161void 1162umtx_pi_adjust(struct thread *td, u_char oldpri) 1163{ 1164 struct umtx_q *uq; 1165 struct umtx_pi *pi; 1166 1167 uq = td->td_umtxq; 1168 1169 mtx_assert(&sched_lock, MA_OWNED); 1170 MPASS(TD_ON_UPILOCK(td)); 1171 1172 /* 1173 * Pick up the lock that td is blocked on. 1174 */ 1175 pi = uq->uq_pi_blocked; 1176 MPASS(pi != NULL); 1177 1178 /* Resort the turnstile on the list. */ 1179 if (!umtx_pi_adjust_thread(pi, td)) 1180 return; 1181 1182 /* 1183 * If our priority was lowered and we are at the head of the 1184 * turnstile, then propagate our new priority up the chain. 1185 */ 1186 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri) 1187 umtx_propagate_priority(td); 1188} 1189 1190/* 1191 * Sleep on a PI mutex. 1192 */ 1193static int 1194umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1195 uint32_t owner, const char *wmesg, int timo) 1196{ 1197 struct umtxq_chain *uc; 1198 struct thread *td, *td1; 1199 struct umtx_q *uq1; 1200 int pri; 1201 int error = 0; 1202 1203 td = uq->uq_thread; 1204 KASSERT(td == curthread, ("inconsistent uq_thread")); 1205 uc = umtxq_getchain(&uq->uq_key); 1206 UMTXQ_LOCKED_ASSERT(uc); 1207 umtxq_insert(uq); 1208 if (pi->pi_owner == NULL) { 1209 /* XXX 1210 * Current, We only support process private PI-mutex, 1211 * non-contended PI-mutexes are locked in userland. 
1212 * Process shared PI-mutex should always be initialized 1213 * by kernel and be registered in kernel, locking should 1214 * always be done by kernel to avoid security problems. 1215 * For process private PI-mutex, we can find owner 1216 * thread and boost its priority safely. 1217 */ 1218 PROC_LOCK(curproc); 1219 td1 = thread_find(curproc, owner); 1220 mtx_lock_spin(&sched_lock); 1221 if (td1 != NULL && pi->pi_owner == NULL) { 1222 uq1 = td1->td_umtxq; 1223 umtx_pi_setowner(pi, td1); 1224 } 1225 PROC_UNLOCK(curproc); 1226 } else { 1227 mtx_lock_spin(&sched_lock); 1228 } 1229 1230 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1231 pri = UPRI(uq1->uq_thread); 1232 if (pri > UPRI(td)) 1233 break; 1234 } 1235 1236 if (uq1 != NULL) 1237 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1238 else 1239 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1240 1241 uq->uq_pi_blocked = pi; 1242 td->td_flags |= TDF_UPIBLOCKED; 1243 mtx_unlock_spin(&sched_lock); 1244 umtxq_unlock(&uq->uq_key); 1245 1246 mtx_lock_spin(&sched_lock); 1247 umtx_propagate_priority(td); 1248 mtx_unlock_spin(&sched_lock); 1249 1250 umtxq_lock(&uq->uq_key); 1251 if (uq->uq_flags & UQF_UMTXQ) { 1252 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 1253 if (error == EWOULDBLOCK) 1254 error = ETIMEDOUT; 1255 if (uq->uq_flags & UQF_UMTXQ) { 1256 umtxq_busy(&uq->uq_key); 1257 umtxq_remove(uq); 1258 umtxq_unbusy(&uq->uq_key); 1259 } 1260 } 1261 umtxq_unlock(&uq->uq_key); 1262 1263 mtx_lock_spin(&sched_lock); 1264 uq->uq_pi_blocked = NULL; 1265 td->td_flags &= ~TDF_UPIBLOCKED; 1266 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1267 umtx_unpropagate_priority(pi); 1268 mtx_unlock_spin(&sched_lock); 1269 1270 umtxq_lock(&uq->uq_key); 1271 1272 return (error); 1273} 1274 1275/* 1276 * Add reference count for a PI mutex. 
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	/* The chain lock serializes refcount updates. */
	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 * Caller must hold the umtx chain lock for pi's key.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&sched_lock);
		/* Detach from the owner's contested list, if still linked. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&sched_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	/* Free after dropping sched_lock above. */
	if (free)
		umtx_pi_free(pi);
}

/*
 * Find a PI mutex in hash table.
 * Returns NULL when no entry matches the key.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 * Caller must hold the chain lock and have verified the key is
 * not already present.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
/*
 * Acquire a priority-inheriting userland mutex for td, retrying the
 * userland compare-and-set loop until the lock is taken, an error
 * occurs, or (for try != 0) the lock is found busy.
 *
 * flags - the mutex's m_flags word (sharing mode etc.).
 * timo  - sleep timeout in ticks forwarded to umtxq_sleep_pi().
 * try   - non-zero for trylock semantics (EBUSY instead of sleeping).
 *
 * Returns 0 on success, or EFAULT/EDEADLK/EBUSY/ETIMEDOUT/etc.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		pi = NULL;
		umtxq_lock(&uq->uq_key);
		pi = umtx_pi_lookup(&uq->uq_key);
		if (pi == NULL) {
			/*
			 * Allocate with the chain unlocked, then re-lookup:
			 * another thread may have inserted one meanwhile.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc();
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL)
				umtx_pi_free(new_pi);
			else {
				umtx_pi_insert(new_pi);
				pi = new_pi;
			}
		}

		umtx_pi_ref(pi);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Care must be exercised when dealing with umtx structure. It
		 * can fault on any access.
		 */

		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* We got it; record ourselves as PI owner. */
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			umtxq_lock(&uq->uq_key);
			umtx_pi_unref(pi);
			umtxq_unlock(&uq->uq_key);
			pi = NULL;
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
		pi = NULL;
	}

	/* Drop the reference taken at the top of the loop, if still held. */
	if (pi != NULL) {
		umtxq_lock(&uq->uq_key);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
/*
 * Release a priority-inheriting userland mutex owned by td: hand PI
 * ownership off, restore td's user priority from its remaining
 * contested PI mutexes, and wake the highest-priority waiter.
 *
 * Returns 0 on success, EFAULT on a bad userland address, EPERM if td
 * does not own the mutex, EINVAL if the owner word changed underneath.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&sched_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		/*
		 * Recompute the priority we may still be borrowing from
		 * the remaining contested PI mutexes we own.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 *
 * A priority-protected mutex: on acquisition the thread's user
 * priority is raised to the mutex's priority ceiling (privileged
 * threads only — see the suser() check below).
 *
 * timo - sleep timeout in ticks for umtxq_sleep(); try - non-zero for
 * trylock semantics (EBUSY instead of sleeping).
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	su = (suser(td) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Userland stores the ceiling as an rtprio value; convert
		 * and range-check it (unsigned, so a bogus value wraps
		 * above RTP_PRIO_MAX).
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&sched_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&sched_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
		}
		mtx_unlock_spin(&sched_lock);

		/* PP mutexes are always locked through the kernel. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/* Give back the ceiling boost taken for this attempt. */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

	/* On failure, likewise restore the pre-attempt priority. */
	if (error != 0) {
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (suser(td) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to fall back to after unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Recompute our user priority from the new inherited
		 * priority and any still-contested PI mutexes we own.
		 */
		mtx_lock_spin(&sched_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the priority ceiling of a PP mutex, taking the mutex (or
 * verifying we already own it) so the update cannot race a locker.
 * On success the previous ceiling is copied out through old_ceiling
 * when it is non-NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* We took the mutex: set the ceiling and drop it. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own the mutex; just update the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Dispatch a lock request to the normal/PI/PP implementation based
 * on the mutex's protocol flags; both flags set at once is invalid.
 */
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
	int try)
{
	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, try));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, try));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, try));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 *
 * timeout - relative timeout, or NULL to wait forever.  A timed lock
 * is retried from the remaining time after each ETIMEDOUT until the
 * absolute deadline (computed against the uptime clock) passes.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 * Dispatches on the same protocol flags as _do_lock_umutex().
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

/* Syscall: lock the simple umtx with the caller's thread id. */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

/* Syscall: unlock the simple umtx held by the caller's thread id. */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

/*
 * Syscall: multiplexed umtx operation.  uap->uaddr2, when used,
 * points to a userland relative timespec which is copied in and
 * validated here before dispatch.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_MUTEX_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock_umutex(td, uap->obj, ts, 0);
		break;
	case UMTX_OP_MUTEX_UNLOCK:
		error = do_unlock_umutex(td, uap->obj);
		break;
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock(td, uap->obj, uap->val, ts);
		break;
	case UMTX_OP_UNLOCK:
		error = do_unlock(td, uap->obj, uap->val);
		break;
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_wait(td, uap->obj, uap->val, ts);
		break;
	case UMTX_OP_WAKE:
		error = kern_umtx_wake(td, uap->obj, uap->val);
		break;
	case UMTX_OP_MUTEX_TRYLOCK:
		error = do_lock_umutex(td, uap->obj, NULL, 1);
		break;
	case UMTX_OP_SET_CEILING:
		error = do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/* Allocate and attach the per-thread umtx queue entry. */
void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

/* Release the per-thread umtx queue entry. */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * It will be called when new thread is created, e.g. fork().
 * Resets the inherited priority and asserts the entry is otherwise
 * in its clean initial state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * clean up umtx data.
2108 */ 2109static void 2110umtx_thread_cleanup(struct thread *td) 2111{ 2112 struct umtx_q *uq; 2113 struct umtx_pi *pi; 2114 2115 if ((uq = td->td_umtxq) == NULL) 2116 return; 2117 2118 mtx_lock_spin(&sched_lock); 2119 uq->uq_inherited_pri = PRI_MAX; 2120 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 2121 pi->pi_owner = NULL; 2122 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 2123 } 2124 td->td_flags &= ~TDF_UBORROWING; 2125 mtx_unlock_spin(&sched_lock); 2126} 2127