/* kern_umtx.c — FreeBSD stable/10, SVN revision 278345 */
1/*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/sys/kern/kern_umtx.c 278345 2015-02-07 08:35:18Z kib $"); 30 31#include "opt_compat.h" 32#include "opt_umtx_profiling.h" 33 34#include <sys/param.h> 35#include <sys/kernel.h> 36#include <sys/limits.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/priv.h> 41#include <sys/proc.h> 42#include <sys/sbuf.h> 43#include <sys/sched.h> 44#include <sys/smp.h> 45#include <sys/sysctl.h> 46#include <sys/sysent.h> 47#include <sys/systm.h> 48#include <sys/sysproto.h> 49#include <sys/syscallsubr.h> 50#include <sys/eventhandler.h> 51#include <sys/umtx.h> 52 53#include <vm/vm.h> 54#include <vm/vm_param.h> 55#include <vm/pmap.h> 56#include <vm/vm_map.h> 57#include <vm/vm_object.h> 58 59#include <machine/cpu.h> 60 61#ifdef COMPAT_FREEBSD32 62#include <compat/freebsd32/freebsd32_proto.h> 63#endif 64 65#define _UMUTEX_TRY 1 66#define _UMUTEX_WAIT 2 67 68#ifdef UMTX_PROFILING 69#define UPROF_PERC_BIGGER(w, f, sw, sf) \ 70 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 71#endif 72 73/* Priority inheritance mutex info. */ 74struct umtx_pi { 75 /* Owner thread */ 76 struct thread *pi_owner; 77 78 /* Reference count */ 79 int pi_refcount; 80 81 /* List entry to link umtx holding by thread */ 82 TAILQ_ENTRY(umtx_pi) pi_link; 83 84 /* List entry in hash */ 85 TAILQ_ENTRY(umtx_pi) pi_hashlink; 86 87 /* List for waiters */ 88 TAILQ_HEAD(,umtx_q) pi_blocked; 89 90 /* Identify a userland lock object */ 91 struct umtx_key pi_key; 92}; 93 94/* A userland synchronous object user. */ 95struct umtx_q { 96 /* Linked list for the hash. */ 97 TAILQ_ENTRY(umtx_q) uq_link; 98 99 /* Umtx key. */ 100 struct umtx_key uq_key; 101 102 /* Umtx flags. */ 103 int uq_flags; 104#define UQF_UMTXQ 0x0001 105 106 /* The thread waits on. */ 107 struct thread *uq_thread; 108 109 /* 110 * Blocked on PI mutex. read can use chain lock 111 * or umtx_lock, write must have both chain lock and 112 * umtx_lock being hold. 
113 */ 114 struct umtx_pi *uq_pi_blocked; 115 116 /* On blocked list */ 117 TAILQ_ENTRY(umtx_q) uq_lockq; 118 119 /* Thread contending with us */ 120 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 121 122 /* Inherited priority from PP mutex */ 123 u_char uq_inherited_pri; 124 125 /* Spare queue ready to be reused */ 126 struct umtxq_queue *uq_spare_queue; 127 128 /* The queue we on */ 129 struct umtxq_queue *uq_cur_queue; 130}; 131 132TAILQ_HEAD(umtxq_head, umtx_q); 133 134/* Per-key wait-queue */ 135struct umtxq_queue { 136 struct umtxq_head head; 137 struct umtx_key key; 138 LIST_ENTRY(umtxq_queue) link; 139 int length; 140}; 141 142LIST_HEAD(umtxq_list, umtxq_queue); 143 144/* Userland lock object's wait-queue chain */ 145struct umtxq_chain { 146 /* Lock for this chain. */ 147 struct mtx uc_lock; 148 149 /* List of sleep queues. */ 150 struct umtxq_list uc_queue[2]; 151#define UMTX_SHARED_QUEUE 0 152#define UMTX_EXCLUSIVE_QUEUE 1 153 154 LIST_HEAD(, umtxq_queue) uc_spare_queue; 155 156 /* Busy flag */ 157 char uc_busy; 158 159 /* Chain lock waiters */ 160 int uc_waiters; 161 162 /* All PI in the list */ 163 TAILQ_HEAD(,umtx_pi) uc_pi_list; 164 165#ifdef UMTX_PROFILING 166 u_int length; 167 u_int max_length; 168#endif 169}; 170 171#define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 172 173/* 174 * Don't propagate time-sharing priority, there is a security reason, 175 * a user can simply introduce PI-mutex, let thread A lock the mutex, 176 * and let another thread B block on the mutex, because B is 177 * sleeping, its priority will be boosted, this causes A's priority to 178 * be boosted via priority propagating too and will never be lowered even 179 * if it is using 100%CPU, this is unfair to other processes. 
180 */ 181 182#define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 183 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 184 PRI_MAX_TIMESHARE : (td)->td_user_pri) 185 186#define GOLDEN_RATIO_PRIME 2654404609U 187#define UMTX_CHAINS 512 188#define UMTX_SHIFTS (__WORD_BIT - 9) 189 190#define GET_SHARE(flags) \ 191 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 192 193#define BUSY_SPINS 200 194 195struct abs_timeout { 196 int clockid; 197 struct timespec cur; 198 struct timespec end; 199}; 200 201static uma_zone_t umtx_pi_zone; 202static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 203static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 204static int umtx_pi_allocated; 205 206static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 207SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 208 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 209 210#ifdef UMTX_PROFILING 211static long max_length; 212SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 213static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); 214#endif 215 216static void umtxq_sysinit(void *); 217static void umtxq_hash(struct umtx_key *key); 218static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 219static void umtxq_lock(struct umtx_key *key); 220static void umtxq_unlock(struct umtx_key *key); 221static void umtxq_busy(struct umtx_key *key); 222static void umtxq_unbusy(struct umtx_key *key); 223static void umtxq_insert_queue(struct umtx_q *uq, int q); 224static void umtxq_remove_queue(struct umtx_q *uq, int q); 225static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 226static int umtxq_count(struct umtx_key *key); 227static struct umtx_pi *umtx_pi_alloc(int); 228static void umtx_pi_free(struct umtx_pi *pi); 229static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 230static void umtx_thread_cleanup(struct thread 
*td); 231static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 232 struct image_params *imgp __unused); 233SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 234 235#define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 236#define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 237#define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 238 239static struct mtx umtx_lock; 240 241#ifdef UMTX_PROFILING 242static void 243umtx_init_profiling(void) 244{ 245 struct sysctl_oid *chain_oid; 246 char chain_name[10]; 247 int i; 248 249 for (i = 0; i < UMTX_CHAINS; ++i) { 250 snprintf(chain_name, sizeof(chain_name), "%d", i); 251 chain_oid = SYSCTL_ADD_NODE(NULL, 252 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 253 chain_name, CTLFLAG_RD, NULL, "umtx hash stats"); 254 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 255 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 256 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 257 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 258 } 259} 260 261static int 262sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 263{ 264 char buf[512]; 265 struct sbuf sb; 266 struct umtxq_chain *uc; 267 u_int fract, i, j, tot, whole; 268 u_int sf0, sf1, sf2, sf3, sf4; 269 u_int si0, si1, si2, si3, si4; 270 u_int sw0, sw1, sw2, sw3, sw4; 271 272 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 273 for (i = 0; i < 2; i++) { 274 tot = 0; 275 for (j = 0; j < UMTX_CHAINS; ++j) { 276 uc = &umtxq_chains[i][j]; 277 mtx_lock(&uc->uc_lock); 278 tot += uc->max_length; 279 mtx_unlock(&uc->uc_lock); 280 } 281 if (tot == 0) 282 sbuf_printf(&sb, "%u) Empty ", i); 283 else { 284 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 285 si0 = si1 = si2 = si3 = si4 = 0; 286 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 287 for (j = 0; j < UMTX_CHAINS; j++) { 288 uc = &umtxq_chains[i][j]; 289 mtx_lock(&uc->uc_lock); 290 whole = 
uc->max_length * 100; 291 mtx_unlock(&uc->uc_lock); 292 fract = (whole % tot) * 100; 293 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 294 sf0 = fract; 295 si0 = j; 296 sw0 = whole; 297 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 298 sf1)) { 299 sf1 = fract; 300 si1 = j; 301 sw1 = whole; 302 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 303 sf2)) { 304 sf2 = fract; 305 si2 = j; 306 sw2 = whole; 307 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 308 sf3)) { 309 sf3 = fract; 310 si3 = j; 311 sw3 = whole; 312 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 313 sf4)) { 314 sf4 = fract; 315 si4 = j; 316 sw4 = whole; 317 } 318 } 319 sbuf_printf(&sb, "queue %u:\n", i); 320 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 321 sf0 / tot, si0); 322 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 323 sf1 / tot, si1); 324 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 325 sf2 / tot, si2); 326 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 327 sf3 / tot, si3); 328 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 329 sf4 / tot, si4); 330 } 331 } 332 sbuf_trim(&sb); 333 sbuf_finish(&sb); 334 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 335 sbuf_delete(&sb); 336 return (0); 337} 338 339static int 340sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 341{ 342 struct umtxq_chain *uc; 343 u_int i, j; 344 int clear, error; 345 346 clear = 0; 347 error = sysctl_handle_int(oidp, &clear, 0, req); 348 if (error != 0 || req->newptr == NULL) 349 return (error); 350 351 if (clear != 0) { 352 for (i = 0; i < 2; ++i) { 353 for (j = 0; j < UMTX_CHAINS; ++j) { 354 uc = &umtxq_chains[i][j]; 355 mtx_lock(&uc->uc_lock); 356 uc->length = 0; 357 uc->max_length = 0; 358 mtx_unlock(&uc->uc_lock); 359 } 360 } 361 } 362 return (0); 363} 364 365SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 366 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 367 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); 
368SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 369 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 370 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); 371#endif 372 373static void 374umtxq_sysinit(void *arg __unused) 375{ 376 int i, j; 377 378 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 379 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 380 for (i = 0; i < 2; ++i) { 381 for (j = 0; j < UMTX_CHAINS; ++j) { 382 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 383 MTX_DEF | MTX_DUPOK); 384 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 385 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 386 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 387 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 388 umtxq_chains[i][j].uc_busy = 0; 389 umtxq_chains[i][j].uc_waiters = 0; 390#ifdef UMTX_PROFILING 391 umtxq_chains[i][j].length = 0; 392 umtxq_chains[i][j].max_length = 0; 393#endif 394 } 395 } 396#ifdef UMTX_PROFILING 397 umtx_init_profiling(); 398#endif 399 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 400 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 401 EVENTHANDLER_PRI_ANY); 402} 403 404struct umtx_q * 405umtxq_alloc(void) 406{ 407 struct umtx_q *uq; 408 409 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 410 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); 411 TAILQ_INIT(&uq->uq_spare_queue->head); 412 TAILQ_INIT(&uq->uq_pi_contested); 413 uq->uq_inherited_pri = PRI_MAX; 414 return (uq); 415} 416 417void 418umtxq_free(struct umtx_q *uq) 419{ 420 MPASS(uq->uq_spare_queue != NULL); 421 free(uq->uq_spare_queue, M_UMTX); 422 free(uq, M_UMTX); 423} 424 425static inline void 426umtxq_hash(struct umtx_key *key) 427{ 428 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 429 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 430} 431 432static inline struct umtxq_chain * 433umtxq_getchain(struct umtx_key *key) 434{ 435 if (key->type <= TYPE_SEM) 
436 return (&umtxq_chains[1][key->hash]); 437 return (&umtxq_chains[0][key->hash]); 438} 439 440/* 441 * Lock a chain. 442 */ 443static inline void 444umtxq_lock(struct umtx_key *key) 445{ 446 struct umtxq_chain *uc; 447 448 uc = umtxq_getchain(key); 449 mtx_lock(&uc->uc_lock); 450} 451 452/* 453 * Unlock a chain. 454 */ 455static inline void 456umtxq_unlock(struct umtx_key *key) 457{ 458 struct umtxq_chain *uc; 459 460 uc = umtxq_getchain(key); 461 mtx_unlock(&uc->uc_lock); 462} 463 464/* 465 * Set chain to busy state when following operation 466 * may be blocked (kernel mutex can not be used). 467 */ 468static inline void 469umtxq_busy(struct umtx_key *key) 470{ 471 struct umtxq_chain *uc; 472 473 uc = umtxq_getchain(key); 474 mtx_assert(&uc->uc_lock, MA_OWNED); 475 if (uc->uc_busy) { 476#ifdef SMP 477 if (smp_cpus > 1) { 478 int count = BUSY_SPINS; 479 if (count > 0) { 480 umtxq_unlock(key); 481 while (uc->uc_busy && --count > 0) 482 cpu_spinwait(); 483 umtxq_lock(key); 484 } 485 } 486#endif 487 while (uc->uc_busy) { 488 uc->uc_waiters++; 489 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 490 uc->uc_waiters--; 491 } 492 } 493 uc->uc_busy = 1; 494} 495 496/* 497 * Unbusy a chain. 
498 */ 499static inline void 500umtxq_unbusy(struct umtx_key *key) 501{ 502 struct umtxq_chain *uc; 503 504 uc = umtxq_getchain(key); 505 mtx_assert(&uc->uc_lock, MA_OWNED); 506 KASSERT(uc->uc_busy != 0, ("not busy")); 507 uc->uc_busy = 0; 508 if (uc->uc_waiters) 509 wakeup_one(uc); 510} 511 512static inline void 513umtxq_unbusy_unlocked(struct umtx_key *key) 514{ 515 516 umtxq_lock(key); 517 umtxq_unbusy(key); 518 umtxq_unlock(key); 519} 520 521static struct umtxq_queue * 522umtxq_queue_lookup(struct umtx_key *key, int q) 523{ 524 struct umtxq_queue *uh; 525 struct umtxq_chain *uc; 526 527 uc = umtxq_getchain(key); 528 UMTXQ_LOCKED_ASSERT(uc); 529 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 530 if (umtx_key_match(&uh->key, key)) 531 return (uh); 532 } 533 534 return (NULL); 535} 536 537static inline void 538umtxq_insert_queue(struct umtx_q *uq, int q) 539{ 540 struct umtxq_queue *uh; 541 struct umtxq_chain *uc; 542 543 uc = umtxq_getchain(&uq->uq_key); 544 UMTXQ_LOCKED_ASSERT(uc); 545 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 546 uh = umtxq_queue_lookup(&uq->uq_key, q); 547 if (uh != NULL) { 548 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 549 } else { 550 uh = uq->uq_spare_queue; 551 uh->key = uq->uq_key; 552 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 553#ifdef UMTX_PROFILING 554 uc->length++; 555 if (uc->length > uc->max_length) { 556 uc->max_length = uc->length; 557 if (uc->max_length > max_length) 558 max_length = uc->max_length; 559 } 560#endif 561 } 562 uq->uq_spare_queue = NULL; 563 564 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 565 uh->length++; 566 uq->uq_flags |= UQF_UMTXQ; 567 uq->uq_cur_queue = uh; 568 return; 569} 570 571static inline void 572umtxq_remove_queue(struct umtx_q *uq, int q) 573{ 574 struct umtxq_chain *uc; 575 struct umtxq_queue *uh; 576 577 uc = umtxq_getchain(&uq->uq_key); 578 UMTXQ_LOCKED_ASSERT(uc); 579 if (uq->uq_flags & UQF_UMTXQ) { 580 uh = uq->uq_cur_queue; 581 
TAILQ_REMOVE(&uh->head, uq, uq_link); 582 uh->length--; 583 uq->uq_flags &= ~UQF_UMTXQ; 584 if (TAILQ_EMPTY(&uh->head)) { 585 KASSERT(uh->length == 0, 586 ("inconsistent umtxq_queue length")); 587#ifdef UMTX_PROFILING 588 uc->length--; 589#endif 590 LIST_REMOVE(uh, link); 591 } else { 592 uh = LIST_FIRST(&uc->uc_spare_queue); 593 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 594 LIST_REMOVE(uh, link); 595 } 596 uq->uq_spare_queue = uh; 597 uq->uq_cur_queue = NULL; 598 } 599} 600 601/* 602 * Check if there are multiple waiters 603 */ 604static int 605umtxq_count(struct umtx_key *key) 606{ 607 struct umtxq_chain *uc; 608 struct umtxq_queue *uh; 609 610 uc = umtxq_getchain(key); 611 UMTXQ_LOCKED_ASSERT(uc); 612 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 613 if (uh != NULL) 614 return (uh->length); 615 return (0); 616} 617 618/* 619 * Check if there are multiple PI waiters and returns first 620 * waiter. 621 */ 622static int 623umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 624{ 625 struct umtxq_chain *uc; 626 struct umtxq_queue *uh; 627 628 *first = NULL; 629 uc = umtxq_getchain(key); 630 UMTXQ_LOCKED_ASSERT(uc); 631 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 632 if (uh != NULL) { 633 *first = TAILQ_FIRST(&uh->head); 634 return (uh->length); 635 } 636 return (0); 637} 638 639static int 640umtxq_check_susp(struct thread *td) 641{ 642 struct proc *p; 643 int error; 644 645 /* 646 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to 647 * eventually break the lockstep loop. 648 */ 649 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) 650 return (0); 651 error = 0; 652 p = td->td_proc; 653 PROC_LOCK(p); 654 if (P_SHOULDSTOP(p) || 655 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) { 656 if (p->p_flag & P_SINGLE_EXIT) 657 error = EINTR; 658 else 659 error = ERESTART; 660 } 661 PROC_UNLOCK(p); 662 return (error); 663} 664 665/* 666 * Wake up threads waiting on an userland object. 
667 */ 668 669static int 670umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 671{ 672 struct umtxq_chain *uc; 673 struct umtxq_queue *uh; 674 struct umtx_q *uq; 675 int ret; 676 677 ret = 0; 678 uc = umtxq_getchain(key); 679 UMTXQ_LOCKED_ASSERT(uc); 680 uh = umtxq_queue_lookup(key, q); 681 if (uh != NULL) { 682 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 683 umtxq_remove_queue(uq, q); 684 wakeup(uq); 685 if (++ret >= n_wake) 686 return (ret); 687 } 688 } 689 return (ret); 690} 691 692 693/* 694 * Wake up specified thread. 695 */ 696static inline void 697umtxq_signal_thread(struct umtx_q *uq) 698{ 699 struct umtxq_chain *uc; 700 701 uc = umtxq_getchain(&uq->uq_key); 702 UMTXQ_LOCKED_ASSERT(uc); 703 umtxq_remove(uq); 704 wakeup(uq); 705} 706 707static inline int 708tstohz(const struct timespec *tsp) 709{ 710 struct timeval tv; 711 712 TIMESPEC_TO_TIMEVAL(&tv, tsp); 713 return tvtohz(&tv); 714} 715 716static void 717abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 718 const struct timespec *timeout) 719{ 720 721 timo->clockid = clockid; 722 if (!absolute) { 723 kern_clock_gettime(curthread, clockid, &timo->end); 724 timo->cur = timo->end; 725 timespecadd(&timo->end, timeout); 726 } else { 727 timo->end = *timeout; 728 kern_clock_gettime(curthread, clockid, &timo->cur); 729 } 730} 731 732static void 733abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 734{ 735 736 abs_timeout_init(timo, umtxtime->_clockid, 737 (umtxtime->_flags & UMTX_ABSTIME) != 0, 738 &umtxtime->_timeout); 739} 740 741static inline void 742abs_timeout_update(struct abs_timeout *timo) 743{ 744 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 745} 746 747static int 748abs_timeout_gethz(struct abs_timeout *timo) 749{ 750 struct timespec tts; 751 752 if (timespeccmp(&timo->end, &timo->cur, <=)) 753 return (-1); 754 tts = timo->end; 755 timespecsub(&tts, &timo->cur); 756 return (tstohz(&tts)); 757} 758 759/* 760 * Put thread into 
sleep state, before sleeping, check if 761 * thread was removed from umtx queue. 762 */ 763static inline int 764umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 765{ 766 struct umtxq_chain *uc; 767 int error, timo; 768 769 uc = umtxq_getchain(&uq->uq_key); 770 UMTXQ_LOCKED_ASSERT(uc); 771 for (;;) { 772 if (!(uq->uq_flags & UQF_UMTXQ)) 773 return (0); 774 if (abstime != NULL) { 775 timo = abs_timeout_gethz(abstime); 776 if (timo < 0) 777 return (ETIMEDOUT); 778 } else 779 timo = 0; 780 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 781 if (error != EWOULDBLOCK) { 782 umtxq_lock(&uq->uq_key); 783 break; 784 } 785 if (abstime != NULL) 786 abs_timeout_update(abstime); 787 umtxq_lock(&uq->uq_key); 788 } 789 return (error); 790} 791 792/* 793 * Convert userspace address into unique logical address. 794 */ 795int 796umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 797{ 798 struct thread *td = curthread; 799 vm_map_t map; 800 vm_map_entry_t entry; 801 vm_pindex_t pindex; 802 vm_prot_t prot; 803 boolean_t wired; 804 805 key->type = type; 806 if (share == THREAD_SHARE) { 807 key->shared = 0; 808 key->info.private.vs = td->td_proc->p_vmspace; 809 key->info.private.addr = (uintptr_t)addr; 810 } else { 811 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 812 map = &td->td_proc->p_vmspace->vm_map; 813 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 814 &entry, &key->info.shared.object, &pindex, &prot, 815 &wired) != KERN_SUCCESS) { 816 return EFAULT; 817 } 818 819 if ((share == PROCESS_SHARE) || 820 (share == AUTO_SHARE && 821 VM_INHERIT_SHARE == entry->inheritance)) { 822 key->shared = 1; 823 key->info.shared.offset = entry->offset + entry->start - 824 (vm_offset_t)addr; 825 vm_object_reference(key->info.shared.object); 826 } else { 827 key->shared = 0; 828 key->info.private.vs = td->td_proc->p_vmspace; 829 key->info.private.addr = (uintptr_t)addr; 830 } 831 vm_map_lookup_done(map, entry); 832 } 
833 834 umtxq_hash(key); 835 return (0); 836} 837 838/* 839 * Release key. 840 */ 841void 842umtx_key_release(struct umtx_key *key) 843{ 844 if (key->shared) 845 vm_object_deallocate(key->info.shared.object); 846} 847 848/* 849 * Lock a umtx object. 850 */ 851static int 852do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 853 const struct timespec *timeout) 854{ 855 struct abs_timeout timo; 856 struct umtx_q *uq; 857 u_long owner; 858 u_long old; 859 int error = 0; 860 861 uq = td->td_umtxq; 862 if (timeout != NULL) 863 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 864 865 /* 866 * Care must be exercised when dealing with umtx structure. It 867 * can fault on any access. 868 */ 869 for (;;) { 870 /* 871 * Try the uncontested case. This should be done in userland. 872 */ 873 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 874 875 /* The acquire succeeded. */ 876 if (owner == UMTX_UNOWNED) 877 return (0); 878 879 /* The address was invalid. */ 880 if (owner == -1) 881 return (EFAULT); 882 883 /* If no one owns it but it is contested try to acquire it. */ 884 if (owner == UMTX_CONTESTED) { 885 owner = casuword(&umtx->u_owner, 886 UMTX_CONTESTED, id | UMTX_CONTESTED); 887 888 if (owner == UMTX_CONTESTED) 889 return (0); 890 891 /* The address was invalid. */ 892 if (owner == -1) 893 return (EFAULT); 894 895 error = umtxq_check_susp(td); 896 if (error != 0) 897 break; 898 899 /* If this failed the lock has changed, restart. */ 900 continue; 901 } 902 903 /* 904 * If we caught a signal, we have retried and now 905 * exit immediately. 906 */ 907 if (error != 0) 908 break; 909 910 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 911 AUTO_SHARE, &uq->uq_key)) != 0) 912 return (error); 913 914 umtxq_lock(&uq->uq_key); 915 umtxq_busy(&uq->uq_key); 916 umtxq_insert(uq); 917 umtxq_unbusy(&uq->uq_key); 918 umtxq_unlock(&uq->uq_key); 919 920 /* 921 * Set the contested bit so that a release in user space 922 * knows to use the system call for unlock. 
If this fails 923 * either some one else has acquired the lock or it has been 924 * released. 925 */ 926 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 927 928 /* The address was invalid. */ 929 if (old == -1) { 930 umtxq_lock(&uq->uq_key); 931 umtxq_remove(uq); 932 umtxq_unlock(&uq->uq_key); 933 umtx_key_release(&uq->uq_key); 934 return (EFAULT); 935 } 936 937 /* 938 * We set the contested bit, sleep. Otherwise the lock changed 939 * and we need to retry or we lost a race to the thread 940 * unlocking the umtx. 941 */ 942 umtxq_lock(&uq->uq_key); 943 if (old == owner) 944 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 945 &timo); 946 umtxq_remove(uq); 947 umtxq_unlock(&uq->uq_key); 948 umtx_key_release(&uq->uq_key); 949 950 if (error == 0) 951 error = umtxq_check_susp(td); 952 } 953 954 if (timeout == NULL) { 955 /* Mutex locking is restarted if it is interrupted. */ 956 if (error == EINTR) 957 error = ERESTART; 958 } else { 959 /* Timed-locking is not restarted. */ 960 if (error == ERESTART) 961 error = EINTR; 962 } 963 return (error); 964} 965 966/* 967 * Unlock a umtx object. 968 */ 969static int 970do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 971{ 972 struct umtx_key key; 973 u_long owner; 974 u_long old; 975 int error; 976 int count; 977 978 /* 979 * Make sure we own this mtx. 
980 */ 981 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 982 if (owner == -1) 983 return (EFAULT); 984 985 if ((owner & ~UMTX_CONTESTED) != id) 986 return (EPERM); 987 988 /* This should be done in userland */ 989 if ((owner & UMTX_CONTESTED) == 0) { 990 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 991 if (old == -1) 992 return (EFAULT); 993 if (old == owner) 994 return (0); 995 owner = old; 996 } 997 998 /* We should only ever be in here for contested locks */ 999 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1000 &key)) != 0) 1001 return (error); 1002 1003 umtxq_lock(&key); 1004 umtxq_busy(&key); 1005 count = umtxq_count(&key); 1006 umtxq_unlock(&key); 1007 1008 /* 1009 * When unlocking the umtx, it must be marked as unowned if 1010 * there is zero or one thread only waiting for it. 1011 * Otherwise, it must be marked as contested. 1012 */ 1013 old = casuword(&umtx->u_owner, owner, 1014 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1015 umtxq_lock(&key); 1016 umtxq_signal(&key,1); 1017 umtxq_unbusy(&key); 1018 umtxq_unlock(&key); 1019 umtx_key_release(&key); 1020 if (old == -1) 1021 return (EFAULT); 1022 if (old != owner) 1023 return (EINVAL); 1024 return (0); 1025} 1026 1027#ifdef COMPAT_FREEBSD32 1028 1029/* 1030 * Lock a umtx object. 1031 */ 1032static int 1033do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1034 const struct timespec *timeout) 1035{ 1036 struct abs_timeout timo; 1037 struct umtx_q *uq; 1038 uint32_t owner; 1039 uint32_t old; 1040 int error = 0; 1041 1042 uq = td->td_umtxq; 1043 1044 if (timeout != NULL) 1045 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1046 1047 /* 1048 * Care must be exercised when dealing with umtx structure. It 1049 * can fault on any access. 1050 */ 1051 for (;;) { 1052 /* 1053 * Try the uncontested case. This should be done in userland. 1054 */ 1055 owner = casuword32(m, UMUTEX_UNOWNED, id); 1056 1057 /* The acquire succeeded. 
*/ 1058 if (owner == UMUTEX_UNOWNED) 1059 return (0); 1060 1061 /* The address was invalid. */ 1062 if (owner == -1) 1063 return (EFAULT); 1064 1065 /* If no one owns it but it is contested try to acquire it. */ 1066 if (owner == UMUTEX_CONTESTED) { 1067 owner = casuword32(m, 1068 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1069 if (owner == UMUTEX_CONTESTED) 1070 return (0); 1071 1072 /* The address was invalid. */ 1073 if (owner == -1) 1074 return (EFAULT); 1075 1076 error = umtxq_check_susp(td); 1077 if (error != 0) 1078 break; 1079 1080 /* If this failed the lock has changed, restart. */ 1081 continue; 1082 } 1083 1084 /* 1085 * If we caught a signal, we have retried and now 1086 * exit immediately. 1087 */ 1088 if (error != 0) 1089 return (error); 1090 1091 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1092 AUTO_SHARE, &uq->uq_key)) != 0) 1093 return (error); 1094 1095 umtxq_lock(&uq->uq_key); 1096 umtxq_busy(&uq->uq_key); 1097 umtxq_insert(uq); 1098 umtxq_unbusy(&uq->uq_key); 1099 umtxq_unlock(&uq->uq_key); 1100 1101 /* 1102 * Set the contested bit so that a release in user space 1103 * knows to use the system call for unlock. If this fails 1104 * either some one else has acquired the lock or it has been 1105 * released. 1106 */ 1107 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1108 1109 /* The address was invalid. */ 1110 if (old == -1) { 1111 umtxq_lock(&uq->uq_key); 1112 umtxq_remove(uq); 1113 umtxq_unlock(&uq->uq_key); 1114 umtx_key_release(&uq->uq_key); 1115 return (EFAULT); 1116 } 1117 1118 /* 1119 * We set the contested bit, sleep. Otherwise the lock changed 1120 * and we need to retry or we lost a race to the thread 1121 * unlocking the umtx. 1122 */ 1123 umtxq_lock(&uq->uq_key); 1124 if (old == owner) 1125 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object (32-bit compat variant of the simple umtx lock).
 *
 * The caller must be the current owner: the TID stored in *m (low bits,
 * excluding UMUTEX_CONTESTED) has to equal 'id', otherwise EPERM.
 * Returns 0 on success, EFAULT on a userspace access fault, EINVAL if
 * the word changed underneath us (userland raced the unlock).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 * NOTE(review): fuword32()/casuword32() overload -1 as the fault
	 * indicator, so a legitimate owner value of 0xffffffff is
	 * indistinguishable from a fault here.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Fast path: no contention bit, try a plain release. */
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS failed; fall through and treat as contested. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	/* Busy the chain so waiter count stays stable across the CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * Implements UMTX_OP_WAIT: if *addr still equals 'id', block until a
 * wake-up, timeout, or signal.  'compat32' selects a 32-bit fetch;
 * 'is_private' keys the wait channel per-process instead of globally.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Queue ourselves BEFORE reading the word so a racing wake
	 * after the read still finds us on the queue.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		/*
		 * If we were removed from the queue we were woken:
		 * report success even if the sleep also timed out or
		 * was interrupted.
		 */
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 *
 * Wakes at most 'n_wake' waiters.  NOTE(review): the number of threads
 * actually woken (umtxq_signal()'s return) is computed into 'ret' but
 * deliberately not returned; the call always reports 0 once the key
 * lookup succeeded.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * 'mode' selects behavior: _UMUTEX_TRY fails with EBUSY instead of
 * sleeping, _UMUTEX_WAIT only waits for the mutex to become
 * acquirable without taking ownership.  Returns EDEADLK for
 * error-checking mutexes already owned by the caller.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
	struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	/* NOTREACHED: the loop only exits via return. */
	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * Caller must own the mutex (EPERM otherwise).  Returns EFAULT on
 * userspace fault, EINVAL if the owner word changed under us.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Fast path: contested bit clear, release without queue work. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/* Nothing to do if some thread still owns the mutex. */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so the waiter count cannot change under us. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter left, try to clear the contested
	 * state entirely so userland can go back to the fast path.
	 */
	if (count <= 1) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and tries to fix contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	/* Map the mutex protocol bits to the matching key type. */
	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair contention bit if there is a waiter, this means the mutex
	 * is still being referenced by userland code, otherwise don't update
	 * any memory.
	 */
	if (count > 1) {
		/* Multiple waiters: always force the contested bit on. */
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		/*
		 * Single waiter: only set the bit while the mutex is
		 * actually owned; an unowned word means the waiter can
		 * be woken instead.
		 */
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		/* Unusable word: wake everyone so no waiter hangs forever. */
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Allocate a PI mutex record; 'flags' are passed to uma_zalloc()
 * (e.g. M_NOWAIT/M_WAITOK), so a NOWAIT allocation may return NULL
 * before the TAILQ_INIT — callers check for NULL.
 */
static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

/* Release a PI mutex record and maintain the allocation counter. */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 *
 * Returns 0 when pi is NULL, 1 otherwise.  The pi_blocked list is kept
 * sorted by user priority (UPRI), highest priority first.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Follow the blocking chain one step: the PI mutex the owner of 'pi'
 * is itself blocked on, or NULL when the chain ends.
 */
static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 *
 * Detect a cycle in the owner/blocked chain (userland can construct
 * one); returns true if a loop exists so propagation can bail out.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);		/* slow: one step */
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);	/* fast: two steps */
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	/* A userland-constructed loop would make this walk spin forever. */
	if (umtx_pi_check_loop(pi))
		return;

	/* Walk the chain of owners, lending 'pri' to each lower-priority one. */
	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/*
			 * Owner already runs at this priority or better;
			 * nothing further up the chain can need boosting.
			 */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
1763 */ 1764static void 1765umtx_repropagate_priority(struct umtx_pi *pi) 1766{ 1767 struct umtx_q *uq, *uq_owner; 1768 struct umtx_pi *pi2; 1769 int pri; 1770 1771 mtx_assert(&umtx_lock, MA_OWNED); 1772 1773 if (umtx_pi_check_loop(pi)) 1774 return; 1775 while (pi != NULL && pi->pi_owner != NULL) { 1776 pri = PRI_MAX; 1777 uq_owner = pi->pi_owner->td_umtxq; 1778 1779 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1780 uq = TAILQ_FIRST(&pi2->pi_blocked); 1781 if (uq != NULL) { 1782 if (pri > UPRI(uq->uq_thread)) 1783 pri = UPRI(uq->uq_thread); 1784 } 1785 } 1786 1787 if (pri > uq_owner->uq_inherited_pri) 1788 pri = uq_owner->uq_inherited_pri; 1789 thread_lock(pi->pi_owner); 1790 sched_lend_user_prio(pi->pi_owner, pri); 1791 thread_unlock(pi->pi_owner); 1792 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1793 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1794 } 1795} 1796 1797/* 1798 * Insert a PI mutex into owned list. 1799 */ 1800static void 1801umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1802{ 1803 struct umtx_q *uq_owner; 1804 1805 uq_owner = owner->td_umtxq; 1806 mtx_assert(&umtx_lock, MA_OWNED); 1807 if (pi->pi_owner != NULL) 1808 panic("pi_ower != NULL"); 1809 pi->pi_owner = owner; 1810 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1811} 1812 1813/* 1814 * Claim ownership of a PI mutex. 1815 */ 1816static int 1817umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1818{ 1819 struct umtx_q *uq, *uq_owner; 1820 1821 uq_owner = owner->td_umtxq; 1822 mtx_lock_spin(&umtx_lock); 1823 if (pi->pi_owner == owner) { 1824 mtx_unlock_spin(&umtx_lock); 1825 return (0); 1826 } 1827 1828 if (pi->pi_owner != NULL) { 1829 /* 1830 * userland may have already messed the mutex, sigh. 
1831 */ 1832 mtx_unlock_spin(&umtx_lock); 1833 return (EPERM); 1834 } 1835 umtx_pi_setowner(pi, owner); 1836 uq = TAILQ_FIRST(&pi->pi_blocked); 1837 if (uq != NULL) { 1838 int pri; 1839 1840 pri = UPRI(uq->uq_thread); 1841 thread_lock(owner); 1842 if (pri < UPRI(owner)) 1843 sched_lend_user_prio(owner, pri); 1844 thread_unlock(owner); 1845 } 1846 mtx_unlock_spin(&umtx_lock); 1847 return (0); 1848} 1849 1850/* 1851 * Adjust a thread's order position in its blocked PI mutex, 1852 * this may result new priority propagating process. 1853 */ 1854void 1855umtx_pi_adjust(struct thread *td, u_char oldpri) 1856{ 1857 struct umtx_q *uq; 1858 struct umtx_pi *pi; 1859 1860 uq = td->td_umtxq; 1861 mtx_lock_spin(&umtx_lock); 1862 /* 1863 * Pick up the lock that td is blocked on. 1864 */ 1865 pi = uq->uq_pi_blocked; 1866 if (pi != NULL) { 1867 umtx_pi_adjust_thread(pi, td); 1868 umtx_repropagate_priority(pi); 1869 } 1870 mtx_unlock_spin(&umtx_lock); 1871} 1872 1873/* 1874 * Sleep on a PI mutex. 1875 */ 1876static int 1877umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1878 uint32_t owner, const char *wmesg, struct abs_timeout *timo) 1879{ 1880 struct umtxq_chain *uc; 1881 struct thread *td, *td1; 1882 struct umtx_q *uq1; 1883 int pri; 1884 int error = 0; 1885 1886 td = uq->uq_thread; 1887 KASSERT(td == curthread, ("inconsistent uq_thread")); 1888 uc = umtxq_getchain(&uq->uq_key); 1889 UMTXQ_LOCKED_ASSERT(uc); 1890 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1891 umtxq_insert(uq); 1892 mtx_lock_spin(&umtx_lock); 1893 if (pi->pi_owner == NULL) { 1894 mtx_unlock_spin(&umtx_lock); 1895 /* XXX Only look up thread in current process. 
*/ 1896 td1 = tdfind(owner, curproc->p_pid); 1897 mtx_lock_spin(&umtx_lock); 1898 if (td1 != NULL) { 1899 if (pi->pi_owner == NULL) 1900 umtx_pi_setowner(pi, td1); 1901 PROC_UNLOCK(td1->td_proc); 1902 } 1903 } 1904 1905 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1906 pri = UPRI(uq1->uq_thread); 1907 if (pri > UPRI(td)) 1908 break; 1909 } 1910 1911 if (uq1 != NULL) 1912 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1913 else 1914 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1915 1916 uq->uq_pi_blocked = pi; 1917 thread_lock(td); 1918 td->td_flags |= TDF_UPIBLOCKED; 1919 thread_unlock(td); 1920 umtx_propagate_priority(td); 1921 mtx_unlock_spin(&umtx_lock); 1922 umtxq_unbusy(&uq->uq_key); 1923 1924 error = umtxq_sleep(uq, wmesg, timo); 1925 umtxq_remove(uq); 1926 1927 mtx_lock_spin(&umtx_lock); 1928 uq->uq_pi_blocked = NULL; 1929 thread_lock(td); 1930 td->td_flags &= ~TDF_UPIBLOCKED; 1931 thread_unlock(td); 1932 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1933 umtx_repropagate_priority(pi); 1934 mtx_unlock_spin(&umtx_lock); 1935 umtxq_unlock(&uq->uq_key); 1936 1937 return (error); 1938} 1939 1940/* 1941 * Add reference count for a PI mutex. 1942 */ 1943static void 1944umtx_pi_ref(struct umtx_pi *pi) 1945{ 1946 struct umtxq_chain *uc; 1947 1948 uc = umtxq_getchain(&pi->pi_key); 1949 UMTXQ_LOCKED_ASSERT(uc); 1950 pi->pi_refcount++; 1951} 1952 1953/* 1954 * Decrease reference count for a PI mutex, if the counter 1955 * is decreased to zero, its memory space is freed. 
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		/* Detach from the owner's contested list before freeing. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
			    pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 *
 * Returns NULL when no record matches 'key'; chain lock must be held.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
	struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Look up (or create) the in-kernel PI record for this mutex.
	 * The M_NOWAIT attempt runs under the chain lock; if it fails
	 * we drop the lock, allocate M_WAITOK, and re-check for a
	 * record raced in by another thread.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			rv = casueword32(&m->m_owner,
			    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == UMUTEX_CONTESTED) {
				/* Record kernel-side ownership of the PI record. */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 *
 * Transfers the PI record away from the caller, recomputes the
 * caller's lent priority from its remaining contested PI mutexes,
 * and wakes the highest-priority waiter still asleep.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/* Recompute our lent priority from the remaining PI mutexes. */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
	struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Only privileged (rtprio-capable) threads get ceiling boosts. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/* Convert the rtprio-space ceiling into kernel priority space. */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		/* Caller must not already run above the ceiling. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		/*
		 * PP mutexes are always handed over in CONTESTED state,
		 * so acquisition is a CONTESTED -> id|CONTESTED CAS.
		 */
		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Roll our inherited priority back for the retry;
		 * recompute the lent priority from all still-contested
		 * PI mutexes we own.
		 */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	/* On failure, undo any priority boost taken in the last iteration. */
	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* rceiling of -1 means "no ceiling to restore". */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Drop to the restored ceiling and recompute lent priority. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the ceiling of a PP mutex, acquiring it transiently if it is
 * free.  Writes the previous ceiling to *old_ceiling on success.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error, rv;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* Try to take the (CONTESTED-when-free) mutex transiently. */
		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			/*
			 * NOTE(review): the return values of these two
			 * suword32() stores are ignored, so a fault here
			 * is silently reported as success.
			 */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* We already own it: update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 *
 * Dispatches on the protocol bits in m_flags to the normal, PI, or PP
 * implementation, then normalizes EINTR/ERESTART: untimed locks are
 * restartable (except _UMUTEX_WAIT), timed locks are not.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
2617 */ 2618static int 2619do_unlock_umutex(struct thread *td, struct umutex *m) 2620{ 2621 uint32_t flags; 2622 int error; 2623 2624 error = fueword32(&m->m_flags, &flags); 2625 if (error == -1) 2626 return (EFAULT); 2627 2628 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2629 case 0: 2630 return (do_unlock_normal(td, m, flags)); 2631 case UMUTEX_PRIO_INHERIT: 2632 return (do_unlock_pi(td, m, flags)); 2633 case UMUTEX_PRIO_PROTECT: 2634 return (do_unlock_pp(td, m, flags)); 2635 } 2636 2637 return (EINVAL); 2638} 2639 2640static int 2641do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2642 struct timespec *timeout, u_long wflags) 2643{ 2644 struct abs_timeout timo; 2645 struct umtx_q *uq; 2646 uint32_t flags, clockid, hasw; 2647 int error; 2648 2649 uq = td->td_umtxq; 2650 error = fueword32(&cv->c_flags, &flags); 2651 if (error == -1) 2652 return (EFAULT); 2653 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2654 if (error != 0) 2655 return (error); 2656 2657 if ((wflags & CVWAIT_CLOCKID) != 0) { 2658 error = fueword32(&cv->c_clockid, &clockid); 2659 if (error == -1) { 2660 umtx_key_release(&uq->uq_key); 2661 return (EFAULT); 2662 } 2663 if (clockid < CLOCK_REALTIME || 2664 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2665 /* hmm, only HW clock id will work. */ 2666 umtx_key_release(&uq->uq_key); 2667 return (EINVAL); 2668 } 2669 } else { 2670 clockid = CLOCK_REALTIME; 2671 } 2672 2673 umtxq_lock(&uq->uq_key); 2674 umtxq_busy(&uq->uq_key); 2675 umtxq_insert(uq); 2676 umtxq_unlock(&uq->uq_key); 2677 2678 /* 2679 * Set c_has_waiters to 1 before releasing user mutex, also 2680 * don't modify cache line when unnecessary. 
2681 */ 2682 error = fueword32(&cv->c_has_waiters, &hasw); 2683 if (error == 0 && hasw == 0) 2684 suword32(&cv->c_has_waiters, 1); 2685 2686 umtxq_unbusy_unlocked(&uq->uq_key); 2687 2688 error = do_unlock_umutex(td, m); 2689 2690 if (timeout != NULL) 2691 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2692 timeout); 2693 2694 umtxq_lock(&uq->uq_key); 2695 if (error == 0) { 2696 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2697 NULL : &timo); 2698 } 2699 2700 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2701 error = 0; 2702 else { 2703 /* 2704 * This must be timeout,interrupted by signal or 2705 * surprious wakeup, clear c_has_waiter flag when 2706 * necessary. 2707 */ 2708 umtxq_busy(&uq->uq_key); 2709 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2710 int oldlen = uq->uq_cur_queue->length; 2711 umtxq_remove(uq); 2712 if (oldlen == 1) { 2713 umtxq_unlock(&uq->uq_key); 2714 suword32(&cv->c_has_waiters, 0); 2715 umtxq_lock(&uq->uq_key); 2716 } 2717 } 2718 umtxq_unbusy(&uq->uq_key); 2719 if (error == ERESTART) 2720 error = EINTR; 2721 } 2722 2723 umtxq_unlock(&uq->uq_key); 2724 umtx_key_release(&uq->uq_key); 2725 return (error); 2726} 2727 2728/* 2729 * Signal a userland condition variable. 
2730 */ 2731static int 2732do_cv_signal(struct thread *td, struct ucond *cv) 2733{ 2734 struct umtx_key key; 2735 int error, cnt, nwake; 2736 uint32_t flags; 2737 2738 error = fueword32(&cv->c_flags, &flags); 2739 if (error == -1) 2740 return (EFAULT); 2741 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2742 return (error); 2743 umtxq_lock(&key); 2744 umtxq_busy(&key); 2745 cnt = umtxq_count(&key); 2746 nwake = umtxq_signal(&key, 1); 2747 if (cnt <= nwake) { 2748 umtxq_unlock(&key); 2749 error = suword32(&cv->c_has_waiters, 0); 2750 if (error == -1) 2751 error = EFAULT; 2752 umtxq_lock(&key); 2753 } 2754 umtxq_unbusy(&key); 2755 umtxq_unlock(&key); 2756 umtx_key_release(&key); 2757 return (error); 2758} 2759 2760static int 2761do_cv_broadcast(struct thread *td, struct ucond *cv) 2762{ 2763 struct umtx_key key; 2764 int error; 2765 uint32_t flags; 2766 2767 error = fueword32(&cv->c_flags, &flags); 2768 if (error == -1) 2769 return (EFAULT); 2770 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2771 return (error); 2772 2773 umtxq_lock(&key); 2774 umtxq_busy(&key); 2775 umtxq_signal(&key, INT_MAX); 2776 umtxq_unlock(&key); 2777 2778 error = suword32(&cv->c_has_waiters, 0); 2779 if (error == -1) 2780 error = EFAULT; 2781 2782 umtxq_unbusy_unlocked(&key); 2783 2784 umtx_key_release(&key); 2785 return (error); 2786} 2787 2788static int 2789do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2790{ 2791 struct abs_timeout timo; 2792 struct umtx_q *uq; 2793 uint32_t flags, wrflags; 2794 int32_t state, oldstate; 2795 int32_t blocked_readers; 2796 int error, rv; 2797 2798 uq = td->td_umtxq; 2799 error = fueword32(&rwlock->rw_flags, &flags); 2800 if (error == -1) 2801 return (EFAULT); 2802 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2803 if (error != 0) 2804 return (error); 2805 2806 if (timeout != NULL) 2807 abs_timeout_init2(&timo, timeout); 2808 2809 
wrflags = URWLOCK_WRITE_OWNER; 2810 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2811 wrflags |= URWLOCK_WRITE_WAITERS; 2812 2813 for (;;) { 2814 rv = fueword32(&rwlock->rw_state, &state); 2815 if (rv == -1) { 2816 umtx_key_release(&uq->uq_key); 2817 return (EFAULT); 2818 } 2819 2820 /* try to lock it */ 2821 while (!(state & wrflags)) { 2822 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2823 umtx_key_release(&uq->uq_key); 2824 return (EAGAIN); 2825 } 2826 rv = casueword32(&rwlock->rw_state, state, 2827 &oldstate, state + 1); 2828 if (rv == -1) { 2829 umtx_key_release(&uq->uq_key); 2830 return (EFAULT); 2831 } 2832 if (oldstate == state) { 2833 umtx_key_release(&uq->uq_key); 2834 return (0); 2835 } 2836 error = umtxq_check_susp(td); 2837 if (error != 0) 2838 break; 2839 state = oldstate; 2840 } 2841 2842 if (error) 2843 break; 2844 2845 /* grab monitor lock */ 2846 umtxq_lock(&uq->uq_key); 2847 umtxq_busy(&uq->uq_key); 2848 umtxq_unlock(&uq->uq_key); 2849 2850 /* 2851 * re-read the state, in case it changed between the try-lock above 2852 * and the check below 2853 */ 2854 rv = fueword32(&rwlock->rw_state, &state); 2855 if (rv == -1) 2856 error = EFAULT; 2857 2858 /* set read contention bit */ 2859 while (error == 0 && (state & wrflags) && 2860 !(state & URWLOCK_READ_WAITERS)) { 2861 rv = casueword32(&rwlock->rw_state, state, 2862 &oldstate, state | URWLOCK_READ_WAITERS); 2863 if (rv == -1) { 2864 error = EFAULT; 2865 break; 2866 } 2867 if (oldstate == state) 2868 goto sleep; 2869 state = oldstate; 2870 error = umtxq_check_susp(td); 2871 if (error != 0) 2872 break; 2873 } 2874 if (error != 0) { 2875 umtxq_unbusy_unlocked(&uq->uq_key); 2876 break; 2877 } 2878 2879 /* state is changed while setting flags, restart */ 2880 if (!(state & wrflags)) { 2881 umtxq_unbusy_unlocked(&uq->uq_key); 2882 error = umtxq_check_susp(td); 2883 if (error != 0) 2884 break; 2885 continue; 2886 } 2887 2888sleep: 2889 /* contention 
bit is set, before sleeping, increase read waiter count */ 2890 rv = fueword32(&rwlock->rw_blocked_readers, 2891 &blocked_readers); 2892 if (rv == -1) { 2893 umtxq_unbusy_unlocked(&uq->uq_key); 2894 error = EFAULT; 2895 break; 2896 } 2897 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2898 2899 while (state & wrflags) { 2900 umtxq_lock(&uq->uq_key); 2901 umtxq_insert(uq); 2902 umtxq_unbusy(&uq->uq_key); 2903 2904 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2905 NULL : &timo); 2906 2907 umtxq_busy(&uq->uq_key); 2908 umtxq_remove(uq); 2909 umtxq_unlock(&uq->uq_key); 2910 if (error) 2911 break; 2912 rv = fueword32(&rwlock->rw_state, &state); 2913 if (rv == -1) { 2914 error = EFAULT; 2915 break; 2916 } 2917 } 2918 2919 /* decrease read waiter count, and may clear read contention bit */ 2920 rv = fueword32(&rwlock->rw_blocked_readers, 2921 &blocked_readers); 2922 if (rv == -1) { 2923 umtxq_unbusy_unlocked(&uq->uq_key); 2924 error = EFAULT; 2925 break; 2926 } 2927 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2928 if (blocked_readers == 1) { 2929 rv = fueword32(&rwlock->rw_state, &state); 2930 if (rv == -1) 2931 error = EFAULT; 2932 while (error == 0) { 2933 rv = casueword32(&rwlock->rw_state, state, 2934 &oldstate, state & ~URWLOCK_READ_WAITERS); 2935 if (rv == -1) { 2936 error = EFAULT; 2937 break; 2938 } 2939 if (oldstate == state) 2940 break; 2941 state = oldstate; 2942 error = umtxq_check_susp(td); 2943 } 2944 } 2945 2946 umtxq_unbusy_unlocked(&uq->uq_key); 2947 if (error != 0) 2948 break; 2949 } 2950 umtx_key_release(&uq->uq_key); 2951 if (error == ERESTART) 2952 error = EINTR; 2953 return (error); 2954} 2955 2956static int 2957do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2958{ 2959 struct abs_timeout timo; 2960 struct umtx_q *uq; 2961 uint32_t flags; 2962 int32_t state, oldstate; 2963 int32_t blocked_writers; 2964 int32_t blocked_readers; 2965 int error, rv; 2966 2967 uq = td->td_umtxq; 2968 
error = fueword32(&rwlock->rw_flags, &flags); 2969 if (error == -1) 2970 return (EFAULT); 2971 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2972 if (error != 0) 2973 return (error); 2974 2975 if (timeout != NULL) 2976 abs_timeout_init2(&timo, timeout); 2977 2978 blocked_readers = 0; 2979 for (;;) { 2980 rv = fueword32(&rwlock->rw_state, &state); 2981 if (rv == -1) { 2982 umtx_key_release(&uq->uq_key); 2983 return (EFAULT); 2984 } 2985 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2986 rv = casueword32(&rwlock->rw_state, state, 2987 &oldstate, state | URWLOCK_WRITE_OWNER); 2988 if (rv == -1) { 2989 umtx_key_release(&uq->uq_key); 2990 return (EFAULT); 2991 } 2992 if (oldstate == state) { 2993 umtx_key_release(&uq->uq_key); 2994 return (0); 2995 } 2996 state = oldstate; 2997 error = umtxq_check_susp(td); 2998 if (error != 0) 2999 break; 3000 } 3001 3002 if (error) { 3003 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 3004 blocked_readers != 0) { 3005 umtxq_lock(&uq->uq_key); 3006 umtxq_busy(&uq->uq_key); 3007 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 3008 umtxq_unbusy(&uq->uq_key); 3009 umtxq_unlock(&uq->uq_key); 3010 } 3011 3012 break; 3013 } 3014 3015 /* grab monitor lock */ 3016 umtxq_lock(&uq->uq_key); 3017 umtxq_busy(&uq->uq_key); 3018 umtxq_unlock(&uq->uq_key); 3019 3020 /* 3021 * re-read the state, in case it changed between the try-lock above 3022 * and the check below 3023 */ 3024 rv = fueword32(&rwlock->rw_state, &state); 3025 if (rv == -1) 3026 error = EFAULT; 3027 3028 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3029 URWLOCK_READER_COUNT(state) != 0) && 3030 (state & URWLOCK_WRITE_WAITERS) == 0) { 3031 rv = casueword32(&rwlock->rw_state, state, 3032 &oldstate, state | URWLOCK_WRITE_WAITERS); 3033 if (rv == -1) { 3034 error = EFAULT; 3035 break; 3036 } 3037 if (oldstate == state) 3038 goto sleep; 3039 state = oldstate; 3040 error = umtxq_check_susp(td); 
3041 if (error != 0) 3042 break; 3043 } 3044 if (error != 0) { 3045 umtxq_unbusy_unlocked(&uq->uq_key); 3046 break; 3047 } 3048 3049 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 3050 umtxq_unbusy_unlocked(&uq->uq_key); 3051 error = umtxq_check_susp(td); 3052 if (error != 0) 3053 break; 3054 continue; 3055 } 3056sleep: 3057 rv = fueword32(&rwlock->rw_blocked_writers, 3058 &blocked_writers); 3059 if (rv == -1) { 3060 umtxq_unbusy_unlocked(&uq->uq_key); 3061 error = EFAULT; 3062 break; 3063 } 3064 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 3065 3066 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 3067 umtxq_lock(&uq->uq_key); 3068 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3069 umtxq_unbusy(&uq->uq_key); 3070 3071 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 3072 NULL : &timo); 3073 3074 umtxq_busy(&uq->uq_key); 3075 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3076 umtxq_unlock(&uq->uq_key); 3077 if (error) 3078 break; 3079 rv = fueword32(&rwlock->rw_state, &state); 3080 if (rv == -1) { 3081 error = EFAULT; 3082 break; 3083 } 3084 } 3085 3086 rv = fueword32(&rwlock->rw_blocked_writers, 3087 &blocked_writers); 3088 if (rv == -1) { 3089 umtxq_unbusy_unlocked(&uq->uq_key); 3090 error = EFAULT; 3091 break; 3092 } 3093 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3094 if (blocked_writers == 1) { 3095 rv = fueword32(&rwlock->rw_state, &state); 3096 if (rv == -1) { 3097 umtxq_unbusy_unlocked(&uq->uq_key); 3098 error = EFAULT; 3099 break; 3100 } 3101 for (;;) { 3102 rv = casueword32(&rwlock->rw_state, state, 3103 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3104 if (rv == -1) { 3105 error = EFAULT; 3106 break; 3107 } 3108 if (oldstate == state) 3109 break; 3110 state = oldstate; 3111 error = umtxq_check_susp(td); 3112 /* 3113 * We are leaving the URWLOCK_WRITE_WAITERS 3114 * behind, but this should not harm the 3115 * correctness. 
3116 */ 3117 if (error != 0) 3118 break; 3119 } 3120 rv = fueword32(&rwlock->rw_blocked_readers, 3121 &blocked_readers); 3122 if (rv == -1) { 3123 umtxq_unbusy_unlocked(&uq->uq_key); 3124 error = EFAULT; 3125 break; 3126 } 3127 } else 3128 blocked_readers = 0; 3129 3130 umtxq_unbusy_unlocked(&uq->uq_key); 3131 } 3132 3133 umtx_key_release(&uq->uq_key); 3134 if (error == ERESTART) 3135 error = EINTR; 3136 return (error); 3137} 3138 3139static int 3140do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3141{ 3142 struct umtx_q *uq; 3143 uint32_t flags; 3144 int32_t state, oldstate; 3145 int error, rv, q, count; 3146 3147 uq = td->td_umtxq; 3148 error = fueword32(&rwlock->rw_flags, &flags); 3149 if (error == -1) 3150 return (EFAULT); 3151 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3152 if (error != 0) 3153 return (error); 3154 3155 error = fueword32(&rwlock->rw_state, &state); 3156 if (error == -1) { 3157 error = EFAULT; 3158 goto out; 3159 } 3160 if (state & URWLOCK_WRITE_OWNER) { 3161 for (;;) { 3162 rv = casueword32(&rwlock->rw_state, state, 3163 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3164 if (rv == -1) { 3165 error = EFAULT; 3166 goto out; 3167 } 3168 if (oldstate != state) { 3169 state = oldstate; 3170 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3171 error = EPERM; 3172 goto out; 3173 } 3174 error = umtxq_check_susp(td); 3175 if (error != 0) 3176 goto out; 3177 } else 3178 break; 3179 } 3180 } else if (URWLOCK_READER_COUNT(state) != 0) { 3181 for (;;) { 3182 rv = casueword32(&rwlock->rw_state, state, 3183 &oldstate, state - 1); 3184 if (rv == -1) { 3185 error = EFAULT; 3186 goto out; 3187 } 3188 if (oldstate != state) { 3189 state = oldstate; 3190 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3191 error = EPERM; 3192 goto out; 3193 } 3194 error = umtxq_check_susp(td); 3195 if (error != 0) 3196 goto out; 3197 } else 3198 break; 3199 } 3200 } else { 3201 error = EPERM; 3202 goto out; 3203 } 3204 3205 count = 0; 3206 3207 if (!(flags 
& URWLOCK_PREFER_READER)) { 3208 if (state & URWLOCK_WRITE_WAITERS) { 3209 count = 1; 3210 q = UMTX_EXCLUSIVE_QUEUE; 3211 } else if (state & URWLOCK_READ_WAITERS) { 3212 count = INT_MAX; 3213 q = UMTX_SHARED_QUEUE; 3214 } 3215 } else { 3216 if (state & URWLOCK_READ_WAITERS) { 3217 count = INT_MAX; 3218 q = UMTX_SHARED_QUEUE; 3219 } else if (state & URWLOCK_WRITE_WAITERS) { 3220 count = 1; 3221 q = UMTX_EXCLUSIVE_QUEUE; 3222 } 3223 } 3224 3225 if (count) { 3226 umtxq_lock(&uq->uq_key); 3227 umtxq_busy(&uq->uq_key); 3228 umtxq_signal_queue(&uq->uq_key, count, q); 3229 umtxq_unbusy(&uq->uq_key); 3230 umtxq_unlock(&uq->uq_key); 3231 } 3232out: 3233 umtx_key_release(&uq->uq_key); 3234 return (error); 3235} 3236 3237static int 3238do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3239{ 3240 struct abs_timeout timo; 3241 struct umtx_q *uq; 3242 uint32_t flags, count, count1; 3243 int error, rv; 3244 3245 uq = td->td_umtxq; 3246 error = fueword32(&sem->_flags, &flags); 3247 if (error == -1) 3248 return (EFAULT); 3249 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3250 if (error != 0) 3251 return (error); 3252 3253 if (timeout != NULL) 3254 abs_timeout_init2(&timo, timeout); 3255 3256 umtxq_lock(&uq->uq_key); 3257 umtxq_busy(&uq->uq_key); 3258 umtxq_insert(uq); 3259 umtxq_unlock(&uq->uq_key); 3260 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3261 if (rv == 0) 3262 rv = fueword32(&sem->_count, &count); 3263 if (rv == -1 || count != 0) { 3264 umtxq_lock(&uq->uq_key); 3265 umtxq_unbusy(&uq->uq_key); 3266 umtxq_remove(uq); 3267 umtxq_unlock(&uq->uq_key); 3268 umtx_key_release(&uq->uq_key); 3269 return (rv == -1 ? EFAULT : 0); 3270 } 3271 umtxq_lock(&uq->uq_key); 3272 umtxq_unbusy(&uq->uq_key); 3273 3274 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3275 3276 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3277 error = 0; 3278 else { 3279 umtxq_remove(uq); 3280 /* A relative timeout cannot be restarted. 
*/ 3281 if (error == ERESTART && timeout != NULL && 3282 (timeout->_flags & UMTX_ABSTIME) == 0) 3283 error = EINTR; 3284 } 3285 umtxq_unlock(&uq->uq_key); 3286 umtx_key_release(&uq->uq_key); 3287 return (error); 3288} 3289 3290/* 3291 * Signal a userland condition variable. 3292 */ 3293static int 3294do_sem_wake(struct thread *td, struct _usem *sem) 3295{ 3296 struct umtx_key key; 3297 int error, cnt; 3298 uint32_t flags; 3299 3300 error = fueword32(&sem->_flags, &flags); 3301 if (error == -1) 3302 return (EFAULT); 3303 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3304 return (error); 3305 umtxq_lock(&key); 3306 umtxq_busy(&key); 3307 cnt = umtxq_count(&key); 3308 if (cnt > 0) { 3309 umtxq_signal(&key, 1); 3310 /* 3311 * Check if count is greater than 0, this means the memory is 3312 * still being referenced by user code, so we can safely 3313 * update _has_waiters flag. 3314 */ 3315 if (cnt == 1) { 3316 umtxq_unlock(&key); 3317 error = suword32(&sem->_has_waiters, 0); 3318 umtxq_lock(&key); 3319 if (error == -1) 3320 error = EFAULT; 3321 } 3322 } 3323 umtxq_unbusy(&key); 3324 umtxq_unlock(&key); 3325 umtx_key_release(&key); 3326 return (error); 3327} 3328 3329int 3330sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 3331 /* struct umtx *umtx */ 3332{ 3333 return do_lock_umtx(td, uap->umtx, td->td_tid, 0); 3334} 3335 3336int 3337sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 3338 /* struct umtx *umtx */ 3339{ 3340 return do_unlock_umtx(td, uap->umtx, td->td_tid); 3341} 3342 3343inline int 3344umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3345{ 3346 int error; 3347 3348 error = copyin(addr, tsp, sizeof(struct timespec)); 3349 if (error == 0) { 3350 if (tsp->tv_sec < 0 || 3351 tsp->tv_nsec >= 1000000000 || 3352 tsp->tv_nsec < 0) 3353 error = EINVAL; 3354 } 3355 return (error); 3356} 3357 3358static inline int 3359umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3360{ 
3361 int error; 3362 3363 if (size <= sizeof(struct timespec)) { 3364 tp->_clockid = CLOCK_REALTIME; 3365 tp->_flags = 0; 3366 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3367 } else 3368 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3369 if (error != 0) 3370 return (error); 3371 if (tp->_timeout.tv_sec < 0 || 3372 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3373 return (EINVAL); 3374 return (0); 3375} 3376 3377static int 3378__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) 3379{ 3380 struct timespec *ts, timeout; 3381 int error; 3382 3383 /* Allow a null timespec (wait forever). */ 3384 if (uap->uaddr2 == NULL) 3385 ts = NULL; 3386 else { 3387 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3388 if (error != 0) 3389 return (error); 3390 ts = &timeout; 3391 } 3392 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3393} 3394 3395static int 3396__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) 3397{ 3398 return (do_unlock_umtx(td, uap->obj, uap->val)); 3399} 3400 3401static int 3402__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3403{ 3404 struct _umtx_time timeout, *tm_p; 3405 int error; 3406 3407 if (uap->uaddr2 == NULL) 3408 tm_p = NULL; 3409 else { 3410 error = umtx_copyin_umtx_time( 3411 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3412 if (error != 0) 3413 return (error); 3414 tm_p = &timeout; 3415 } 3416 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3417} 3418 3419static int 3420__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3421{ 3422 struct _umtx_time timeout, *tm_p; 3423 int error; 3424 3425 if (uap->uaddr2 == NULL) 3426 tm_p = NULL; 3427 else { 3428 error = umtx_copyin_umtx_time( 3429 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3430 if (error != 0) 3431 return (error); 3432 tm_p = &timeout; 3433 } 3434 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3435} 3436 3437static int 3438__umtx_op_wait_uint_private(struct thread *td, 
struct _umtx_op_args *uap) 3439{ 3440 struct _umtx_time *tm_p, timeout; 3441 int error; 3442 3443 if (uap->uaddr2 == NULL) 3444 tm_p = NULL; 3445 else { 3446 error = umtx_copyin_umtx_time( 3447 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3448 if (error != 0) 3449 return (error); 3450 tm_p = &timeout; 3451 } 3452 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3453} 3454 3455static int 3456__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3457{ 3458 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3459} 3460 3461#define BATCH_SIZE 128 3462static int 3463__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3464{ 3465 int count = uap->val; 3466 void *uaddrs[BATCH_SIZE]; 3467 char **upp = (char **)uap->obj; 3468 int tocopy; 3469 int error = 0; 3470 int i, pos = 0; 3471 3472 while (count > 0) { 3473 tocopy = count; 3474 if (tocopy > BATCH_SIZE) 3475 tocopy = BATCH_SIZE; 3476 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3477 if (error != 0) 3478 break; 3479 for (i = 0; i < tocopy; ++i) 3480 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3481 count -= tocopy; 3482 pos += tocopy; 3483 } 3484 return (error); 3485} 3486 3487static int 3488__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3489{ 3490 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3491} 3492 3493static int 3494__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3495{ 3496 struct _umtx_time *tm_p, timeout; 3497 int error; 3498 3499 /* Allow a null timespec (wait forever). 
*/ 3500 if (uap->uaddr2 == NULL) 3501 tm_p = NULL; 3502 else { 3503 error = umtx_copyin_umtx_time( 3504 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3505 if (error != 0) 3506 return (error); 3507 tm_p = &timeout; 3508 } 3509 return do_lock_umutex(td, uap->obj, tm_p, 0); 3510} 3511 3512static int 3513__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3514{ 3515 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3516} 3517 3518static int 3519__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3520{ 3521 struct _umtx_time *tm_p, timeout; 3522 int error; 3523 3524 /* Allow a null timespec (wait forever). */ 3525 if (uap->uaddr2 == NULL) 3526 tm_p = NULL; 3527 else { 3528 error = umtx_copyin_umtx_time( 3529 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3530 if (error != 0) 3531 return (error); 3532 tm_p = &timeout; 3533 } 3534 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3535} 3536 3537static int 3538__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3539{ 3540 return do_wake_umutex(td, uap->obj); 3541} 3542 3543static int 3544__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3545{ 3546 return do_unlock_umutex(td, uap->obj); 3547} 3548 3549static int 3550__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3551{ 3552 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3553} 3554 3555static int 3556__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3557{ 3558 struct timespec *ts, timeout; 3559 int error; 3560 3561 /* Allow a null timespec (wait forever). 
*/ 3562 if (uap->uaddr2 == NULL) 3563 ts = NULL; 3564 else { 3565 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3566 if (error != 0) 3567 return (error); 3568 ts = &timeout; 3569 } 3570 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3571} 3572 3573static int 3574__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3575{ 3576 return do_cv_signal(td, uap->obj); 3577} 3578 3579static int 3580__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3581{ 3582 return do_cv_broadcast(td, uap->obj); 3583} 3584 3585static int 3586__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3587{ 3588 struct _umtx_time timeout; 3589 int error; 3590 3591 /* Allow a null timespec (wait forever). */ 3592 if (uap->uaddr2 == NULL) { 3593 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3594 } else { 3595 error = umtx_copyin_umtx_time(uap->uaddr2, 3596 (size_t)uap->uaddr1, &timeout); 3597 if (error != 0) 3598 return (error); 3599 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3600 } 3601 return (error); 3602} 3603 3604static int 3605__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3606{ 3607 struct _umtx_time timeout; 3608 int error; 3609 3610 /* Allow a null timespec (wait forever). */ 3611 if (uap->uaddr2 == NULL) { 3612 error = do_rw_wrlock(td, uap->obj, 0); 3613 } else { 3614 error = umtx_copyin_umtx_time(uap->uaddr2, 3615 (size_t)uap->uaddr1, &timeout); 3616 if (error != 0) 3617 return (error); 3618 3619 error = do_rw_wrlock(td, uap->obj, &timeout); 3620 } 3621 return (error); 3622} 3623 3624static int 3625__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3626{ 3627 return do_rw_unlock(td, uap->obj); 3628} 3629 3630static int 3631__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3632{ 3633 struct _umtx_time *tm_p, timeout; 3634 int error; 3635 3636 /* Allow a null timespec (wait forever). 
*/ 3637 if (uap->uaddr2 == NULL) 3638 tm_p = NULL; 3639 else { 3640 error = umtx_copyin_umtx_time( 3641 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3642 if (error != 0) 3643 return (error); 3644 tm_p = &timeout; 3645 } 3646 return (do_sem_wait(td, uap->obj, tm_p)); 3647} 3648 3649static int 3650__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3651{ 3652 return do_sem_wake(td, uap->obj); 3653} 3654 3655static int 3656__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3657{ 3658 return do_wake2_umutex(td, uap->obj, uap->val); 3659} 3660 3661typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3662 3663static _umtx_op_func op_table[] = { 3664 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ 3665 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ 3666 __umtx_op_wait, /* UMTX_OP_WAIT */ 3667 __umtx_op_wake, /* UMTX_OP_WAKE */ 3668 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3669 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3670 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3671 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3672 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3673 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3674 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3675 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3676 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3677 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3678 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3679 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3680 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3681 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ 3682 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ 3683 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3684 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3685 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3686 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */ 3687}; 3688 3689int 3690sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3691{ 3692 if 
((unsigned)uap->op < UMTX_OP_MAX) 3693 return (*op_table[uap->op])(td, uap); 3694 return (EINVAL); 3695} 3696 3697#ifdef COMPAT_FREEBSD32 3698int 3699freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) 3700 /* struct umtx *umtx */ 3701{ 3702 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 3703} 3704 3705int 3706freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) 3707 /* struct umtx *umtx */ 3708{ 3709 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 3710} 3711 3712struct timespec32 { 3713 int32_t tv_sec; 3714 int32_t tv_nsec; 3715}; 3716 3717struct umtx_time32 { 3718 struct timespec32 timeout; 3719 uint32_t flags; 3720 uint32_t clockid; 3721}; 3722 3723static inline int 3724umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3725{ 3726 struct timespec32 ts32; 3727 int error; 3728 3729 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3730 if (error == 0) { 3731 if (ts32.tv_sec < 0 || 3732 ts32.tv_nsec >= 1000000000 || 3733 ts32.tv_nsec < 0) 3734 error = EINVAL; 3735 else { 3736 tsp->tv_sec = ts32.tv_sec; 3737 tsp->tv_nsec = ts32.tv_nsec; 3738 } 3739 } 3740 return (error); 3741} 3742 3743static inline int 3744umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 3745{ 3746 struct umtx_time32 t32; 3747 int error; 3748 3749 t32.clockid = CLOCK_REALTIME; 3750 t32.flags = 0; 3751 if (size <= sizeof(struct timespec32)) 3752 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 3753 else 3754 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 3755 if (error != 0) 3756 return (error); 3757 if (t32.timeout.tv_sec < 0 || 3758 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 3759 return (EINVAL); 3760 tp->_timeout.tv_sec = t32.timeout.tv_sec; 3761 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 3762 tp->_flags = t32.flags; 3763 tp->_clockid = t32.clockid; 3764 return (0); 3765} 3766 3767static int 
3768__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) 3769{ 3770 struct timespec *ts, timeout; 3771 int error; 3772 3773 /* Allow a null timespec (wait forever). */ 3774 if (uap->uaddr2 == NULL) 3775 ts = NULL; 3776 else { 3777 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3778 if (error != 0) 3779 return (error); 3780 ts = &timeout; 3781 } 3782 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3783} 3784 3785static int 3786__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) 3787{ 3788 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val)); 3789} 3790 3791static int 3792__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3793{ 3794 struct _umtx_time *tm_p, timeout; 3795 int error; 3796 3797 if (uap->uaddr2 == NULL) 3798 tm_p = NULL; 3799 else { 3800 error = umtx_copyin_umtx_time32(uap->uaddr2, 3801 (size_t)uap->uaddr1, &timeout); 3802 if (error != 0) 3803 return (error); 3804 tm_p = &timeout; 3805 } 3806 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3807} 3808 3809static int 3810__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3811{ 3812 struct _umtx_time *tm_p, timeout; 3813 int error; 3814 3815 /* Allow a null timespec (wait forever). */ 3816 if (uap->uaddr2 == NULL) 3817 tm_p = NULL; 3818 else { 3819 error = umtx_copyin_umtx_time(uap->uaddr2, 3820 (size_t)uap->uaddr1, &timeout); 3821 if (error != 0) 3822 return (error); 3823 tm_p = &timeout; 3824 } 3825 return do_lock_umutex(td, uap->obj, tm_p, 0); 3826} 3827 3828static int 3829__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3830{ 3831 struct _umtx_time *tm_p, timeout; 3832 int error; 3833 3834 /* Allow a null timespec (wait forever). 
*/ 3835 if (uap->uaddr2 == NULL) 3836 tm_p = NULL; 3837 else { 3838 error = umtx_copyin_umtx_time32(uap->uaddr2, 3839 (size_t)uap->uaddr1, &timeout); 3840 if (error != 0) 3841 return (error); 3842 tm_p = &timeout; 3843 } 3844 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3845} 3846 3847static int 3848__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3849{ 3850 struct timespec *ts, timeout; 3851 int error; 3852 3853 /* Allow a null timespec (wait forever). */ 3854 if (uap->uaddr2 == NULL) 3855 ts = NULL; 3856 else { 3857 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3858 if (error != 0) 3859 return (error); 3860 ts = &timeout; 3861 } 3862 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3863} 3864 3865static int 3866__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3867{ 3868 struct _umtx_time timeout; 3869 int error; 3870 3871 /* Allow a null timespec (wait forever). */ 3872 if (uap->uaddr2 == NULL) { 3873 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3874 } else { 3875 error = umtx_copyin_umtx_time32(uap->uaddr2, 3876 (size_t)uap->uaddr1, &timeout); 3877 if (error != 0) 3878 return (error); 3879 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3880 } 3881 return (error); 3882} 3883 3884static int 3885__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3886{ 3887 struct _umtx_time timeout; 3888 int error; 3889 3890 /* Allow a null timespec (wait forever). 
*/ 3891 if (uap->uaddr2 == NULL) { 3892 error = do_rw_wrlock(td, uap->obj, 0); 3893 } else { 3894 error = umtx_copyin_umtx_time32(uap->uaddr2, 3895 (size_t)uap->uaddr1, &timeout); 3896 if (error != 0) 3897 return (error); 3898 error = do_rw_wrlock(td, uap->obj, &timeout); 3899 } 3900 return (error); 3901} 3902 3903static int 3904__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3905{ 3906 struct _umtx_time *tm_p, timeout; 3907 int error; 3908 3909 if (uap->uaddr2 == NULL) 3910 tm_p = NULL; 3911 else { 3912 error = umtx_copyin_umtx_time32( 3913 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 3914 if (error != 0) 3915 return (error); 3916 tm_p = &timeout; 3917 } 3918 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3919} 3920 3921static int 3922__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3923{ 3924 struct _umtx_time *tm_p, timeout; 3925 int error; 3926 3927 /* Allow a null timespec (wait forever). */ 3928 if (uap->uaddr2 == NULL) 3929 tm_p = NULL; 3930 else { 3931 error = umtx_copyin_umtx_time32(uap->uaddr2, 3932 (size_t)uap->uaddr1, &timeout); 3933 if (error != 0) 3934 return (error); 3935 tm_p = &timeout; 3936 } 3937 return (do_sem_wait(td, uap->obj, tm_p)); 3938} 3939 3940static int 3941__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 3942{ 3943 int count = uap->val; 3944 uint32_t uaddrs[BATCH_SIZE]; 3945 uint32_t **upp = (uint32_t **)uap->obj; 3946 int tocopy; 3947 int error = 0; 3948 int i, pos = 0; 3949 3950 while (count > 0) { 3951 tocopy = count; 3952 if (tocopy > BATCH_SIZE) 3953 tocopy = BATCH_SIZE; 3954 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); 3955 if (error != 0) 3956 break; 3957 for (i = 0; i < tocopy; ++i) 3958 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 3959 INT_MAX, 1); 3960 count -= tocopy; 3961 pos += tocopy; 3962 } 3963 return (error); 3964} 3965 3966static _umtx_op_func op_table_compat32[] = { 3967 __umtx_op_lock_umtx_compat32, /* 
UMTX_OP_LOCK */ 3968 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ 3969 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3970 __umtx_op_wake, /* UMTX_OP_WAKE */ 3971 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3972 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_TRYLOCK */ 3973 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3974 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3975 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3976 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3977 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3978 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3979 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3980 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3981 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3982 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3983 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3984 __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */ 3985 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ 3986 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ 3987 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3988 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */ 3989 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */ 3990}; 3991 3992int 3993freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 3994{ 3995 if ((unsigned)uap->op < UMTX_OP_MAX) 3996 return (*op_table_compat32[uap->op])(td, 3997 (struct _umtx_op_args *)uap); 3998 return (EINVAL); 3999} 4000#endif 4001 4002void 4003umtx_thread_init(struct thread *td) 4004{ 4005 td->td_umtxq = umtxq_alloc(); 4006 td->td_umtxq->uq_thread = td; 4007} 4008 4009void 4010umtx_thread_fini(struct thread *td) 4011{ 4012 umtxq_free(td->td_umtxq); 4013} 4014 4015/* 4016 * It will be called when new thread is created, e.g fork(). 
4017 */ 4018void 4019umtx_thread_alloc(struct thread *td) 4020{ 4021 struct umtx_q *uq; 4022 4023 uq = td->td_umtxq; 4024 uq->uq_inherited_pri = PRI_MAX; 4025 4026 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4027 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4028 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4029 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4030} 4031 4032/* 4033 * exec() hook. 4034 */ 4035static void 4036umtx_exec_hook(void *arg __unused, struct proc *p __unused, 4037 struct image_params *imgp __unused) 4038{ 4039 umtx_thread_cleanup(curthread); 4040} 4041 4042/* 4043 * thread_exit() hook. 4044 */ 4045void 4046umtx_thread_exit(struct thread *td) 4047{ 4048 umtx_thread_cleanup(td); 4049} 4050 4051/* 4052 * clean up umtx data. 4053 */ 4054static void 4055umtx_thread_cleanup(struct thread *td) 4056{ 4057 struct umtx_q *uq; 4058 struct umtx_pi *pi; 4059 4060 if ((uq = td->td_umtxq) == NULL) 4061 return; 4062 4063 mtx_lock_spin(&umtx_lock); 4064 uq->uq_inherited_pri = PRI_MAX; 4065 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4066 pi->pi_owner = NULL; 4067 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4068 } 4069 mtx_unlock_spin(&umtx_lock); 4070 thread_lock(td); 4071 sched_lend_user_prio(td, PRI_MAX); 4072 thread_unlock(td); 4073} 4074