1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

/* Internal lock modes passed to do_lock_normal(). */
#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
/* True if percentage "whole.fract" (w, f) is bigger than "sw.sf". */
#define UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx holding by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread waits on. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Thread contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Iterations of opportunistic spinning in umtxq_busy() before sleeping. */
#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

/* Copyin/copyout vector, selected per native vs. compat32 callers. */
struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
/*
 * Create one sysctl node per chain, exposing the per-chain max_length
 * counters for both chain arrays.
 */
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

/*
 * Report the five chains with the highest share of the summed max_length
 * for each of the two chain arrays, as "percentage idx" lines.
 */
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

/*
 * Writing a non-zero value resets the length/max_length statistics of
 * every chain in both arrays.
 */
static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

/*
 * One-time initialization of the umtx subsystem: the PI zone, every
 * chain in both chain arrays, the global umtx_lock, and the shared
 * memory support.  Run from SYSINIT after event handlers are up.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	umtx_shm_init();
}

/*
 * Allocate a zeroed umtx_q together with its spare wait-queue.
 * M_WAITOK: never fails.  Caller owns the result; release with
 * umtxq_free().
 */
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

/* Free a umtx_q and the spare queue it must still hold. */
void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

/* Hash a key's (a, b) identity pair into a chain index. */
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

/*
 * Map a key to its chain; types <= TYPE_SEM use the second chain
 * array, everything else the first.
 */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		/*
		 * On MP, spin briefly with the chain lock dropped in the
		 * hope the busy holder finishes, before sleeping below.
		 * uc_busy is read unlocked here on purpose; the final
		 * decision is retaken under the lock.
		 */
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

/* Unbusy a chain, taking and dropping the chain lock around it. */
static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

/*
 * Find the per-key wait-queue for `key' on queue index q
 * (UMTX_SHARED_QUEUE or UMTX_EXCLUSIVE_QUEUE), or NULL if no thread is
 * queued on that key.  Chain lock must be held.
 */
static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

/*
 * Enqueue uq on the wait-queue for its key.  Each umtx_q donates its
 * spare umtxq_queue: if a queue for the key already exists the spare
 * goes to the chain's spare list, otherwise the spare becomes the new
 * per-key queue.  Chain lock must be held; uq must not be queued yet.
 */
static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

/*
 * Dequeue uq from its wait-queue, if queued.  The departing thread
 * takes a umtxq_queue back as its spare: the now-empty per-key queue
 * itself, or one from the chain's spare list.  Chain lock must be held.
 */
static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and returns first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on an userland object.
 */

static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		/* Dequeue and wake up to n_wake waiters in FIFO order. */
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}


/*
 * Wake up specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

/* Convert a timespec to a tick count via timeval. */
static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

/*
 * Initialize an abs_timeout either from a relative timeout (converted
 * to an absolute deadline against the current clock reading) or from
 * an already-absolute deadline.
 */
static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

/* Initialize an abs_timeout from a userland _umtx_time request. */
static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

/* Refresh timo->cur from the timeout's clock. */
static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

/*
 * Remaining sleep time in ticks, or -1 if the deadline has already
 * passed.
 */
static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

/*
 * Value to store into m_owner on unlock: OWNERDEAD for a robust
 * owner-died unlock, NOTRECOV for a non-consistent mutex, UNOWNED
 * otherwise.
 */
static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);

}

/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	/*
	 * For absolute CLOCK_REALTIME* deadlines, publish td_rtcgen
	 * before sampling the clock so an RTC step wakes us up.
	 */
	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		/* Already woken and dequeued: success. */
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		/* PDROP: msleep returns with the chain lock released. */
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		/* Process-private: identify by (vmspace, address). */
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			/*
			 * Shared: identify by (object, offset); take a
			 * reference dropped by umtx_key_release().
			 */
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Insert before reading the word so a concurrent wake after the
	 * read (but before sleeping) is not lost.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		/* If already dequeued we were woken: report success. */
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated. Kernel duty is to
			 * return EOWNERDEAD to the userspace. The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case. This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1157 */ 1158static int 1159do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1160{ 1161 struct umtx_key key; 1162 uint32_t owner, old, id, newlock; 1163 int error, count; 1164 1165 id = td->td_tid; 1166 1167again: 1168 /* 1169 * Make sure we own this mtx. 1170 */ 1171 error = fueword32(&m->m_owner, &owner); 1172 if (error == -1) 1173 return (EFAULT); 1174 1175 if ((owner & ~UMUTEX_CONTESTED) != id) 1176 return (EPERM); 1177 1178 newlock = umtx_unlock_val(flags, rb); 1179 if ((owner & UMUTEX_CONTESTED) == 0) { 1180 error = casueword32(&m->m_owner, owner, &old, newlock); 1181 if (error == -1) 1182 return (EFAULT); 1183 if (error == 1) { 1184 error = thread_check_susp(td, false); 1185 if (error != 0) 1186 return (error); 1187 goto again; 1188 } 1189 MPASS(old == owner); 1190 return (0); 1191 } 1192 1193 /* We should only ever be in here for contested locks */ 1194 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1195 &key)) != 0) 1196 return (error); 1197 1198 umtxq_lock(&key); 1199 umtxq_busy(&key); 1200 count = umtxq_count(&key); 1201 umtxq_unlock(&key); 1202 1203 /* 1204 * When unlocking the umtx, it must be marked as unowned if 1205 * there is zero or one thread only waiting for it. 1206 * Otherwise, it must be marked as contested. 1207 */ 1208 if (count > 1) 1209 newlock |= UMUTEX_CONTESTED; 1210 error = casueword32(&m->m_owner, owner, &old, newlock); 1211 umtxq_lock(&key); 1212 umtxq_signal(&key, 1); 1213 umtxq_unbusy(&key); 1214 umtxq_unlock(&key); 1215 umtx_key_release(&key); 1216 if (error == -1) 1217 return (EFAULT); 1218 if (error == 1) { 1219 if (old != owner) 1220 return (EINVAL); 1221 error = thread_check_susp(td, false); 1222 if (error != 0) 1223 return (error); 1224 goto again; 1225 } 1226 return (0); 1227} 1228 1229/* 1230 * Check if the mutex is available and wake up a waiter, 1231 * only for simple mutex. 
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/*
	 * Nothing to do unless the mutex is unowned (possibly with the
	 * contested bit set) or carries a robust owner-dead/unrecoverable
	 * marker.
	 */
	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter and a non-robust state, clear the
	 * contested bit so future unlocks can take the fast path.
	 */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			/* CAS did not store; drop state and retry. */
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	/* Map the mutex protocol/robust flag combination to a key type. */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	/* Pre-zero 'owner' so the wakeup logic below sees a sane value
	 * even if the fueword32() read faults. */
	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair contention bit if there is a waiter, this means
	 * the mutex is still being referenced by userland code,
	 * otherwise don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		/* CAS did not store; retry against the freshly read value. */
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		/* Unusable word: release every waiter rather than hang. */
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Allocate a new PI record with an empty waiter queue and account
 * for it in the global umtx_pi_allocated counter.
 */
static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

/*
 * Free a PI record and drop the allocation counter.
 */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  Returns 0 if pi is NULL, 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Insert before the first lower-priority waiter. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Follow one edge of the blocking graph: the PI mutex that the owner
 * of 'pi' is itself blocked on, or NULL if the chain ends here.
 */
static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.  Detects a cycle in the chain of
 * PI mutex ownership (a deadlock in the blocking graph) by advancing
 * a fast iterator two steps for every slow step.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		/* Fast iterator caught the slow one: there is a cycle. */
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	/* Bail out if the ownership chain contains a cycle. */
	if (umtx_pi_check_loop(pi))
		return;

	/*
	 * Walk the chain of PI mutex owners, lending 'pri' to each owner
	 * whose current lent priority is worse, until the chain ends or
	 * an owner already runs at least that urgently.
	 */
	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/* Owner already has equal or better priority. */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the blocked list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Recompute the best priority among the top waiters of
		 * every PI mutex the owner still holds contested.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue up the chain if the owner is itself blocked. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}


/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.  Returns 0 on success (including when
 * 'owner' already owns it), EPERM if another thread owns it.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		/* Already ours; nothing to do. */
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/* Lend the top waiter's priority to the new owner if better. */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result in a new priority propagating process.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.  'owner' is the tid recorded in the owner word;
 * 'shared' selects process-wide vs. system-wide tid lookup.  Called with
 * the umtxq chain locked and busy; returns with it unlocked.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * Resolve the owner tid to a thread; umtx_lock must be
		 * dropped around tdfind() since it may sleep.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			/* Re-check: someone may have set the owner while
			 * umtx_lock was dropped. */
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* tdfind() returned with the proc locked. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Enqueue in priority order among the blocked threads. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the blocked state and give back any lent priority. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Find or create the umtx_pi for this key.  First try a
	 * non-sleeping allocation under the chain lock; if that fails,
	 * drop the lock for a M_WAITOK allocation and re-check for a
	 * racing insert afterwards.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard our copy. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed the lock could have
				 * changed, restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		/* Self-deadlock: we already hold this mutex. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			/* CAS did not store; re-validate and retry. */
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		/* For a robust unlock the pi may legitimately be unowned. */
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/* Recompute our own lent priority from remaining PI locks. */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		/* CAS did not store; retry after a suspension check. */
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	/* Owner word changed after our ownership check: corrupted lock. */
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Only privileged threads may raise their priority to a ceiling. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/*
		 * Map the userland ceiling onto the kernel realtime
		 * range; the unsigned subtraction makes an out-of-range
		 * input wrap above RTP_PRIO_MAX and fail the check.
		 */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Restore the pre-attempt inherited priority and
		 * recompute what we should be lent from contested locks.
		 */
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	/* On failure (but not owner-dead success), undo the ceiling boost. */
	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute lent priority now that the ceiling is dropped. */
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Set the priority ceiling of a PP mutex, returning the previous
 * ceiling through 'old_ceiling' when non-NULL.  The owner word is
 * used as a gate: the update is only performed while we transiently
 * hold the lock (or already own it).
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* Try to take the (unowned, contested) PP lock briefly. */
		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			rv = suword32(&m->m_ceilings[0], ceiling);
			/* Release the lock word we just acquired. */
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		/* We already own the mutex; update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* Dispatch on the mutex protocol recorded in m_flags. */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		/* Untimed locks restart transparently after a signal. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

/*
 * Wait on a userland condition variable, atomically releasing the
 * associated mutex 'm'.  'wflags' may select an alternate clock
 * (CVWAIT_CLOCKID) and absolute timeouts (CVWAIT_ABSTIME).
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Enqueue before releasing the mutex to avoid a lost wakeup. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		/* We were signalled/broadcast; report success. */
		error = 0;
	else {
		/*
		 * This must be a timeout, interrupted by signal or
		 * spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			/* We were the last waiter on the queue. */
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
/*
 * Wake one waiter; if that empties the queue, also clear the
 * userspace c_has_waiters hint so future waits need not enter
 * the kernel wake path unnecessarily.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* Queue drained: drop the lock to touch userspace. */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Wake all waiters on a userland condition variable and clear the
 * c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}

/*
 * Acquire a userland rwlock in read mode, sleeping as needed.
 *
 * casueword32() returns 0 on a successful compare-and-set, 1 when the
 * comparison failed (new value in oldstate; retry), and -1 on fault.
 * The outer for(;;) restarts the whole acquire after a wakeup or a
 * state change observed while setting the contention bit.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Unless reader preference is requested, a pending writer
	 * (WRITE_WAITERS) also blocks new readers.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * Contention bit is set, before sleeping, increase
		 * read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		/* NOTE(review): suword32() failure is not checked here. */
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: clear the bit. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Acquire a userland rwlock in write mode, sleeping as needed.
 * Structure mirrors do_rw_rdlock() with the exclusive queue and
 * the WRITE_WAITERS contention bit.
 */
static int
do_rw_wrlock(struct thread
*td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		/* Try to take the lock: no owner and no readers. */
		while ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Bailing out with the lock free of writers: kick
			 * any readers we previously observed blocked so
			 * they can re-evaluate the state.
			 */
			if ((state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) == 0 &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* Set the write-contention bit while the lock is held. */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* Lock became free while setting the bit: restart. */
		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		/* NOTE(review): suword32() failure is not checked here. */
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear WRITE_WAITERS. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a userland rwlock held in either read or write mode, and
 * wake waiters according to the lock's reader/writer preference.
 * Returns EPERM if the lock is not held in a mode we can release.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Clear the write-owner bit, retrying on CAS races. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Drop one reader reference, retrying on CAS races. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Choose which waiter queue to wake: writers first by default,
	 * readers first when URWLOCK_PREFER_READER is set.  One writer
	 * or all readers are released.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * Wait on an old-style (struct _usem) userland semaphore, kept for
 * FreeBSD 9/10 binary compatibility.  Sets _has_waiters before
 * checking _count, and restarts the whole sequence when the CAS on
 * _has_waiters loses a race (rv == 1 with count1 == 0).
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv1 = fueword32(&sem->_count, &count);
	/*
	 * Do not sleep when: the userspace access faulted (rv/rv1 == -1),
	 * the semaphore already has a nonzero count, or the CAS failed
	 * with _has_waiters previously 0 (a racing update; retry).
	 */
	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
	    (rv == 1 && count1 == 0)) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == 1) {
			rv = thread_check_susp(td, true);
			if (rv == 0)
				goto again;
			error = rv;
			goto out;
		}
		if (rv == 0)
			rv = rv1;
		error = rv == -1 ? EFAULT : 0;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;	/* already woken: treat as success */
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			/* Waking the last waiter: clear the hint first. */
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
#endif

/*
 * Wait on a new-style (struct _usem2) userland semaphore, where the
 * waiters flag lives in the count word itself (USEM_HAS_WAITERS).
 * On EINTR with a relative timeout, the remaining time is written
 * back into *timeout so the caller can copy it out to userspace.
 */
static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t count, flags;
	int error, rv;

	uq = td->td_umtxq;
	/* NOTE(review): fuword32() result is not checked for fault here,
	 * unlike the fueword32() pattern used elsewhere — confirm. */
	flags = fuword32(&sem->_flags);
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

again:
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = fueword32(&sem->_count, &count);
	if (rv == -1) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (EFAULT);
	}
	for (;;) {
		if (USEM_COUNT(count) != 0) {
			/* Semaphore is available: no need to sleep. */
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (0);
		}
		if (count == USEM_HAS_WAITERS)
			break;
		/* Advertise ourselves as a waiter in the count word. */
		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
		if (rv == 0)
			break;
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		if (rv == -1)
			return (EFAULT);
		rv = thread_check_susp(td, true);
		if (rv != 0)
			return (rv);
		goto again;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;	/* already woken: treat as success */
	else {
		umtxq_remove(uq);
		if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
			/* A relative timeout cannot be restarted. */
			if (error == ERESTART)
				error = EINTR;
			if (error == EINTR) {
				/* Report the unslept time back to the caller. */
				abs_timeout_update(&timo);
				timespecsub(&timo.end, &timo.cur,
				    &timeout->_timeout);
			}
		}
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			/* CAS loop: clear USEM_HAS_WAITERS in the count word. */
			rv = fueword32(&sem->_count, &count);
			while (rv != -1 && count & USEM_HAS_WAITERS) {
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
				if (rv == 1) {
					rv = thread_check_susp(td, true);
					if (rv != 0)
						break;
				}
			}
			if (rv == -1)
				error = EFAULT;
			else if (rv > 0) {
				error = rv;
			}
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Copy a struct timespec from userspace and validate it
 * (non-negative seconds, nanoseconds in [0, 1e9)).
 */
inline int
umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
{
	int error;

	error = copyin(uaddr, tsp, sizeof(*tsp));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

/*
 * Copy a struct _umtx_time from userspace.  Two layouts are accepted:
 * a bare timespec (size <= sizeof(_timeout)), in which case clockid
 * defaults to CLOCK_REALTIME with no flags, or the full structure.
 * The embedded timespec is validated as in umtx_copyin_timeout().
 */
static inline int
umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	int error;

	if (size <= sizeof(tp->_timeout)) {
		tp->_clockid = CLOCK_REALTIME;
		tp->_flags = 0;
		error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
	} else
		error = copyin(uaddr, tp, sizeof(*tp));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}

/*
 * Copy the robust-lists registration parameters from userspace;
 * 'size' may be smaller than the structure for forward compatibility.
 */
static int
umtx_copyin_robust_lists(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rb)
{

	if (size > sizeof(*rb))
		return (EINVAL);
	return (copyin(uaddr, rb, size));
}

/*
 * Copy a struct timespec out to userspace (used to report remaining
 * time after an interrupted relative wait).
 */
static int
umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
{

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(*tsp),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(tsp, uaddr, sizeof(*tsp)));
}

/* Stub for _umtx_op operations that are not implemented. */
static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (EOPNOTSUPP);
}

/*
 * UMTX_OP_WAIT: wait while *obj == val.  uaddr1 carries the size of
 * the optional _umtx_time at uaddr2; word width follows ops->compat32.
 */
static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
}

/* UMTX_OP_WAIT_UINT: as above, but always a 32-bit shared word. */
static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

/* UMTX_OP_WAIT_UINT_PRIVATE: 32-bit word, process-private key. */
static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

/* UMTX_OP_WAKE: wake up to 'val' waiters on the shared word. */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

/* Addresses copied in per batch by the nwake_private handlers. */
#define	BATCH_SIZE	128

/* UMTX_OP_NWAKE_PRIVATE, native pointer width: 'obj' points at an
 * array of 'val' userspace addresses; wake all waiters on each. */
static int
__umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
{
	char *uaddrs[BATCH_SIZE], **upp;
	int count, error, i, pos, tocopy;

	upp = (char **)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i) {
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		}
		maybe_yield();
	}
	return (error);
}

/* UMTX_OP_NWAKE_PRIVATE for 32-bit (compat32) pointer arrays. */
static int
__umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	uint32_t uaddrs[BATCH_SIZE], *upp;
	int count, error, i, pos, tocopy;

	upp = (uint32_t *)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i) {
			kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i],
			    INT_MAX, 1);
		}
		maybe_yield();
	}
	return (error);
}

/* Dispatch UMTX_OP_NWAKE_PRIVATE by ABI pointer width. */
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{

	if (ops->compat32)
		return (__umtx_op_nwake_private_compat32(td, uap));
	return (__umtx_op_nwake_private_native(td, uap));
}

/* UMTX_OP_WAKE_PRIVATE: wake on a process-private key. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}

/* UMTX_OP_MUTEX_LOCK: blocking lock of a userland mutex. */
static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever).
	 */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

/* UMTX_OP_MUTEX_TRYLOCK: non-blocking lock attempt. */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}

/* UMTX_OP_MUTEX_WAIT: wait on a contested mutex without acquiring it. */
static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

/* UMTX_OP_MUTEX_WAKE: wake one waiter of a userland mutex. */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_wake_umutex(td, uap->obj));
}

/* UMTX_OP_MUTEX_UNLOCK: normal (non-robust) unlock. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_unlock_umutex(td, uap->obj, false));
}

/* UMTX_OP_SET_CEILING: set a PP mutex's priority ceiling. */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}

/* UMTX_OP_CV_WAIT: obj is the condvar, uaddr1 the associated mutex,
 * val carries the CVWAIT_* flags. */
static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = ops->copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

/* UMTX_OP_CV_SIGNAL: wake one condvar waiter. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_cv_signal(td, uap->obj));
}

/* UMTX_OP_CV_BROADCAST: wake all condvar waiters. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_cv_broadcast(td, uap->obj));
}

/* UMTX_OP_RW_RDLOCK: val carries URWLOCK_PREFER_READER etc. */
static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = ops->copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

/* UMTX_OP_RW_WRLOCK: write-lock a userland rwlock. */
static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = ops->copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

/* UMTX_OP_RW_UNLOCK: release a userland rwlock. */
static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_rw_unlock(td, uap->obj));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/* UMTX_OP_SEM_WAIT (old _usem ABI, FreeBSD 9/10 compat). */
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

/* UMTX_OP_SEM_WAKE (old _usem ABI, FreeBSD 9/10 compat). */
static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_sem_wake(td, uap->obj));
}
#endif

/* UMTX_OP_MUTEX_WAKE2: second-generation mutex wake primitive. */
static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (do_wake2_umutex(td, uap->obj, uap->val));
}

/* UMTX_OP_SEM2_WAIT: on EINTR with a relative timeout, the remaining
 * time is copied back out to the userspace timeout buffer. */
static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever).
*/ 3796 if (uap->uaddr2 == NULL) { 3797 uasize = 0; 3798 tm_p = NULL; 3799 } else { 3800 uasize = (size_t)uap->uaddr1; 3801 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3802 if (error != 0) 3803 return (error); 3804 tm_p = &timeout; 3805 } 3806 error = do_sem2_wait(td, uap->obj, tm_p); 3807 if (error == EINTR && uap->uaddr2 != NULL && 3808 (timeout._flags & UMTX_ABSTIME) == 0 && 3809 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 3810 error = ops->copyout_timeout( 3811 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 3812 uasize - ops->umtx_time_sz, &timeout._timeout); 3813 if (error == 0) { 3814 error = EINTR; 3815 } 3816 } 3817 3818 return (error); 3819} 3820 3821static int 3822__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 3823 const struct umtx_copyops *ops __unused) 3824{ 3825 3826 return (do_sem2_wake(td, uap->obj)); 3827} 3828 3829#define USHM_OBJ_UMTX(o) \ 3830 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3831 3832#define USHMF_REG_LINKED 0x0001 3833#define USHMF_OBJ_LINKED 0x0002 3834struct umtx_shm_reg { 3835 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3836 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3837 struct umtx_key ushm_key; 3838 struct ucred *ushm_cred; 3839 struct shmfd *ushm_obj; 3840 u_int ushm_refcnt; 3841 u_int ushm_flags; 3842}; 3843 3844LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3845TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3846 3847static uma_zone_t umtx_shm_reg_zone; 3848static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3849static struct mtx umtx_shm_lock; 3850static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3851 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3852 3853static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3854 3855static void 3856umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3857{ 3858 struct umtx_shm_reg_head d; 3859 struct umtx_shm_reg *reg, *reg1; 3860 3861 TAILQ_INIT(&d); 3862 mtx_lock(&umtx_shm_lock); 3863 TAILQ_CONCAT(&d, 
&umtx_shm_reg_delfree, ushm_reg_link); 3864 mtx_unlock(&umtx_shm_lock); 3865 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3866 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3867 umtx_shm_free_reg(reg); 3868 } 3869} 3870 3871static struct task umtx_shm_reg_delfree_task = 3872 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3873 3874static struct umtx_shm_reg * 3875umtx_shm_find_reg_locked(const struct umtx_key *key) 3876{ 3877 struct umtx_shm_reg *reg; 3878 struct umtx_shm_reg_head *reg_head; 3879 3880 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3881 mtx_assert(&umtx_shm_lock, MA_OWNED); 3882 reg_head = &umtx_shm_registry[key->hash]; 3883 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3884 KASSERT(reg->ushm_key.shared, 3885 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3886 if (reg->ushm_key.info.shared.object == 3887 key->info.shared.object && 3888 reg->ushm_key.info.shared.offset == 3889 key->info.shared.offset) { 3890 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3891 KASSERT(reg->ushm_refcnt > 0, 3892 ("reg %p refcnt 0 onlist", reg)); 3893 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3894 ("reg %p not linked", reg)); 3895 reg->ushm_refcnt++; 3896 return (reg); 3897 } 3898 } 3899 return (NULL); 3900} 3901 3902static struct umtx_shm_reg * 3903umtx_shm_find_reg(const struct umtx_key *key) 3904{ 3905 struct umtx_shm_reg *reg; 3906 3907 mtx_lock(&umtx_shm_lock); 3908 reg = umtx_shm_find_reg_locked(key); 3909 mtx_unlock(&umtx_shm_lock); 3910 return (reg); 3911} 3912 3913static void 3914umtx_shm_free_reg(struct umtx_shm_reg *reg) 3915{ 3916 3917 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3918 crfree(reg->ushm_cred); 3919 shm_drop(reg->ushm_obj); 3920 uma_zfree(umtx_shm_reg_zone, reg); 3921} 3922 3923static bool 3924umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3925{ 3926 bool res; 3927 3928 mtx_assert(&umtx_shm_lock, MA_OWNED); 3929 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3930 
	reg->ushm_refcnt--;
	res = reg->ushm_refcnt == 0;
	if (res || force) {
		/* Unlink from the registry hash chain, if linked. */
		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
			    reg, ushm_reg_link);
			reg->ushm_flags &= ~USHMF_REG_LINKED;
		}
		/* Unlink from the backing object's list, if linked. */
		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
			LIST_REMOVE(reg, ushm_obj_link);
			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
		}
	}
	return (res);
}

/*
 * Drop a reference on a registration, freeing it when it was the last.
 * With force, the backing VM object is additionally marked OBJ_UMTXDEAD
 * so umtx_shm_alive() reports the region as destroyed.
 */
static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
{
	vm_object_t object;
	bool dofree;

	if (force) {
		object = reg->ushm_obj->shm_object;
		VM_OBJECT_WLOCK(object);
		object->flags |= OBJ_UMTXDEAD;
		VM_OBJECT_WUNLOCK(object);
	}
	mtx_lock(&umtx_shm_lock);
	dofree = umtx_shm_unref_reg_locked(reg, force);
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		umtx_shm_free_reg(reg);
}

/*
 * vm_object creation hook: initialize the per-object registration list.
 */
void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}

/*
 * vm_object termination hook: force-unlink every registration attached
 * to the dying object.  Freeing is deferred to the taskqueue because
 * umtx_shm_free_reg() cannot run in this context.
 */
void
umtx_shm_object_terminated(vm_object_t object)
{
	struct umtx_shm_reg *reg, *reg1;
	bool dofree;

	dofree = false;
	mtx_lock(&umtx_shm_lock);
	LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
		if (umtx_shm_unref_reg_locked(reg, true)) {
			TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
			    ushm_reg_link);
			dofree = true;
		}
	}
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

/*
 * Find or create the registration for key, returning it referenced in
 * *res.  A new registration charges RLIMIT_UMTXP and backs the key with
 * a one-page anonymous shm object.  A lost creation race is resolved in
 * favor of the already-registered entry.
 */
static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	mtx_lock(&umtx_shm_lock);
	/* Re-check under the lock: another thread may have registered. */
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	/* Second reference is owned by the registry lists. */
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}

/*
 * UMTX_SHM_ALIVE: report whether the mapping at addr is still backed by
 * a live object.  Returns ENOTTY once the object is OBJ_UMTXDEAD,
 * EINVAL for a mapping without an object, EFAULT if unmapped.
 */
static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ?
		    ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

/*
 * One-time initialization of the shm registry zone, lock and hash.
 */
static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

/*
 * Implementation of UMTX_OP_SHM.  Exactly one of CREAT/LOOKUP/DESTROY/
 * ALIVE must be set in flags.  CREAT and LOOKUP install a new O_CLOEXEC
 * file descriptor referencing the registration's shm object.
 */
static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		/* Drop the registry's reference and mark the object dead. */
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
		/* NOTE: MAC/shm_access checks above are compiled out. */
		error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	/* Drop the reference taken by create/find above. */
	umtx_shm_unref_reg(reg, false);
	return (error);
}

/*
 * UMTX_OP_SHM entry: addr in uaddr1, flags in val.
 */
static int
__umtx_op_shm(struct thread *td, struct
    _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

/*
 * UMTX_OP_ROBUST_LISTS: register the thread's robust mutex list heads
 * (params struct at uaddr1, its size in val).  A thread may not mix
 * 32-bit and native robust lists.
 */
static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (ops->compat32) {
		/* Refuse 32-bit registration over existing native lists. */
		if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
		    (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
		    td->td_rb_inact != 0))
			return (EBUSY);
	} else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
		/* And native registration over existing 32-bit lists. */
		return (EBUSY);
	}

	bzero(&rb, sizeof(rb));
	error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
	if (error != 0)
		return (error);

	if (ops->compat32)
		td->td_pflags2 |= TDP2_COMPAT32RB;

	td->td_rb_list = rb.robust_list_offset;
	td->td_rbp_list = rb.robust_priv_list_offset;
	td->td_rb_inact = rb.robust_inact_offset;
	return (0);
}

#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 * 32-bit time_t there.  Other architectures just need the i386 definitions
 * along with their standard compat32.
 */
struct timespecx32 {
	int64_t		tv_sec;
	int32_t		tv_nsec;
};

struct umtx_timex32 {
	struct	timespecx32	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#ifndef __i386__
#define	timespeci386	timespec32
#define	umtx_timei386	umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
struct timespeci386 {
	int32_t		tv_sec;
	int32_t		tv_nsec;
};

struct umtx_timei386 {
	struct	timespeci386	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#if defined(__LP64__)
#define	timespecx32	timespec32
#define	umtx_timex32	umtx_time32
#endif
#endif

/*
 * Copy in a 32-bit robust-lists parameter block.  Short userland
 * structures are accepted (tail is zero-filled); oversized ones are
 * rejected with EINVAL.
 */
static int
umtx_copyin_robust_lists32(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rbp)
{
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (size > sizeof(rb32))
		return (EINVAL);
	bzero(&rb32, sizeof(rb32));
	error = copyin(uaddr, &rb32, size);
	if (error != 0)
		return (error);
	CP(rb32, *rbp, robust_list_offset);
	CP(rb32, *rbp, robust_priv_list_offset);
	CP(rb32, *rbp, robust_inact_offset);
	return (0);
}

#ifndef __i386__
/*
 * Copy in and validate an i386-layout timespec (32-bit tv_sec/tv_nsec).
 */
static inline int
umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
{
	struct timespeci386 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

/*
 * Copy in an i386-layout _umtx_time.  A bare timespec (size <=
 * sizeof(_timeout)) is accepted, with flags defaulting to 0 and the
 * clock to CLOCK_REALTIME.
 */
static inline int
umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timei386 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (t32._timeout.tv_sec < 0 ||
	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

/*
 * Copy out the remaining timeout in i386 layout (sem2_wait EINTR path).
 */
static int
umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespeci386 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* !__i386__ */

#if defined(__i386__) || defined(__LP64__)
/*
 * Copy in and validate an x32-layout timespec (64-bit tv_sec).
 */
static inline int
umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
{
	struct timespecx32 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

/*
 * Copy in an x32-layout _umtx_time; same bare-timespec convention as
 * the i386 variant above.
 */
static inline int
umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timex32 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (t32._timeout.tv_sec < 0 ||
	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

/*
 * Copy out the remaining timeout in x32 layout.
 */
static int
umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespecx32 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* __i386__ || __LP64__ */

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);

/* Dispatch table indexed by UMTX_OP_* number. */
static const _umtx_op_func op_table[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
};
4389static const struct umtx_copyops umtx_native_ops = { 4390 .copyin_timeout = umtx_copyin_timeout, 4391 .copyin_umtx_time = umtx_copyin_umtx_time, 4392 .copyin_robust_lists = umtx_copyin_robust_lists, 4393 .copyout_timeout = umtx_copyout_timeout, 4394 .timespec_sz = sizeof(struct timespec), 4395 .umtx_time_sz = sizeof(struct _umtx_time), 4396}; 4397 4398#ifndef __i386__ 4399static const struct umtx_copyops umtx_native_opsi386 = { 4400 .copyin_timeout = umtx_copyin_timeouti386, 4401 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4402 .copyin_robust_lists = umtx_copyin_robust_lists32, 4403 .copyout_timeout = umtx_copyout_timeouti386, 4404 .timespec_sz = sizeof(struct timespeci386), 4405 .umtx_time_sz = sizeof(struct umtx_timei386), 4406 .compat32 = true, 4407}; 4408#endif 4409 4410#if defined(__i386__) || defined(__LP64__) 4411/* i386 can emulate other 32-bit archs, too! */ 4412static const struct umtx_copyops umtx_native_opsx32 = { 4413 .copyin_timeout = umtx_copyin_timeoutx32, 4414 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4415 .copyin_robust_lists = umtx_copyin_robust_lists32, 4416 .copyout_timeout = umtx_copyout_timeoutx32, 4417 .timespec_sz = sizeof(struct timespecx32), 4418 .umtx_time_sz = sizeof(struct umtx_timex32), 4419 .compat32 = true, 4420}; 4421 4422#ifdef COMPAT_FREEBSD32 4423#ifdef __amd64__ 4424#define umtx_native_ops32 umtx_native_opsi386 4425#else 4426#define umtx_native_ops32 umtx_native_opsx32 4427#endif 4428#endif /* COMPAT_FREEBSD32 */ 4429#endif /* __i386__ || __LP64__ */ 4430 4431#define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4432 4433static int 4434kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4435 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4436{ 4437 struct _umtx_op_args uap = { 4438 .obj = obj, 4439 .op = op & ~UMTX_OP__FLAGS, 4440 .val = val, 4441 .uaddr1 = uaddr1, 4442 .uaddr2 = uaddr2 4443 }; 4444 4445 if ((uap.op >= nitems(op_table))) 4446 return (EINVAL); 4447 return 
((*op_table[uap.op])(td, &uap, ops)); 4448} 4449 4450int 4451sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4452{ 4453 static const struct umtx_copyops *umtx_ops; 4454 4455 umtx_ops = &umtx_native_ops; 4456#ifdef __LP64__ 4457 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4458 if ((uap->op & UMTX_OP__I386) != 0) 4459 umtx_ops = &umtx_native_opsi386; 4460 else 4461 umtx_ops = &umtx_native_opsx32; 4462 } 4463#elif !defined(__i386__) 4464 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4465 if ((uap->op & UMTX_OP__I386) != 0) 4466 umtx_ops = &umtx_native_opsi386; 4467#else 4468 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 4469 if ((uap->op & UMTX_OP__32BIT) != 0) 4470 umtx_ops = &umtx_native_opsx32; 4471#endif 4472 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4473 uap->uaddr2, umtx_ops)); 4474} 4475 4476#ifdef COMPAT_FREEBSD32 4477int 4478freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4479{ 4480 4481 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr, 4482 uap->uaddr2, &umtx_native_ops32)); 4483} 4484#endif 4485 4486void 4487umtx_thread_init(struct thread *td) 4488{ 4489 4490 td->td_umtxq = umtxq_alloc(); 4491 td->td_umtxq->uq_thread = td; 4492} 4493 4494void 4495umtx_thread_fini(struct thread *td) 4496{ 4497 4498 umtxq_free(td->td_umtxq); 4499} 4500 4501/* 4502 * It will be called when new thread is created, e.g fork(). 4503 */ 4504void 4505umtx_thread_alloc(struct thread *td) 4506{ 4507 struct umtx_q *uq; 4508 4509 uq = td->td_umtxq; 4510 uq->uq_inherited_pri = PRI_MAX; 4511 4512 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4513 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4514 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4515 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4516} 4517 4518/* 4519 * exec() hook. 
 *
 * Clear robust lists for all process' threads, not delaying the
 * cleanup to thread exit, since the relevant address space is
 * destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		/* Drop the proc lock: cleanup touches userland memory. */
		PROC_UNLOCK(p);
		umtx_thread_cleanup(td);
		PROC_LOCK(p);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
	PROC_UNLOCK(p);
}

/*
 * thread exit hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

/*
 * Read a userland pointer at ptr into *res, honoring the 32-bit
 * pointer width when compat32.  Returns EFAULT on access failure.
 */
static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
{
	u_long res1;
	uint32_t res32;
	int error;

	if (compat32) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else {
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

/*
 * Extract the robust-list link (m_rb_lnk) from an already-copied-in
 * umutex, using the 32-bit layout when compat32.
 */
static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
    bool compat32)
{
	struct umutex32 m32;

	if (compat32) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else {
		*rb_list = m->m_rb_lnk;
	}
}

/*
 * Process one robust mutex at userland address rbp: optionally follow
 * its list link into *rb_list, then unlock it terminally if this
 * thread owns it.  With inact, an unowned mutex is tolerated.
 */
static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list, compat32);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

/*
 * Walk one robust list (head pointer at userland address rb_list),
 * terminally unlocking each mutex.  The walk is bounded by umtx_max_rb
 * to defend against cyclic or hostile lists; *rb_inact is consumed if
 * encountered on the list.
 */
static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;
	bool compat32;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = PRI_MAX;
		while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
			pi->pi_owner = NULL;
			TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
		}
		mtx_unlock(&umtx_lock);
		/* Drop any priority lent to this thread via PI. */
		thread_lock(td);
		sched_lend_user_prio(td, PRI_MAX);
		thread_unlock(td);
	}

	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
	td->td_pflags2 &= ~TDP2_COMPAT32RB;

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
	/* The inact mutex was not found on either list; unlock it directly. */
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}