kern_umtx.c revision 157815
1/*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 157815 2006-04-17 18:20:38Z jhb $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

/* Key types: a umtx is identified differently depending on VM sharing. */
#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

/*
 * Identity of a userland umtx.  A shared mapping is keyed by its backing
 * VM object and offset; a private mapping by its user address and the
 * owning pid.  The "both" arm overlays the other two so that hashing and
 * comparison (umtxq_hash/umtx_key_match) can treat the key as an opaque
 * (pointer, long) pair regardless of type.
 */
struct umtx_key {
	int	type;
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;
			long		word;
		} both;
	} info;
};

/* Per-thread sleep-queue entry; allocated once per thread (td_umtxq). */
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	struct thread		*uq_thread;	/* The thread waits on. */
	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};

LIST_HEAD(umtx_head, umtx_q);
/*
 * One hash chain: a mutex protecting the chain, the list of sleeping
 * waiters, and a busy/want flag pair implementing a second-level
 * hand-rolled sleepable lock (see umtxq_busy/umtxq_unbusy).
 */
struct umtxq_chain {
	struct mtx	uc_lock;	/* Lock for this chain. */
	struct umtx_head uc_queue;	/* List of sleep queues. */
#define UCF_BUSY	0x01
#define UCF_WANT	0x02
	int		uc_flags;
};

#define GOLDEN_RATIO_PRIME	2654404609U
#define UMTX_CHAINS		128
/* Keep the top log2(UMTX_CHAINS)=7 bits of the multiplicative hash. */
#define UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

/* Allocate a queue entry for a thread; M_WAITOK, so this cannot fail. */
struct umtx_q *
umtxq_alloc(void)
{
	return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK));
}

/* Release a queue entry previously obtained from umtxq_alloc(). */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/* SYSINIT hook: initialize every hash chain's mutex, list and flags. */
static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		/*
		 * MTX_DUPOK: different chains share the same lock name,
		 * and two chain locks are never held simultaneously here,
		 * but witness would otherwise complain about duplicates.
		 */
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
		umtxq_chains[i].uc_flags = 0;
	}
}

/*
 * Map a key to a chain index with a multiplicative (golden-ratio) hash
 * over the opaque (ptr, word) view of the key.
 */
static inline int
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

/* Two keys match iff type and both opaque components are equal. */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.ptr == k2->info.both.ptr &&
		k1->info.both.word == k2->info.both.word);
}

/* Return the mutex guarding the given chain. */
static inline struct mtx *
umtxq_mtx(int chain)
{
	return (&umtxq_chains[chain].uc_lock);
}

/*
 * Mark the key's chain busy, sleeping (chain mutex dropped by msleep)
 * until any current holder clears UCF_BUSY.  Caller must hold the chain
 * mutex; it is still held on return.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		umtxq_chains[chain].uc_flags |= UCF_WANT;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		       0, "umtxq_busy", 0);
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}

/*
 * Clear the busy flag and wake anyone who advertised interest via
 * UCF_WANT.  Caller must hold the chain mutex and own the busy flag.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_flags & UCF_WANT) {
		umtxq_chains[chain].uc_flags &= ~UCF_WANT;
		wakeup(&umtxq_chains[chain]);
	}
}

/* Acquire the chain mutex for the key's hash chain. */
static inline void
umtxq_lock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_lock(umtxq_mtx(chain));
}

/* Release the chain mutex for the key's hash chain. */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_unlock(umtxq_mtx(chain));
}

/*
 * Insert a thread onto the umtx queue.
 *
 * Caller holds the chain mutex.  TDF_UMTXQ is set under sched_lock so
 * the scheduler side sees a consistent flag; the flag is how sleepers
 * later detect "still queued" vs "already woken" (see umtxq_remove and
 * the TDF_UMTXQ checks in _do_lock/do_wait).
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	mtx_lock_spin(&sched_lock);
	uq->uq_thread->td_flags |= TDF_UMTXQ;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	/* TDF_UMTXQ doubles as "still on the list"; idempotent otherwise. */
	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		/* turning off TDF_UMTXQ should be the last thing. */
		mtx_lock_spin(&sched_lock);
		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
		mtx_unlock_spin(&sched_lock);
	}
}

/*
 * Count waiters on the key's chain that match the key.  Stops counting
 * at 2: the only caller distinction needed is "<= 1 waiter" vs "more"
 * (see do_unlock).  Caller holds the chain mutex.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtx_q *uq;
	struct umtx_head *head;
	int chain, count = 0;

	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_FOREACH(uq, head, uq_next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Dequeue and wake up to n_wake matching waiters; returns the number
 * actually woken.  Note the pre-increment: with n_wake == 0 the first
 * match still satisfies "++ret >= n_wake", so at most ONE thread is
 * woken — do_unlock relies on this when it passes 0.
 * Caller holds the chain mutex.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Sleep on the thread pointer with the key's chain mutex as interlock.
 * msleep's EWOULDBLOCK is folded into ETIMEDOUT for callers' timeout
 * loops.  Caller holds the chain mutex; msleep drops and reacquires it.
 */
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	const char *wmesg, int timo)
{
	int chain = umtxq_hash(key);
	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Build the canonical key for a userland umtx address.  If the backing
 * map entry is inherit-shared, key on (VM object, offset) so unrelated
 * processes mapping the same object agree, and take an object reference
 * (released by umtx_key_release).  Otherwise key on (address, pid).
 * Returns 0 or EFAULT if the address does not resolve writably.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}

	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		/*
		 * NOTE(review): the sign here (offset + start - addr) is
		 * the reverse of the usual "addr - start + offset" object
		 * offset, but the value is only ever used as an opaque
		 * key component (info.both.word) for hashing/matching,
		 * where consistency is all that matters — confirm before
		 * reusing it as a real object offset.
		 */
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
	return (0);
}

/* Drop the VM object reference taken by umtx_key_get for shared keys. */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->type == UMTX_SHARED)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Resolve the key for umtx and enqueue the caller on its sleep chain.
 * On success the thread is on the queue with TDF_UMTXQ set and the key
 * holds any shared-object reference; on error nothing is held.
 */
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* hmm, for condition variable, we don't need busy flag. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}

/*
 * Acquire a userland umtx for thread id, sleeping up to timo ticks per
 * pass when contested.  Every userland access goes through casuptr
 * (CAS on a user address) and may fault, returning -1 -> EFAULT.
 * Returns 0 on acquisition, EFAULT, or the msleep error (e.g.
 * ETIMEDOUT, ERESTART/EINTR via PCATCH).
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;
	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		/*
		 * Only sleep if our CAS took effect AND nobody has woken
		 * us yet (TDF_UMTXQ still set); otherwise fall through,
		 * dequeue, and retry the loop with 'error' from the sleep.
		 */
		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
			error = umtxq_sleep(td, &uq->uq_key, PCATCH,
			    "umtx", timo);
		}
		umtxq_busy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * _do_lock wrapper handling an optional absolute-from-now timeout:
 * convert the remaining time to ticks for each _do_lock pass and stop
 * once the uptime deadline passes.  ERESTART is mapped to EINTR so a
 * signal returns to userland instead of restarting the syscall.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* ts = absolute deadline */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Recompute remaining time for the next pass. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a contested umtx owned by thread id.  Userland handles the
 * uncontested unlock itself, so arriving here without UMTX_CONTESTED
 * set is EINVAL.  The new owner word is UMTX_UNOWNED when at most one
 * waiter remains, else UMTX_CONTESTED, and one waiter is woken.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	umtxq_lock(&key);
	/* Busy the chain so waiter counts stay stable across the CAS. */
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	/* n_wake == 0 still wakes one waiter (see umtxq_signal). */
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Futex-style wait: sleep (with optional deadline) while *umtx still
 * equals id, until woken by kern_umtx_wake or interrupted by a signal.
 * A value mismatch returns 0 immediately; a wakeup that races a
 * timeout/signal (TDF_UMTXQ already cleared) also reports success.
 * ERESTART maps to EINTR as in do_lock.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
		return (error);
	/* Queue first, then re-check the value to avoid lost wakeups. */
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		if (td->td_flags & TDF_UMTXQ)
			error = umtxq_sleep(td, &uq->uq_key,
			    PCATCH, "ucond", 0);
		/* Woken (flag cleared by the waker) beats any sleep error. */
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;
		else
			umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* ts = absolute deadline */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			umtxq_lock(&uq->uq_key);
			if (td->td_flags & TDF_UMTXQ) {
				error = umtxq_sleep(td, &uq->uq_key, PCATCH,
				    "ucond", tvtohz(&tv));
			}
			if (!(td->td_flags & TDF_UMTXQ)) {
				/* Genuinely woken; error stays as-is (0
				   on the no-sleep path). */
				umtxq_unlock(&uq->uq_key);
				goto out;
			}
			umtxq_unlock(&uq->uq_key);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up to n_wake threads waiting on uaddr.  NOTE(review): the count
 * returned by umtxq_signal is discarded — success is always reported
 * as 0; callers cannot learn how many threads were woken.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/* _umtx_lock(2): lock uap->umtx on behalf of the calling thread's tid. */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

/* _umtx_unlock(2): unlock uap->umtx owned by the calling thread's tid. */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

/*
 * _umtx_op(2): dispatch LOCK/UNLOCK/WAIT/WAKE.  For the timed ops,
 * uaddr2 optionally points at a relative timespec which is copied in
 * and validated (tv_nsec in [0, 1e9)) before use.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_UNLOCK:
		error = do_unlock(td, uap->umtx, uap->id);
		break;
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_wait(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_WAKE:
		error = kern_umtx_wake(td, uap->umtx, uap->id);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}