/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#include <mach_ldebug.h>

#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/xpr.h>
#include <kern/debug.h>
#include <string.h>

#include <i386/machine_routines.h>	/* machine_timeout_suspended() */
#include <machine/machine_cpu.h>
#include <i386/mp.h>

#include <sys/kdebug.h>
#include <mach/branch_predicates.h>

/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#if	CONFIG_DTRACE
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#endif

#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
#define	LCK_RW_LCK_SHARED_CODE		0x102
#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105

#define	LCK_RW_LCK_EX_WRITER_SPIN_CODE	0x106
#define	LCK_RW_LCK_EX_WRITER_WAIT_CODE	0x107
#define	LCK_RW_LCK_EX_READER_SPIN_CODE	0x108
#define	LCK_RW_LCK_EX_READER_WAIT_CODE	0x109
#define	LCK_RW_LCK_SHARED_SPIN_CODE	0x110
#define	LCK_RW_LCK_SHARED_WAIT_CODE	0x111
#define	LCK_RW_LCK_SH_TO_EX_SPIN_CODE	0x112
#define	LCK_RW_LCK_SH_TO_EX_WAIT_CODE	0x113


#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

unsigned int LcksOpts = 0;

/* Forwards */

#if	USLOCK_DEBUG
/*
 *	Perform simple lock checks.
 */
int	uslock_check = 1;
int	max_lock_loops	= 100000000;
decl_simple_lock_data(extern , printf_lock)
decl_simple_lock_data(extern , panic_lock)
#endif	/* USLOCK_DEBUG */

extern unsigned int not_in_kdp;

/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
typedef void	*pc_t;
#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)
#if	ANY_LOCK_DEBUG
#define	OBTAIN_PC(pc)	((pc) = GET_RETURN_PC())
#define	DECL_PC(pc)	pc_t pc;
#else	/* ANY_LOCK_DEBUG */
#define	DECL_PC(pc)
#ifdef	lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define	OBTAIN_PC(pc)	++pc
#else	/* lint */
#define	OBTAIN_PC(pc)
#endif	/* lint */
#endif	/* ANY_LOCK_DEBUG */


/*
 *	Portable lock package implementation of usimple_locks.
 */
#if	USLOCK_DEBUG
#define	USLDBG(stmt)	stmt
void	usld_lock_init(usimple_lock_t, unsigned short);
void	usld_lock_pre(usimple_lock_t, pc_t);
void	usld_lock_post(usimple_lock_t, pc_t);
void	usld_unlock(usimple_lock_t, pc_t);
void	usld_lock_try_pre(usimple_lock_t, pc_t);
void	usld_lock_try_post(usimple_lock_t, pc_t);
int	usld_lock_common_checks(usimple_lock_t, char *);
#else	/* USLOCK_DEBUG */
#define	USLDBG(stmt)
#endif	/* USLOCK_DEBUG */


extern int lck_rw_grab_want(lck_rw_t *lck);
extern int lck_rw_grab_shared(lck_rw_t *lck);
extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);


/*
 * Forward definitions
 */

void lck_rw_lock_shared_gen(
	lck_rw_t	*lck);

void lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck);

boolean_t lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lck);

boolean_t lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	int		prior_lock_state);

void lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t	*lck,
	int		prior_lock_state);

lck_rw_type_t lck_rw_done_gen(
	lck_rw_t	*lck,
	int		prior_lock_state);

void lck_rw_clear_promotions_x86(thread_t thread);

/*
 *      Routine:        lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_spin_t	*lck;

	if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
		lck_spin_init(lck, grp, attr);

	return(lck);
}

/*
 *      Routine:        lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	lck_spin_destroy(lck, grp);
	kfree(lck, sizeof(lck_spin_t));
}

/*
 *      Routine:        lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t	*lck,
	lck_grp_t	*grp,
	__unused lck_attr_t	*attr)
{
	usimple_lock_init((usimple_lock_t) lck, 0);
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
}

/*
 *      Routine:        lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
		return;
	lck->interlock = LCK_SPIN_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
	lck_grp_deallocate(grp);
	return;
}

/*
 *      Routine:        lck_spin_lock
 */
void
lck_spin_lock(
	lck_spin_t	*lck)
{
	usimple_lock((usimple_lock_t) lck);
}

/*
 *      Routine:        lck_spin_unlock
 */
void
lck_spin_unlock(
	lck_spin_t	*lck)
{
	usimple_unlock((usimple_lock_t) lck);
}


/*
 *      Routine:        lck_spin_try_lock
 */
boolean_t
lck_spin_try_lock(
	lck_spin_t	*lck)
{
	return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
}

/*
 *	Routine:	lck_spin_is_acquired
 *	NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 *	Returns:  TRUE if lock is acquired.
 */
boolean_t
lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("spinlock acquired check done outside of kernel debugger");
	}
	return (lck->interlock != 0) ? TRUE : FALSE;
}
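
/*
 * Illustrative usage sketch for the spin lock routines above (not
 * part of this file; the group name and attribute values shown are
 * hypothetical, and the standard lck_grp_alloc_init() allocator is
 * assumed):
 *
 *	lck_grp_t  *grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_spin_t *sl  = lck_spin_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_spin_lock(sl);		// returns with preemption disabled
 *	// ... short, non-blocking critical section ...
 *	lck_spin_unlock(sl);		// preemption state restored
 *
 *	lck_spin_free(sl, grp);		// destroys lock, drops group count
 */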
/*
 *	Initialize a usimple_lock.
 *
 *	No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t	l,
	__unused unsigned short	tag)
{
#ifndef	MACHINE_SIMPLE_LOCK
	USLDBG(usld_lock_init(l, tag));
	hw_lock_init(&l->interlock);
#else
	simple_lock_init((simple_lock_t)l, tag);
#endif
}

volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;

uint32_t
spinlock_timeout_NMI(uintptr_t thread_addr)
{
	uint64_t deadline;
	uint32_t i;

	for (i = 0; i < real_ncpus; i++) {
		if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
			spinlock_owner_cpu = i;
			if ((uint32_t) cpu_number() == i)
				break;
			cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
			cpu_NMI_interrupt(i);
			deadline = mach_absolute_time() + (LockTimeOut * 2);
			while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
				cpu_pause();
			break;
		}
	}

	return spinlock_owner_cpu;
}

/*
 *	Acquire a usimple_lock.
 *
 *	Returns with preemption disabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_lock(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
		boolean_t uslock_acquired = FALSE;
		while (machine_timeout_suspended()) {
			enable_preemption();
			if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
				break;
		}

		if (uslock_acquired == FALSE) {
			uint32_t lock_cpu;
			uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
			spinlock_timed_out = l;
			lock_cpu = spinlock_timeout_NMI(lowner);
			panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
		}
	}
	USLDBG(usld_lock_post(l, pc));
#else
	simple_lock((simple_lock_t)l);
#endif
}


/*
 *	Release a usimple_lock.
 *
 *	Returns with preemption enabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_unlock(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));
	hw_lock_unlock(&l->interlock);
#else
	simple_unlock_rwmb((simple_lock_t)l);
#endif
}


/*
 *	Conditionally acquire a usimple_lock.
 *
 *	On success, returns with preemption disabled.
 *	On failure, returns with preemption in the same state
 *	as when first invoked.  Note that the hw_lock routines
 *	are responsible for maintaining preemption state.
 *
 *	XXX No stats are gathered on a miss; I preserved this
 *	behavior from the original assembly-language code, but
 *	doesn't it make sense to log misses?  XXX
 */
unsigned int
usimple_lock_try(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	unsigned int	success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try(&l->interlock))) {
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
#else
	return(simple_lock_try((simple_lock_t)l));
#endif
}
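
/*
 * Illustrative pattern for the try variant (a sketch, not code from
 * this file): the caller must be prepared for failure and must not
 * assume preemption was disabled on the failing path.
 *
 *	if (usimple_lock_try(l)) {
 *		// ... critical section, preemption disabled ...
 *		usimple_unlock(l);
 *	} else {
 *		// lock busy; back off or take a slow path
 *	}
 */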
#if	USLOCK_DEBUG
/*
 *	States of a usimple_lock.  The default when initializing
 *	a usimple_lock is setting it up for debug checking.
 */
#define	USLOCK_CHECKED		0x0001		/* lock is being checked */
#define	USLOCK_TAKEN		0x0002		/* lock has been taken */
#define	USLOCK_INIT		0xBAA0		/* lock has been initialized */
#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
#define	USLOCK_CHECKING(l)	(uslock_check &&			\
				 ((l)->debug.state & USLOCK_CHECKED))

/*
 *	Trace activities of a particularly interesting lock.
 */
void	usl_trace(usimple_lock_t, int, pc_t, const char *);


/*
 *	Initialize the debugging information contained
 *	in a usimple_lock.
 */
void
usld_lock_init(
	usimple_lock_t	l,
	__unused unsigned short	tag)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("lock initialization: null lock pointer");
	l->lock_type = USLOCK_TAG;
	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
	l->debug.duration[0] = l->debug.duration[1] = 0;
}


/*
 *	These checks apply to all usimple_locks, not just
 *	those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
	usimple_lock_t	l,
	char		*caller)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("%s:  null lock pointer", caller);
	if (l->lock_type != USLOCK_TAG)
		panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	if (!(l->debug.state & USLOCK_INIT))
		panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	return USLOCK_CHECKING(l);
}


/*
 *	Debug checks on a usimple_lock just before attempting
 *	to acquire it.
 */
/* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t	l,
	pc_t		pc)
{
	char	caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller))
		return;

	/*
	 * Note that we have a weird case where we are getting a lock when we are
	 * in the process of putting the system to sleep.  We are running with no
	 * current threads, therefore we can't tell if we are trying to retake a lock
	 * we have or someone on the other processor has it.  Therefore we just
	 * ignore this test if the locking thread is 0.
	 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock %p already locked (at %p) by",
		       caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		       l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}


/*
 *	Debug checks on a usimple_lock just after acquiring it.
 *
 *	Pre-emption has been disabled at this point,
 *	so we are safe in using cpu_number.
 */
void
usld_lock_post(
	usimple_lock_t	l,
	pc_t		pc)
{
	register int	mycpu;
	char	caller[] = "successful usimple_lock";


	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock %p became uninitialized",
		      caller, l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p became TAKEN by someone else",
		      caller, l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *)current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}


/*
 *	Debug checks on a usimple_lock just before
 *	releasing it.  Note that the caller has not
 *	yet released the hardware lock.
 *
 *	Preemption is still disabled, so there's
 *	no problem using cpu_number.
 */
void
usld_unlock(
	usimple_lock_t	l,
	pc_t		pc)
{
	register int	mycpu;
	char	caller[] = "usimple_unlock";


	if (!usld_lock_common_checks(l, caller))
		return;

	mycpu = cpu_number();

	if (!(l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p hasn't been taken",
		      caller, l);
	if (l->debug.lock_thread != (void *) current_thread())
		panic("%s:  unlocking lock 0x%p, owned by thread %p",
		      caller, l, l->debug.lock_thread);
	if (l->debug.lock_cpu != mycpu) {
		printf("%s:  unlocking lock 0x%p on cpu 0x%x",
		       caller, l, mycpu);
		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
		panic("%s", caller);
	}
	usl_trace(l, mycpu, pc, caller);

	l->debug.unlock_thread = l->debug.lock_thread;
	l->debug.lock_thread = INVALID_THREAD;
	l->debug.state &= ~USLOCK_TAKEN;
	l->debug.unlock_pc = pc;
	l->debug.unlock_cpu = mycpu;
}


/*
 *	Debug checks on a usimple_lock just before
 *	attempting to acquire it.
 *
 *	Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(
	usimple_lock_t	l,
	pc_t		pc)
{
	char	caller[] = "usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}


/*
 *	Debug checks on a usimple_lock just after
 *	successfully attempting to acquire it.
 *
 *	Preemption has been disabled by the
 *	lock acquisition attempt, so it's safe
 *	to use cpu_number.
 */
void
usld_lock_try_post(
	usimple_lock_t	l,
	pc_t		pc)
{
	register int	mycpu;
	char	caller[] = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock 0x%p became uninitialized",
		      caller, l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p became TAKEN by someone else",
		      caller, l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}
/*
 *	For very special cases, set traced_lock to point to a
 *	specific lock of interest.  The result is a series of
 *	XPRs showing lock operations on that lock.  The lock_seq
 *	value is used to show the order of those operations.
 */
usimple_lock_t	traced_lock;
unsigned int	lock_seq;

void
usl_trace(
	usimple_lock_t	l,
	int		mycpu,
	pc_t		pc,
	const char *	op_name)
{
	if (traced_lock == l) {
		XPR(XPR_SLOCK,
		    "seq %d, cpu %d, %s @ %x\n",
		    (uintptr_t) lock_seq, (uintptr_t) mycpu,
		    (uintptr_t) op_name, (uintptr_t) pc, 0);
		lock_seq++;
	}
}


#endif	/* USLOCK_DEBUG */

/*
 *      Routine:        lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_rw_t	*lck;

	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
		bzero(lck, sizeof(lck_rw_t));
		lck_rw_init(lck, grp, attr);
	}

	return(lck);
}

/*
 *      Routine:        lck_rw_free
 */
void
lck_rw_free(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	lck_rw_destroy(lck, grp);
	kfree(lck, sizeof(lck_rw_t));
}

/*
 *      Routine:        lck_rw_init
 */
void
lck_rw_init(
	lck_rw_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_attr_t	*lck_attr = (attr != LCK_ATTR_NULL) ?
					attr : &LockDefaultLckAttr;

	hw_lock_byte_init(&lck->lck_rw_interlock);
	lck->lck_rw_want_write = FALSE;
	lck->lck_rw_want_upgrade = FALSE;
	lck->lck_rw_shared_count = 0;
	lck->lck_rw_can_sleep = TRUE;
	lck->lck_r_waiting = lck->lck_w_waiting = 0;
	lck->lck_rw_tag = 0;
	lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
				  LCK_ATTR_RW_SHARED_PRIORITY) == 0);

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}

/*
 *      Routine:        lck_rw_destroy
 */
void
lck_rw_destroy(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
		return;
#if MACH_LDEBUG
	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
#endif
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
	return;
}

/*
 *	Sleep locks.  These use the same data structure and algorithm
 *	as the spin locks, but the process sleeps while it is waiting
 *	for the lock.  These work on uniprocessor systems.
 */

#define	DECREMENTER_TIMEOUT	1000000

#define RW_LOCK_READER_EVENT(x)		\
		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))

#define RW_LOCK_WRITER_EVENT(x)		\
		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
	boolean_t	istate;

	istate = ml_set_interrupts_enabled(FALSE);
	hw_lock_byte_lock(&lck->lck_rw_interlock);

	return istate;
}

static void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
	hw_lock_byte_unlock(&lck->lck_rw_interlock);
	ml_set_interrupts_enabled(istate);
}
/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes.
 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
 */
static inline void
lck_rw_lock_pause(boolean_t interrupts_enabled)
{
	if (!interrupts_enabled)
		handle_pending_TLB_flushes();
	cpu_pause();
}


/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	if (lck->lck_rw_can_sleep) {
		if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up, so we will not bother spinning at this time...
			 * or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently, and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return (mach_absolute_time());
		}
		return (mach_absolute_time() + MutexSpin);
	} else
		return (mach_absolute_time() + (100000LL * 1000000000LL));
}


/*
 *	Routine:	lck_rw_lock_exclusive_gen
 */
void
lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		slept = 0;
	int		gotlock = 0;
	int		lockheld = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;

#if	CONFIG_DTRACE
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
#endif

	/*
	 *	Try to acquire the lck_rw_want_write bit.
	 */
	while ( !lck_rw_grab_want(lck)) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_want_write) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
			} else {
				lck->lck_rw_want_write = TRUE;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
	/*
	 * Wait for readers (and upgrades) to finish...
	 * the test for these conditions must be done simultaneously with
	 * a check of the interlock not being held since
	 * the rw_shared_count will drop to 0 first and then want_upgrade
	 * will be set to 1 in the shared_to_exclusive scenario... those
	 * adjustments are done behind the interlock and represent an
	 * atomic change in state and must be considered as such
	 * however, once we see the read count at 0, the want_upgrade not set
	 * and the interlock not held, we are safe to proceed
	 */
	while (lck_rw_held_read_or_upgrade(lck)) {

#if	CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is happening, start
		 * a timing of our delay interval now.  If we set it
		 * to -1 we don't have accurate data so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);

		if ( !lockheld)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				/*
				 * must own the lock now, since we checked for
				 * readers or upgrade owner behind the interlock
				 * no need for a call to 'lck_rw_held_read_or_upgrade'
				 */
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are DTrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
/*
 *      Routine:        lck_rw_done_gen
 *
 *	called from the assembly language wrapper...
 *	prior_lock_state is the value in the 1st
 *	word of the lock at the time of a successful
 *	atomic compare and exchange with the new value...
 *	it represents the state of the lock before we
 *	decremented the rw_shared_count or cleared either
 *	rw_want_upgrade or rw_want_write and
 *	the lck_x_waiting bits...  since the wrapper
 *	routine has already changed the state atomically,
 *	we just need to decide if we should
 *	wake up anyone and what value to return... we do
 *	this by examining the state of the lock before
 *	we changed it
 */
lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;
	lck_rw_type_t	lock_type;
	thread_t	thread;
	uint32_t	rwlock_count;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_rw_shared_count <= 1) {
		if (fake_lck->lck_w_waiting)
			thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

		if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
			thread_wakeup(RW_LOCK_READER_EVENT(lck));
	}
	if (fake_lck->lck_rw_shared_count)
		lock_type = LCK_RW_TYPE_SHARED;
	else
		lock_type = LCK_RW_TYPE_EXCLUSIVE;

	/* Check if dropping the lock means that we need to unpromote */
	thread = current_thread();
	rwlock_count = thread->rwlock_count--;
#if MACH_LDEBUG
	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}
#endif
	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread);
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif

	return(lock_type);
}
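
/*
 * Illustrative sketch of the snapshot technique used above (and in the
 * other *_gen routines below), not additional code: the 32-bit
 * prior_lock_state is reinterpreted through a lck_rw_t pointer so its
 * bitfields can be read symbolically.  Only the first word of the
 * faked lock is valid; nothing beyond it may be touched.
 *
 *	int prior_lock_state = ...;	// captured by the atomic exchange
 *	lck_rw_t *fake_lck = (lck_rw_t *)&prior_lock_state;
 *
 *	if (fake_lck->lck_w_waiting)
 *		...;			// a writer was waiting at release time
 */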
/*
 *	Routine:	lck_rw_unlock
 */
void
lck_rw_unlock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_unlock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_unlock_exclusive(lck);
	else
		panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
}


/*
 *	Routine:	lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_SHARED)
		panic("lck_rw_unlock_shared(): lock held in mode: %d\n", ret);
}


/*
 *	Routine:	lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_EXCLUSIVE)
		panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
}


/*
 *	Routine:	lck_rw_lock
 */
void
lck_rw_lock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_lock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_lock_exclusive(lck);
	else
		panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
}


/*
 *	Routine:	lck_rw_lock_shared_gen
 *	Function:
 *		assembly fast path code has determined that this lock
 *		is held exclusively... this is where we spin/block
 *		until we can acquire the lock in the shared mode
 */
void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		gotlock = 0;
	int		slept = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;

#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif

	while ( !lck_rw_grab_shared(lck)) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

		while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
			    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
					     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

				lck->lck_r_waiting = TRUE;

				res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
					     (int)lck, res, slept, 0, 0);
			} else {
				lck->lck_rw_shared_count++;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
}
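
/*
 * Illustrative usage sketch of the rw lock API whose slow paths are
 * implemented in this file (not part of this file; the group name is
 * hypothetical):
 *
 *	lck_grp_t *grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_rw_t  *rw  = lck_rw_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_shared(rw);		// many readers may hold this
 *	// ... read-side critical section ...
 *	lck_rw_done(rw);		// or lck_rw_unlock_shared(rw)
 *
 *	lck_rw_lock_exclusive(rw);	// single writer
 *	// ... write-side critical section ...
 *	lck_rw_done(rw);		// or lck_rw_unlock_exclusive(rw)
 *
 *	lck_rw_free(rw, grp);
 */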
/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_failure
 *	Function:
 *		assembly fast path code has already dropped our read
 *		count and determined that someone else owns 'lck_rw_want_upgrade'
 *		if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
 *		all we need to do here is determine if a wakeup is needed
 */
boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;
	thread_t	thread = current_thread();
	uint32_t	rwlock_count;

	/* Check if dropping the lock means that we need to unpromote */
	rwlock_count = thread->rwlock_count--;
#if MACH_LDEBUG
	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}
#endif
	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread);
	}

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released the read lock, wake
		 *	him up if he's blocked waiting
		 */
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
		     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

	return (FALSE);
}
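
/*
 * Illustrative caller-side consequence (a sketch, not code from this
 * file): when an upgrade attempt fails, the read hold has already been
 * dropped, so the caller holds nothing and must reacquire from scratch
 * and revalidate any state it read under the shared hold.
 *
 *	lck_rw_lock_shared(rw);
 *	if (!lck_rw_lock_shared_to_exclusive(rw)) {
 *		// FALSE: the lock is no longer held in any mode
 *		lck_rw_lock_exclusive(rw);
 *		// ... recheck whatever was observed while shared ...
 *	}
 */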
/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_success
 *	Function:
 *		assembly fast path code has already dropped our read
 *		count and successfully acquired 'lck_rw_want_upgrade'
 *		we just need to wait for the rest of the readers to drain
 *		and then we can return as the exclusive holder of this lock
 */
boolean_t
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		slept = 0;
	int		still_shared = 0;
	wait_result_t	res;
	boolean_t	istate = -1;

#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

	while (lck->lck_rw_shared_count != 0) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

		while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

		if ( !still_shared)
			break;
		/*
		 * if we get here, the deadline has expired w/o
		 * the rw_shared_count having drained to 0
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
					     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
					     (int)lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
#if	CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
	return (TRUE);
}
1487 * 1488 * wake up any waiting readers if we don't have any writers waiting, 1489 * or the lock is NOT marked as rw_priv_excl (writers have privilege) 1490 */ 1491 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) 1492 thread_wakeup(RW_LOCK_READER_EVENT(lck)); 1493 1494 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END, 1495 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0); 1496 1497#if CONFIG_DTRACE 1498 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0); 1499#endif 1500} 1501 1502 1503/* 1504 * Routine: lck_rw_try_lock 1505 */ 1506boolean_t 1507lck_rw_try_lock( 1508 lck_rw_t *lck, 1509 lck_rw_type_t lck_rw_type) 1510{ 1511 if (lck_rw_type == LCK_RW_TYPE_SHARED) 1512 return(lck_rw_try_lock_shared(lck)); 1513 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) 1514 return(lck_rw_try_lock_exclusive(lck)); 1515 else 1516 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type); 1517 return(FALSE); 1518} 1519 1520 1521void 1522lck_rw_assert( 1523 lck_rw_t *lck, 1524 unsigned int type) 1525{ 1526 switch (type) { 1527 case LCK_RW_ASSERT_SHARED: 1528 if (lck->lck_rw_shared_count != 0) { 1529 return; 1530 } 1531 break; 1532 case LCK_RW_ASSERT_EXCLUSIVE: 1533 if ((lck->lck_rw_want_write || 1534 lck->lck_rw_want_upgrade) && 1535 lck->lck_rw_shared_count == 0) { 1536 return; 1537 } 1538 break; 1539 case LCK_RW_ASSERT_HELD: 1540 if (lck->lck_rw_want_write || 1541 lck->lck_rw_want_upgrade || 1542 lck->lck_rw_shared_count != 0) { 1543 return; 1544 } 1545 break; 1546 case LCK_RW_ASSERT_NOTHELD: 1547 if (!(lck->lck_rw_want_write || 1548 lck->lck_rw_want_upgrade || 1549 lck->lck_rw_shared_count != 0)) { 1550 return; 1551 } 1552 break; 1553 default: 1554 break; 1555 } 1556 1557 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? 
"" : " not"), type, *(uint32_t *)lck); 1558} 1559 1560/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */ 1561void 1562lck_rw_clear_promotions_x86(thread_t thread) 1563{ 1564#if MACH_LDEBUG 1565 /* It's fatal to leave a RW lock locked and return to userspace */ 1566 panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread); 1567#else 1568 /* Paper over the issue */ 1569 thread->rwlock_count = 0; 1570 lck_rw_clear_promotion(thread); 1571#endif 1572} 1573 1574 1575#ifdef MUTEX_ZONE 1576extern zone_t lck_mtx_zone; 1577#endif 1578/* 1579 * Routine: lck_mtx_alloc_init 1580 */ 1581lck_mtx_t * 1582lck_mtx_alloc_init( 1583 lck_grp_t *grp, 1584 lck_attr_t *attr) 1585{ 1586 lck_mtx_t *lck; 1587#ifdef MUTEX_ZONE 1588 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0) 1589 lck_mtx_init(lck, grp, attr); 1590#else 1591 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0) 1592 lck_mtx_init(lck, grp, attr); 1593#endif 1594 return(lck); 1595} 1596 1597/* 1598 * Routine: lck_mtx_free 1599 */ 1600void 1601lck_mtx_free( 1602 lck_mtx_t *lck, 1603 lck_grp_t *grp) 1604{ 1605 lck_mtx_destroy(lck, grp); 1606#ifdef MUTEX_ZONE 1607 zfree(lck_mtx_zone, lck); 1608#else 1609 kfree(lck, sizeof(lck_mtx_t)); 1610#endif 1611} 1612 1613/* 1614 * Routine: lck_mtx_ext_init 1615 */ 1616static void 1617lck_mtx_ext_init( 1618 lck_mtx_ext_t *lck, 1619 lck_grp_t *grp, 1620 lck_attr_t *attr) 1621{ 1622 bzero((void *)lck, sizeof(lck_mtx_ext_t)); 1623 1624 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) { 1625 lck->lck_mtx_deb.type = MUTEX_TAG; 1626 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG; 1627 } 1628 1629 lck->lck_mtx_grp = grp; 1630 1631 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) 1632 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT; 1633 1634 lck->lck_mtx.lck_mtx_is_ext = 1; 1635 lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF; 1636} 1637 1638/* 1639 * Routine: lck_mtx_init 1640 */ 1641void 1642lck_mtx_init( 1643 lck_mtx_t *lck, 1644 lck_grp_t *grp, 1645 lck_attr_t *attr) 1646{ 1647 lck_mtx_ext_t *lck_ext; 1648 lck_attr_t *lck_attr; 1649 1650 if (attr != LCK_ATTR_NULL) 1651 lck_attr = attr; 1652 else 1653 lck_attr = &LockDefaultLckAttr; 1654 1655 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) { 1656 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) { 1657 lck_mtx_ext_init(lck_ext, grp, lck_attr); 1658 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; 1659 lck->lck_mtx_ptr = lck_ext; 1660 } 1661 } else { 1662 lck->lck_mtx_owner = 0; 1663 lck->lck_mtx_state = 0; 1664 } 1665 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF; 1666 lck_grp_reference(grp); 1667 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); 1668} 1669 1670/* 1671 * Routine: lck_mtx_init_ext 1672 */ 1673void 1674lck_mtx_init_ext( 1675 lck_mtx_t *lck, 1676 lck_mtx_ext_t *lck_ext, 1677 lck_grp_t *grp, 1678 lck_attr_t *attr) 1679{ 1680 lck_attr_t *lck_attr; 1681 1682 if (attr != LCK_ATTR_NULL) 1683 lck_attr = attr; 1684 else 1685 lck_attr = &LockDefaultLckAttr; 1686 1687 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) { 1688 lck_mtx_ext_init(lck_ext, grp, lck_attr); 1689 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; 1690 lck->lck_mtx_ptr = lck_ext; 1691 } else { 1692 lck->lck_mtx_owner = 0; 1693 lck->lck_mtx_state = 0; 1694 } 1695 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF; 1696 1697 lck_grp_reference(grp); 1698 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); 1699} 1700 1701/* 1702 * Routine: lck_mtx_destroy 1703 */ 1704void 1705lck_mtx_destroy( 1706 lck_mtx_t *lck, 1707 lck_grp_t *grp) 
/*
 *      Routine:        lck_mtx_destroy
 */
void
lck_mtx_destroy(
	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
	boolean_t lck_is_indirect;

	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		return;
#if MACH_LDEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
	lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);

	lck_mtx_lock_mark_destroyed(lck);

	if (lck_is_indirect)
		kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
	return;
}


#define	LCK_MTX_LCK_WAIT_CODE		0x20
#define	LCK_MTX_LCK_WAKEUP_CODE		0x21
#define	LCK_MTX_LCK_SPIN_CODE		0x22
#define	LCK_MTX_LCK_ACQUIRE_CODE	0x23
#define	LCK_MTX_LCK_DEMOTE_CODE		0x24


/*
 * Routine: 	lck_mtx_unlock_wakeup_x86
 *
 * Invoked on unlock when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0 or
 * mutex->lck_mtx_promoted != 0)...
 *
 * neither the mutex nor the interlock is held
 */
void
lck_mtx_unlock_wakeup_x86 (
	lck_mtx_t	*mutex,
	int		prior_lock_state)
{
	lck_mtx_t	fake_lck;

	/*
	 * prior_lock_state is a snapshot of the 2nd word of the
	 * lock in question... we'll fake up a lock with the bits
	 * copied into place and carefully not access anything
	 * beyond what's defined in the second word of a lck_mtx_t
	 */
	fake_lck.lck_mtx_state = prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
		     mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);

	if (__probable(fake_lck.lck_mtx_waiters)) {
		if (fake_lck.lck_mtx_waiters > 1)
			thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
		else
			thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
	}

	if (__improbable(fake_lck.lck_mtx_promoted)) {
		thread_t	thread = current_thread();


		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
			     thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);

		if (thread->promotions > 0) {
			spl_t	s = splsched();

			thread_lock(thread);

			if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {

				thread->sched_flags &= ~TH_SFLAG_PROMOTED;

				if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
					/* Thread still has a RW lock promotion */
				} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
					KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
							      thread->sched_pri, DEPRESSPRI, 0, mutex, 0);

					set_sched_pri(thread, DEPRESSPRI);
				} else {
					if (thread->priority < thread->sched_pri) {
						KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
								      thread->sched_pri, thread->priority, 0, mutex, 0);

						SCHED(compute_priority)(thread, FALSE);
					}
				}
			}
			thread_unlock(thread);
			splx(s);
		}
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
		     mutex, 0, mutex->lck_mtx_waiters, 0, 0);
}
/*
 * Routine: 	lck_mtx_lock_acquire_x86
 *
 * Invoked on acquiring the mutex when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0 or
 * thread->was_promoted_on_wakeup != 0)...
 *
 * mutex is owned...  interlock is held...  preemption is disabled
 */
void
lck_mtx_lock_acquire_x86(
	lck_mtx_t	*mutex)
{
	thread_t	thread;
	integer_t	priority;
	spl_t		s;

	thread = (thread_t)mutex->lck_mtx_owner;	/* faster than current_thread() */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
		     mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);

	if (mutex->lck_mtx_waiters)
		priority = mutex->lck_mtx_pri;
	else
		priority = 0;

	if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
				      thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);

		s = splsched();
		thread_lock(thread);

		if (thread->sched_pri < priority) {
			/* Do not promote past promotion ceiling */
			assert(priority <= MAXPRI_PROMOTE);
			set_sched_pri(thread, priority);
		}
		if (mutex->lck_mtx_promoted == 0) {
			mutex->lck_mtx_promoted = 1;

			thread->promotions++;
			thread->sched_flags |= TH_SFLAG_PROMOTED;
		}
		thread->was_promoted_on_wakeup = 0;

		thread_unlock(thread);
		splx(s);
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
		     mutex, 0, mutex->lck_mtx_waiters, 0, 0);
}



/*
 * Routine: 	lck_mtx_lock_spinwait_x86
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor.  We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 * returns 0 if mutex acquired
 * returns 1 if we spun
 * returns 2 if we didn't spin due to the holder not running
 */
int
lck_mtx_lock_spinwait_x86(
	lck_mtx_t	*mutex)
{
	thread_t	holder;
	uint64_t	deadline;
	int		retval = 1;
	int		loopcount = 0;


	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);

	deadline = mach_absolute_time() + MutexSpin;

	/*
	 * Spin while:
	 *   - mutex is locked, and
	 *   - it's locked as a spin lock, and
	 *   - owner is running on another processor, and
	 *   - owner (processor) is not idling, and
	 *   - we haven't spun for long enough.
	 */
	do {
		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
			retval = 0;
			break;
		}
		if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {

			if ( !(holder->machine.specFlags & OnProc) ||
			     (holder->state & TH_IDLE)) {
				if (loopcount == 0)
					retval = 2;
				break;
			}
		}
		cpu_pause();

		loopcount++;

	} while (mach_absolute_time() < deadline);


#if	CONFIG_DTRACE
	/*
	 * We've already kept a count via deadline of how long we spun.
	 * If dtrace is active, then we compute backwards to decide how
	 * long we spun.
	 *
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex.  This allows us to
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (__probable(mutex->lck_mtx_is_ext == 0)) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
		    mach_absolute_time() - (deadline - MutexSpin));
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
		    mach_absolute_time() - (deadline - MutexSpin));
	}
	/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);

	return retval;
}



/*
 * Routine: 	lck_mtx_lock_wait_x86
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * preemption disabled...
 * returns it unlocked and with preemption enabled
 */
void
lck_mtx_lock_wait_x86 (
	lck_mtx_t	*mutex)
{
	thread_t	self = current_thread();
	thread_t	holder;
	integer_t	priority;
	spl_t		s;
#if	CONFIG_DTRACE
	uint64_t	sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);

	priority = self->sched_pri;

	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
		mutex->lck_mtx_pri = priority;
	mutex->lck_mtx_waiters++;

	if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
	     holder->sched_pri < mutex->lck_mtx_pri ) {
		s = splsched();
		thread_lock(holder);

		/*
		 * holder's priority may have been bumped by another thread
		 * before thread_lock was taken
		 */
		if (holder->sched_pri < mutex->lck_mtx_pri) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
				holder->sched_pri, priority, thread_tid(holder), mutex, 0);
			/*
			 * Assert that we're not altering the priority of a
			 * thread above the MAXPRI_PROMOTE band
			 */
			assert(holder->sched_pri < MAXPRI_PROMOTE);
			set_sched_pri(holder, priority);

			if (mutex->lck_mtx_promoted == 0) {
				holder->promotions++;
				holder->sched_flags |= TH_SFLAG_PROMOTED;

				mutex->lck_mtx_promoted = 1;
			}
		}
		thread_unlock(holder);
		splx(s);
	}
	assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);

	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);

#if	CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (mutex->lck_mtx_is_ext == 0) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}