/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#include <mach_ldebug.h>

#include <kern/lock.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/xpr.h>
#include <kern/debug.h>
#include <string.h>

#include <i386/machine_routines.h>	/* machine_timeout_suspended() */
#include <machine/machine_cpu.h>
#include <i386/mp.h>

#include <sys/kdebug.h>
#include <mach/branch_predicates.h>

/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#if	CONFIG_DTRACE
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#endif

#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
#define	LCK_RW_LCK_SHARED_CODE		0x102
#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105

#define	LCK_RW_LCK_EX_WRITER_SPIN_CODE	0x106
#define	LCK_RW_LCK_EX_WRITER_WAIT_CODE	0x107
#define	LCK_RW_LCK_EX_READER_SPIN_CODE	0x108
#define	LCK_RW_LCK_EX_READER_WAIT_CODE	0x109
#define	LCK_RW_LCK_SHARED_SPIN_CODE	0x110
#define	LCK_RW_LCK_SHARED_WAIT_CODE	0x111
#define	LCK_RW_LCK_SH_TO_EX_SPIN_CODE	0x112
#define	LCK_RW_LCK_SH_TO_EX_WAIT_CODE	0x113


#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

unsigned int LcksOpts = 0;

/* Forwards */

#if	USLOCK_DEBUG
/*
 *	Perform simple lock checks.
 */
int	uslock_check = 1;
int	max_lock_loops = 100000000;
decl_simple_lock_data(extern , printf_lock)
decl_simple_lock_data(extern , panic_lock)
#endif	/* USLOCK_DEBUG */


/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
typedef void	*pc_t;
#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)
#if	ANY_LOCK_DEBUG
#define	OBTAIN_PC(pc)	((pc) = GET_RETURN_PC())
#define DECL_PC(pc)	pc_t pc;
#else	/* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef	lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define	OBTAIN_PC(pc)	++pc
#else	/* lint */
#define	OBTAIN_PC(pc)
#endif	/* lint */
#endif	/* ANY_LOCK_DEBUG */


/*
 *	Portable lock package implementation of usimple_locks.
 */

#if	USLOCK_DEBUG
#define	USLDBG(stmt)	stmt
void	usld_lock_init(usimple_lock_t, unsigned short);
void	usld_lock_pre(usimple_lock_t, pc_t);
void	usld_lock_post(usimple_lock_t, pc_t);
void	usld_unlock(usimple_lock_t, pc_t);
void	usld_lock_try_pre(usimple_lock_t, pc_t);
void	usld_lock_try_post(usimple_lock_t, pc_t);
int	usld_lock_common_checks(usimple_lock_t, char *);
#else	/* USLOCK_DEBUG */
#define	USLDBG(stmt)
#endif	/* USLOCK_DEBUG */


extern int lck_rw_grab_want(lck_rw_t *lck);
extern int lck_rw_grab_shared(lck_rw_t *lck);
extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);


/*
 * Forward definitions
 */

void lck_rw_lock_shared_gen(
	lck_rw_t	*lck);

void lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck);

boolean_t lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lck);

boolean_t lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	int		prior_lock_state);

void lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t	*lck,
	int		prior_lock_state);

lck_rw_type_t lck_rw_done_gen(
	lck_rw_t	*lck,
	int		prior_lock_state);

/*
 *      Routine:        lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_spin_t	*lck;

	if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
		lck_spin_init(lck, grp, attr);

	return(lck);
}

/*
 *      Routine:        lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	lck_spin_destroy(lck, grp);
	kfree(lck, sizeof(lck_spin_t));
}

/*
 *      Routine:        lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t	*lck,
	lck_grp_t	*grp,
	__unused lck_attr_t	*attr)
{
	usimple_lock_init((usimple_lock_t) lck, 0);
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
}

/*
 *      Routine:        lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
		return;
	lck->interlock = LCK_SPIN_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
	lck_grp_deallocate(grp);
	return;
}

/*
 *      Routine:        lck_spin_lock
 */
void
lck_spin_lock(
	lck_spin_t	*lck)
{
	usimple_lock((usimple_lock_t) lck);
}

/*
 *      Routine:        lck_spin_unlock
 */
void
lck_spin_unlock(
	lck_spin_t	*lck)
{
	usimple_unlock((usimple_lock_t) lck);
}


/*
 *      Routine:        lck_spin_try_lock
 */
boolean_t
lck_spin_try_lock(
	lck_spin_t	*lck)
{
	return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
}
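
/*
 * Editor's illustrative sketch (not part of the original file): typical use
 * of the lck_spin interface defined above.  The group/attr setup uses the
 * standard lck_grp_* calls from <kern/locks.h>; the group name and the
 * wrapper function are made-up examples.  Guarded out of the build.
 */
#if 0	/* example only */
static void
example_spin_usage(void)
{
	lck_grp_t	*grp;
	lck_spin_t	*slock;

	grp = lck_grp_alloc_init("example.spinlocks", LCK_GRP_ATTR_NULL);
	slock = lck_spin_alloc_init(grp, LCK_ATTR_NULL);

	lck_spin_lock(slock);			/* returns with preemption disabled */
	/* ... short critical section; must not block ... */
	lck_spin_unlock(slock);

	if (lck_spin_try_lock(slock)) {		/* non-blocking attempt */
		lck_spin_unlock(slock);
	}

	lck_spin_free(slock, grp);
	lck_grp_free(grp);
}
#endif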
/*
 *	Initialize a usimple_lock.
 *
 *	No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t	l,
	__unused unsigned short	tag)
{
#ifndef	MACHINE_SIMPLE_LOCK
	USLDBG(usld_lock_init(l, tag));
	hw_lock_init(&l->interlock);
#else
	simple_lock_init((simple_lock_t)l, tag);
#endif
}

volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;

static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
	uint64_t deadline;
	uint32_t i;

	for (i = 0; i < real_ncpus; i++) {
		if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
			spinlock_owner_cpu = i;
			if ((uint32_t) cpu_number() == i)
				break;
			cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
			cpu_NMI_interrupt(i);
			deadline = mach_absolute_time() + (LockTimeOut * 2);
			while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
				cpu_pause();
			break;
		}
	}

	return spinlock_owner_cpu;
}

/*
 *	Acquire a usimple_lock.
 *
 *	Returns with preemption disabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_lock(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
		boolean_t uslock_acquired = FALSE;
		while (machine_timeout_suspended()) {
			enable_preemption();
			if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
				break;
		}

		if (uslock_acquired == FALSE) {
			uint32_t lock_cpu;
			uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
			spinlock_timed_out = l;
			lock_cpu = spinlock_timeout_NMI(lowner);
			panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
		}
	}
	USLDBG(usld_lock_post(l, pc));
#else
	simple_lock((simple_lock_t)l);
#endif
}


/*
 *	Release a usimple_lock.
 *
 *	Returns with preemption enabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_unlock(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));
	hw_lock_unlock(&l->interlock);
#else
	simple_unlock_rwmb((simple_lock_t)l);
#endif
}


/*
 *	Conditionally acquire a usimple_lock.
 *
 *	On success, returns with preemption disabled.
 *	On failure, returns with preemption in the same state
 *	as when first invoked.  Note that the hw_lock routines
 *	are responsible for maintaining preemption state.
 *
 *	XXX No stats are gathered on a miss; I preserved this
 *	behavior from the original assembly-language code, but
 *	doesn't it make sense to log misses?  XXX
 */
unsigned int
usimple_lock_try(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	unsigned int	success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try(&l->interlock))) {
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
#else
	return(simple_lock_try((simple_lock_t)l));
#endif
}
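
/*
 * Editor's illustrative sketch (not part of the original file): how the
 * usimple_lock routines above are typically used.  The lock variable and
 * wrapper function are hypothetical.  Note the preemption semantics spelled
 * out in the comments above: usimple_lock() returns with preemption
 * disabled, usimple_unlock() restores it.  Guarded out of the build.
 */
#if 0	/* example only */
static usimple_lock_data_t example_uslock;

static void
example_usimple_usage(void)
{
	usimple_lock_init(&example_uslock, 0);

	usimple_lock(&example_uslock);		/* preemption now disabled */
	/* ... short critical section ... */
	usimple_unlock(&example_uslock);	/* preemption restored */

	if (usimple_lock_try(&example_uslock)) {
		/* acquired without waiting; preemption disabled until unlock */
		usimple_unlock(&example_uslock);
	}
}
#endif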
#if	USLOCK_DEBUG
/*
 *	States of a usimple_lock.  The default when initializing
 *	a usimple_lock is setting it up for debug checking.
 */
#define	USLOCK_CHECKED		0x0001		/* lock is being checked */
#define	USLOCK_TAKEN		0x0002		/* lock has been taken */
#define	USLOCK_INIT		0xBAA0		/* lock has been initialized */
#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
#define	USLOCK_CHECKING(l)	(uslock_check &&	\
				 ((l)->debug.state & USLOCK_CHECKED))

/*
 *	Trace activities of a particularly interesting lock.
 */
void	usl_trace(usimple_lock_t, int, pc_t, const char *);


/*
 *	Initialize the debugging information contained
 *	in a usimple_lock.
 */
void
usld_lock_init(
	usimple_lock_t	l,
	__unused unsigned short	tag)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("lock initialization: null lock pointer");
	l->lock_type = USLOCK_TAG;
	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
	l->debug.duration[0] = l->debug.duration[1] = 0;
	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
}


/*
 *	These checks apply to all usimple_locks, not just
 *	those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
	usimple_lock_t	l,
	char		*caller)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("%s:  null lock pointer", caller);
	if (l->lock_type != USLOCK_TAG)
		panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	if (!(l->debug.state & USLOCK_INIT))
		panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	return USLOCK_CHECKING(l);
}


/*
 *	Debug checks on a usimple_lock just before attempting
 *	to acquire it.
 */
/* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t	l,
	pc_t		pc)
{
	char	caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller))
		return;

/*
 *	Note that we have a weird case where we are getting a lock when we are
 *	in the process of putting the system to sleep.  We are running with no
 *	current threads, therefore we can't tell if we are trying to retake a lock
 *	we have or someone on the other processor has it.  Therefore we just
 *	ignore this test if the locking thread is 0.
 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock %p already locked (at %p) by",
		      caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		       l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}


/*
 *	Debug checks on a usimple_lock just after acquiring it.
 *
 *	Pre-emption has been disabled at this point,
 *	so we are safe in using cpu_number.
 */
void
usld_lock_post(
	usimple_lock_t	l,
	pc_t		pc)
{
	register int	mycpu;
	char	caller[] = "successful usimple_lock";


	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock %p became uninitialized",
		      caller, l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p became TAKEN by someone else",
		      caller, l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *)current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}


/*
 *	Debug checks on a usimple_lock just before
 *	releasing it.  Note that the caller has not
 *	yet released the hardware lock.
 *
 *	Preemption is still disabled, so there's
 *	no problem using cpu_number.
 */
void
usld_unlock(
	usimple_lock_t	l,
	pc_t		pc)
{
	register int	mycpu;
	char	caller[] = "usimple_unlock";


	if (!usld_lock_common_checks(l, caller))
		return;

	mycpu = cpu_number();

	if (!(l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p hasn't been taken",
		      caller, l);
	if (l->debug.lock_thread != (void *) current_thread())
		panic("%s:  unlocking lock 0x%p, owned by thread %p",
		      caller, l, l->debug.lock_thread);
	if (l->debug.lock_cpu != mycpu) {
		printf("%s:  unlocking lock 0x%p on cpu 0x%x",
		       caller, l, mycpu);
		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
		panic("%s", caller);
	}
	usl_trace(l, mycpu, pc, caller);

	l->debug.unlock_thread = l->debug.lock_thread;
	l->debug.lock_thread = INVALID_THREAD;
	l->debug.state &= ~USLOCK_TAKEN;
	l->debug.unlock_pc = pc;
	l->debug.unlock_cpu = mycpu;
}


/*
 *	Debug checks on a usimple_lock just before
 *	attempting to acquire it.
 *
 *	Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(
	usimple_lock_t	l,
	pc_t		pc)
{
	char	caller[] = "usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}


/*
 *	Debug checks on a usimple_lock just after
 *	successfully attempting to acquire it.
 *
 *	Preemption has been disabled by the
 *	lock acquisition attempt, so it's safe
 *	to use cpu_number.
 */
void
usld_lock_try_post(
	usimple_lock_t	l,
	pc_t		pc)
{
	register int	mycpu;
	char	caller[] = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock 0x%p became uninitialized",
		      caller, l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p became TAKEN by someone else",
		      caller, l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}


/*
 *	For very special cases, set traced_lock to point to a
 *	specific lock of interest.  The result is a series of
 *	XPRs showing lock operations on that lock.  The lock_seq
 *	value is used to show the order of those operations.
 */
usimple_lock_t	traced_lock;
unsigned int	lock_seq;

void
usl_trace(
	usimple_lock_t	l,
	int		mycpu,
	pc_t		pc,
	const char *	op_name)
{
	if (traced_lock == l) {
		XPR(XPR_SLOCK,
		    "seq %d, cpu %d, %s @ %x\n",
		    (uintptr_t) lock_seq, (uintptr_t) mycpu,
		    (uintptr_t) op_name, (uintptr_t) pc, 0);
		lock_seq++;
	}
}


#endif	/* USLOCK_DEBUG */

/*
 *	Routine:	lock_alloc
 *	Function:
 *		Allocate a lock for external users who cannot
 *		hard-code the structure definition into their
 *		objects.
 *		For now just use kalloc, but a zone is probably
 *		warranted.
 */
lock_t *
lock_alloc(
	boolean_t	can_sleep,
	unsigned short	tag,
	unsigned short	tag1)
{
	lock_t		*l;

	if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
		lock_init(l, can_sleep, tag, tag1);
	return(l);
}

/*
 *	Routine:	lock_free
 *	Function:
 *		Free a lock allocated for external users.
 *		For now just use kfree, but a zone is probably
 *		warranted.
 */
void
lock_free(
	lock_t		*l)
{
	kfree(l, sizeof(lock_t));
}


/*
 *	Routine:	lock_init
 *	Function:
 *		Initialize a lock; required before use.
 *		Note that clients declare the "struct lock"
 *		variables and then initialize them, rather
 *		than getting a new one from this module.
 */
void
lock_init(
	lock_t		*l,
	boolean_t	can_sleep,
	__unused unsigned short	tag,
	__unused unsigned short	tag1)
{
	hw_lock_byte_init(&l->lck_rw_interlock);
	l->lck_rw_want_write = FALSE;
	l->lck_rw_want_upgrade = FALSE;
	l->lck_rw_shared_count = 0;
	l->lck_rw_can_sleep = can_sleep;
	l->lck_rw_tag = tag;
	l->lck_rw_priv_excl = 1;
	l->lck_r_waiting = l->lck_w_waiting = 0;
}


/*
 *	Sleep locks.  These use the same data structure and algorithm
 *	as the spin locks, but the process sleeps while it is waiting
 *	for the lock.  These work on uniprocessor systems.
 */

#define DECREMENTER_TIMEOUT 1000000

void
lock_write(
	register lock_t	* l)
{
	lck_rw_lock_exclusive(l);
}

void
lock_done(
	register lock_t	* l)
{
	(void) lck_rw_done(l);
}

void
lock_read(
	register lock_t	* l)
{
	lck_rw_lock_shared(l);
}


/*
 *	Routine:	lock_read_to_write
 *	Function:
 *		Improves a read-only lock to one with
 *		write permission.  If another reader has
 *		already requested an upgrade to a write lock,
 *		no lock is held upon return.
 *
 *		Returns FALSE if the upgrade *failed*.
 */

boolean_t
lock_read_to_write(
	register lock_t	* l)
{
	return lck_rw_lock_shared_to_exclusive(l);
}

void
lock_write_to_read(
	register lock_t	* l)
{
	lck_rw_lock_exclusive_to_shared(l);
}



/*
 *      Routine:        lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr) {
	lck_rw_t	*lck;

	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
		bzero(lck, sizeof(lck_rw_t));
		lck_rw_init(lck, grp, attr);
	}

	return(lck);
}

/*
 *      Routine:        lck_rw_free
 */
void
lck_rw_free(
	lck_rw_t	*lck,
	lck_grp_t	*grp) {
	lck_rw_destroy(lck, grp);
	kfree(lck, sizeof(lck_rw_t));
}

/*
 *      Routine:        lck_rw_init
 */
void
lck_rw_init(
	lck_rw_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_attr_t	*lck_attr = (attr != LCK_ATTR_NULL) ?
					attr : &LockDefaultLckAttr;

	hw_lock_byte_init(&lck->lck_rw_interlock);
	lck->lck_rw_want_write = FALSE;
	lck->lck_rw_want_upgrade = FALSE;
	lck->lck_rw_shared_count = 0;
	lck->lck_rw_can_sleep = TRUE;
	lck->lck_r_waiting = lck->lck_w_waiting = 0;
	lck->lck_rw_tag = 0;
	lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
				LCK_ATTR_RW_SHARED_PRIORITY) == 0);

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}

/*
 *      Routine:        lck_rw_destroy
 */
void
lck_rw_destroy(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
		return;
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
	return;
}

/*
 *	Sleep locks.  These use the same data structure and algorithm
 *	as the spin locks, but the process sleeps while it is waiting
 *	for the lock.  These work on uniprocessor systems.
 */

#define DECREMENTER_TIMEOUT 1000000

#define RW_LOCK_READER_EVENT(x)		\
		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))

#define RW_LOCK_WRITER_EVENT(x)		\
		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
	boolean_t	istate;

	istate = ml_set_interrupts_enabled(FALSE);
	hw_lock_byte_lock(&lck->lck_rw_interlock);

	return istate;
}

static void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
	hw_lock_byte_unlock(&lck->lck_rw_interlock);
	ml_set_interrupts_enabled(istate);
}
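
/*
 * Editor's illustrative sketch (not part of the original file): the
 * save/restore pattern the two helpers above implement.  The caller keeps
 * the previous interrupt state returned by lck_interlock_lock() and hands it
 * back to lck_interlock_unlock(), so the pair nests correctly under callers
 * that already run with interrupts disabled.  The wrapper function is
 * hypothetical.  Guarded out of the build.
 */
#if 0	/* example only */
static void
example_interlock_usage(lck_rw_t *lck)
{
	boolean_t istate;

	istate = lck_interlock_lock(lck);	/* interrupts off, interlock held */
	/* ... examine/adjust the lck_rw_t state words ... */
	lck_interlock_unlock(lck, istate);	/* interlock dropped, interrupt state restored */
}
#endif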
/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes.
 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
 */
static inline void
lck_rw_lock_pause(boolean_t interrupts_enabled)
{
	if (!interrupts_enabled)
		handle_pending_TLB_flushes();
	cpu_pause();
}


/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	if (lck->lck_rw_can_sleep) {
		if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 * or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return (mach_absolute_time());
		}
		return (mach_absolute_time() + MutexSpin);
	} else
		return (mach_absolute_time() + (100000LL * 1000000000LL));
}


/*
 *	Routine:	lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		slept = 0;
	int		gotlock = 0;
	int		lockheld = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;

#if	CONFIG_DTRACE
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
#endif

	/*
	 *	Try to acquire the lck_rw_want_write bit.
	 */
	while ( !lck_rw_grab_want(lck)) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_want_write) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
			} else {
				lck->lck_rw_want_write = TRUE;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
	/*
	 * Wait for readers (and upgrades) to finish...
	 * the test for these conditions must be done simultaneously with
	 * a check of the interlock not being held since
	 * the rw_shared_count will drop to 0 first and then want_upgrade
	 * will be set to 1 in the shared_to_exclusive scenario... those
	 * adjustments are done behind the interlock and represent an
	 * atomic change in state and must be considered as such
	 * however, once we see the read count at 0, the want_upgrade not set
	 * and the interlock not held, we are safe to proceed
	 */
	while (lck_rw_held_read_or_upgrade(lck)) {

#if	CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is happening, start
		 * a timing of our delay interval now.  If we set it
		 * to -1 we don't have accurate data so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);

		if ( !lockheld)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				/*
				 * must own the lock now, since we checked for
				 * readers or upgrade owner behind the interlock
				 * no need for a call to 'lck_rw_held_read_or_upgrade'
				 */
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
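
/*
 * Editor's illustrative sketch (not part of the original file): the
 * spin-then-block shape used by lck_rw_lock_exclusive_gen() above, reduced
 * to its skeleton.  example_state_available() and example_event are
 * hypothetical stand-ins for the lck_rw_grab_*() checks and the
 * RW_LOCK_*_EVENT() wait channels.  Guarded out of the build.
 */
#if 0	/* example only */
extern int example_state_available(lck_rw_t *lck);	/* hypothetical */

static void
example_spin_then_block(lck_rw_t *lck, event_t example_event)
{
	uint64_t	deadline;
	wait_result_t	res;
	boolean_t	istate = ml_get_interrupts_enabled();

	while ( !example_state_available(lck)) {
		/* 1) spin politely until the adaptive deadline expires */
		deadline = lck_rw_deadline_for_spin(lck);
		while ( !example_state_available(lck) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		if (example_state_available(lck))
			break;
		/* 2) otherwise record a waiter behind the interlock and block */
		istate = lck_interlock_lock(lck);
		lck->lck_w_waiting = TRUE;
		res = assert_wait(example_event, THREAD_UNINT);
		lck_interlock_unlock(lck, istate);

		if (res == THREAD_WAITING)
			thread_block(THREAD_CONTINUE_NULL);
	}
}
#endif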
/*
 *      Routine:        lck_rw_done_gen
 *
 *	called from the assembly language wrapper...
 *	prior_lock_state is the value in the 1st
 *	word of the lock at the time of a successful
 *	atomic compare and exchange with the new value...
 *	it represents the state of the lock before we
 *	decremented the rw_shared_count or cleared either
 *	rw_want_upgrade or rw_want_write and
 *	the lck_x_waiting bits...  since the wrapper
 *	routine has already changed the state atomically,
 *	we just need to decide if we should
 *	wake up anyone and what value to return...  we do
 *	this by examining the state of the lock before
 *	we changed it
 */
lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;
	lck_rw_type_t	lock_type;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_rw_shared_count <= 1) {
		if (fake_lck->lck_w_waiting)
			thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

		if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
			thread_wakeup(RW_LOCK_READER_EVENT(lck));
	}
	if (fake_lck->lck_rw_shared_count)
		lock_type = LCK_RW_TYPE_SHARED;
	else
		lock_type = LCK_RW_TYPE_EXCLUSIVE;

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif

	return(lock_type);
}


/*
 *	Routine:	lck_rw_unlock
 */
void
lck_rw_unlock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_unlock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_unlock_exclusive(lck);
	else
		panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
}


/*
 *	Routine:	lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_SHARED)
		panic("lck_rw_unlock_shared(): lock held in mode: %d\n", ret);
}


/*
 *	Routine:	lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_EXCLUSIVE)
		panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
}


/*
 *	Routine:	lck_rw_lock
 */
void
lck_rw_lock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_lock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_lock_exclusive(lck);
	else
		panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
}
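
/*
 * Editor's illustrative sketch (not part of the original file): typical use
 * of the lck_rw interface, pairing lck_rw_lock()/lck_rw_unlock() by type and
 * the shared/exclusive wrappers defined above.  The group name and wrapper
 * function are made-up examples.  Guarded out of the build.
 */
#if 0	/* example only */
static void
example_rw_usage(void)
{
	lck_grp_t	*grp;
	lck_rw_t	*rwlock;

	grp = lck_grp_alloc_init("example.rwlocks", LCK_GRP_ATTR_NULL);
	rwlock = lck_rw_alloc_init(grp, LCK_ATTR_NULL);

	lck_rw_lock_shared(rwlock);		/* many readers may hold this */
	/* ... read-only access to protected state ... */
	lck_rw_unlock_shared(rwlock);

	lck_rw_lock(rwlock, LCK_RW_TYPE_EXCLUSIVE);	/* single writer */
	/* ... modify protected state ... */
	lck_rw_unlock(rwlock, LCK_RW_TYPE_EXCLUSIVE);

	lck_rw_free(rwlock, grp);
	lck_grp_free(grp);
}
#endif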
/*
 *	Routine:	lck_rw_lock_shared_gen
 *	Function:
 *		assembly fast path code has determined that this lock
 *		is held exclusively... this is where we spin/block
 *		until we can acquire the lock in the shared mode
 */
void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		gotlock = 0;
	int		slept = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;

#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif

	while ( !lck_rw_grab_shared(lck)) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

		while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
			    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
					     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

				lck->lck_r_waiting = TRUE;

				res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
					     (int)lck, res, slept, 0, 0);
			} else {
				lck->lck_rw_shared_count++;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
}


/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_failure
 *	Function:
 *		assembly fast path code has already dropped our read
 *		count and determined that someone else owns 'lck_rw_want_upgrade'
 *		if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'
 *		all we need to do here is determine if a wakeup is needed
 */
boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released the read lock, wake
		 *	him up if he's blocked waiting
		 */
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
		     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

	return (FALSE);
}


/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_success
 *	Function:
 *		assembly fast path code has already dropped our read
 *		count and successfully acquired 'lck_rw_want_upgrade'
 *		we just need to wait for the rest of the readers to drain
 *		and then we can return as the exclusive holder of this lock
 */
boolean_t
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		slept = 0;
	int		still_shared = 0;
	wait_result_t	res;
	boolean_t	istate = -1;

#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

	while (lck->lck_rw_shared_count != 0) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

		while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

		if ( !still_shared)
			break;
		/*
		 * if we get here, the deadline has expired w/o
		 * the rw_shared_count having drained to 0
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
					     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
					     (int)lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
#if	CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
	return (TRUE);
}
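
/*
 * Editor's illustrative sketch (not part of the original file): the
 * shared-to-exclusive upgrade from a caller's point of view.  As the
 * comments above describe, a failed upgrade returns with NO lock held, so
 * the caller must re-acquire and re-validate.  example_revalidate() is a
 * hypothetical stand-in.  Guarded out of the build.
 */
#if 0	/* example only */
extern void example_revalidate(void);	/* hypothetical */

static void
example_upgrade_usage(lck_rw_t *rwlock)
{
	lck_rw_lock_shared(rwlock);

	if (!lck_rw_lock_shared_to_exclusive(rwlock)) {
		/* upgrade lost the race: we hold nothing at this point */
		lck_rw_lock_exclusive(rwlock);
		example_revalidate();	/* state may have changed while unlocked */
	}
	/* exclusive holder either way from here */
	lck_rw_unlock_exclusive(rwlock);
}
#endif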
/*
 *      Routine:        lck_rw_lock_exclusive_to_shared
 *	Function:
 *		assembly fast path has already dropped
 *		our exclusive state and bumped lck_rw_shared_count
 *		all we need to do here is determine if anyone
 *		needs to be awakened.
 */
void
lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
		     (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);

	/*
	 * don't wake up anyone waiting to take the lock exclusively
	 * since we hold a read count... when the read count drops to 0,
	 * the writers will be woken.
	 *
	 * wake up any waiting readers if we don't have any writers waiting,
	 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
	 */
	if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
		thread_wakeup(RW_LOCK_READER_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
		     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}


/*
 *      Routine:        lck_rw_try_lock
 */
boolean_t
lck_rw_try_lock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		return(lck_rw_try_lock_shared(lck));
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		return(lck_rw_try_lock_exclusive(lck));
	else
		panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
	return(FALSE);
}


void
lck_rw_assert(
	lck_rw_t	*lck,
	unsigned int	type)
{
	switch (type) {
	case LCK_RW_ASSERT_SHARED:
		if (lck->lck_rw_shared_count != 0) {
			return;
		}
		break;
	case LCK_RW_ASSERT_EXCLUSIVE:
		if ((lck->lck_rw_want_write ||
		     lck->lck_rw_want_upgrade) &&
		    lck->lck_rw_shared_count == 0) {
			return;
		}
		break;
	case LCK_RW_ASSERT_HELD:
		if (lck->lck_rw_want_write ||
		    lck->lck_rw_want_upgrade ||
		    lck->lck_rw_shared_count != 0) {
			return;
		}
		break;
	default:
		break;
	}

	panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
}

#ifdef	MUTEX_ZONE
extern zone_t lck_mtx_zone;
#endif
/*
 *      Routine:        lck_mtx_alloc_init
 */
lck_mtx_t *
lck_mtx_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_mtx_t	*lck;
#ifdef	MUTEX_ZONE
	if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
		lck_mtx_init(lck, grp, attr);
#else
	if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
		lck_mtx_init(lck, grp, attr);
#endif
	return(lck);
}

/*
 *      Routine:        lck_mtx_free
 */
void
lck_mtx_free(
	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
	lck_mtx_destroy(lck, grp);
#ifdef	MUTEX_ZONE
	zfree(lck_mtx_zone, lck);
#else
	kfree(lck, sizeof(lck_mtx_t));
#endif
}

/*
 *      Routine:        lck_mtx_ext_init
 */
static void
lck_mtx_ext_init(
	lck_mtx_ext_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	bzero((void *)lck, sizeof(lck_mtx_ext_t));

	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck->lck_mtx_deb.type = MUTEX_TAG;
		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
	}

	lck->lck_mtx_grp = grp;

	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
		lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;

	lck->lck_mtx.lck_mtx_is_ext = 1;
#if	defined(__x86_64__)
	lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
#endif
}
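
/*
 * Editor's illustrative sketch (not part of the original file): typical
 * lck_mtx usage, including a debug attribute so that lck_mtx_init() below
 * takes the indirect (lck_mtx_ext_t) path.  lck_mtx_lock()/lck_mtx_unlock()
 * and the lck_attr_* calls come from <kern/locks.h>; the group name and
 * wrapper function are made-up examples.  Guarded out of the build.
 */
#if 0	/* example only */
static void
example_mtx_usage(void)
{
	lck_grp_t	*grp;
	lck_attr_t	*attr;
	lck_mtx_t	*mtx;

	grp = lck_grp_alloc_init("example.mutexes", LCK_GRP_ATTR_NULL);
	attr = lck_attr_alloc_init();
	lck_attr_setdebug(attr);		/* request the indirect/debug form */

	mtx = lck_mtx_alloc_init(grp, attr);

	lck_mtx_lock(mtx);			/* may block; the owner can be promoted */
	/* ... critical section that is allowed to sleep ... */
	lck_mtx_unlock(mtx);

	lck_mtx_free(mtx, grp);
	lck_attr_free(attr);
	lck_grp_free(grp);
}
#endif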
/*
 *      Routine:        lck_mtx_init
 */
void
lck_mtx_init(
	lck_mtx_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_mtx_ext_t	*lck_ext;
	lck_attr_t	*lck_attr;

	if (attr != LCK_ATTR_NULL)
		lck_attr = attr;
	else
		lck_attr = &LockDefaultLckAttr;

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
			lck_mtx_ext_init(lck_ext, grp, lck_attr);
			lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
			lck->lck_mtx_ptr = lck_ext;
		}
	} else {
		lck->lck_mtx_owner = 0;
		lck->lck_mtx_state = 0;
	}
#if	defined(__x86_64__)
	lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
#endif
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}

/*
 *      Routine:        lck_mtx_init_ext
 */
void
lck_mtx_init_ext(
	lck_mtx_t	*lck,
	lck_mtx_ext_t	*lck_ext,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_attr_t	*lck_attr;

	if (attr != LCK_ATTR_NULL)
		lck_attr = attr;
	else
		lck_attr = &LockDefaultLckAttr;

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck_mtx_ext_init(lck_ext, grp, lck_attr);
		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
		lck->lck_mtx_ptr = lck_ext;
	} else {
		lck->lck_mtx_owner = 0;
		lck->lck_mtx_state = 0;
	}
#if	defined(__x86_64__)
	lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
#endif

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}

/*
 *      Routine:        lck_mtx_destroy
 */
void
lck_mtx_destroy(
	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
	boolean_t lck_is_indirect;

	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		return;
	lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);

	lck_mtx_lock_mark_destroyed(lck);

	if (lck_is_indirect)
		kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
	return;
}


#define	LCK_MTX_LCK_WAIT_CODE		0x20
#define	LCK_MTX_LCK_WAKEUP_CODE		0x21
#define	LCK_MTX_LCK_SPIN_CODE		0x22
#define	LCK_MTX_LCK_ACQUIRE_CODE	0x23
#define LCK_MTX_LCK_DEMOTE_CODE		0x24


/*
 * Routine: 	lck_mtx_unlock_wakeup_x86
 *
 * Invoked on unlock when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0 or
 * that mutex->lck_mtx_promoted != 0)...
 *
 * neither the mutex or interlock is held
 */
void
lck_mtx_unlock_wakeup_x86 (
	lck_mtx_t	*mutex,
	int		prior_lock_state)
{
	lck_mtx_t	fake_lck;

	/*
	 * prior_lock_state is a snapshot of the 2nd word of the
	 * lock in question...
	 * we'll fake up a lock with the bits
	 * copied into place and carefully not access anything
	 * beyond what's defined in the second word of a lck_mtx_t
	 */
	fake_lck.lck_mtx_state = prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
		     mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);

	if (__probable(fake_lck.lck_mtx_waiters)) {

		if (fake_lck.lck_mtx_waiters > 1)
			thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
		else
			thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
	}

	if (__improbable(fake_lck.lck_mtx_promoted)) {
		thread_t	thread = current_thread();


		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
			     thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);

		if (thread->promotions > 0) {
			spl_t	s = splsched();

			thread_lock(thread);

			if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {

				thread->sched_flags &= ~TH_SFLAG_PROMOTED;

				if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
					KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
							      thread->sched_pri, DEPRESSPRI, 0, mutex, 0);

					set_sched_pri(thread, DEPRESSPRI);
				}
				else {
					if (thread->priority < thread->sched_pri) {
						KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
								      thread->sched_pri, thread->priority, 0, mutex, 0);

						SCHED(compute_priority)(thread, FALSE);
					}
				}
			}
			thread_unlock(thread);
			splx(s);
		}
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
		     mutex, 0, mutex->lck_mtx_waiters, 0, 0);
}


/*
 * Routine: 	lck_mtx_lock_acquire_x86
 *
 * Invoked on acquiring the mutex when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0 or
 * thread->was_promoted_on_wakeup != 0)...
 *
 * mutex is owned... interlock is held... preemption is disabled
 */
void
lck_mtx_lock_acquire_x86(
	lck_mtx_t	*mutex)
{
	thread_t	thread;
	integer_t	priority;
	spl_t		s;

	thread = (thread_t)mutex->lck_mtx_owner;	/* faster than current_thread() */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
		     mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);

	if (mutex->lck_mtx_waiters)
		priority = mutex->lck_mtx_pri;
	else
		priority = 0;

	if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
				      thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);

		s = splsched();
		thread_lock(thread);

		if (thread->sched_pri < priority)
			set_sched_pri(thread, priority);

		if (mutex->lck_mtx_promoted == 0) {
			mutex->lck_mtx_promoted = 1;

			thread->promotions++;
			thread->sched_flags |= TH_SFLAG_PROMOTED;
		}
		thread->was_promoted_on_wakeup = 0;

		thread_unlock(thread);
		splx(s);
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
		     mutex, 0, mutex->lck_mtx_waiters, 0, 0);
}



/*
 * Routine: 	lck_mtx_lock_spinwait_x86
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor.  We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 * returns 0 if mutex acquired
 * returns 1 if we spun
 * returns 2 if we didn't spin due to the holder not running
 */
int
lck_mtx_lock_spinwait_x86(
	lck_mtx_t	*mutex)
{
	thread_t	holder;
	uint64_t	deadline;
	int		retval = 1;
	int		loopcount = 0;


	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);

	deadline = mach_absolute_time() + MutexSpin;

	/*
	 * Spin while:
	 *   - mutex is locked, and
	 *   - it's locked as a spin lock, and
	 *   - owner is running on another processor, and
	 *   - owner (processor) is not idling, and
	 *   - we haven't spun for long enough.
	 */
	do {
		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
			retval = 0;
			break;
		}
		if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {

			if ( !(holder->machine.specFlags & OnProc) ||
			     (holder->state & TH_IDLE)) {
				if (loopcount == 0)
					retval = 2;
				break;
			}
		}
		cpu_pause();

		loopcount++;

	} while (mach_absolute_time() < deadline);


#if	CONFIG_DTRACE
	/*
	 * We've already kept a count via deadline of how long we spun.
	 * If dtrace is active, then we compute backwards to decide how
	 * long we spun.
	 *
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex.  This allows us to
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (__probable(mutex->lck_mtx_is_ext == 0)) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
		    mach_absolute_time() - (deadline - MutexSpin));
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
		    mach_absolute_time() - (deadline - MutexSpin));
	}
	/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);

	return retval;
}



/*
 * Routine: 	lck_mtx_lock_wait_x86
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * preemption disabled...
 * returns it unlocked and with preemption enabled
 */
void
lck_mtx_lock_wait_x86 (
	lck_mtx_t	*mutex)
{
	thread_t	self = current_thread();
	thread_t	holder;
	integer_t	priority;
	spl_t		s;
#if	CONFIG_DTRACE
	uint64_t	sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);

	priority = self->sched_pri;

	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
		mutex->lck_mtx_pri = priority;
	mutex->lck_mtx_waiters++;

	if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
	     holder->sched_pri < mutex->lck_mtx_pri ) {

		s = splsched();
		thread_lock(holder);

		if (holder->sched_pri < mutex->lck_mtx_pri) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
				holder->sched_pri, priority, thread_tid(holder), mutex, 0);

			set_sched_pri(holder, priority);

			if (mutex->lck_mtx_promoted == 0) {
				holder->promotions++;
				holder->sched_flags |= TH_SFLAG_PROMOTED;

				mutex->lck_mtx_promoted = 1;
			}
		}
		thread_unlock(holder);
		splx(s);
	}
	assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);

	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);

#if	CONFIG_DTRACE
	/*
	 * Record the Dtrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (mutex->lck_mtx_is_ext == 0) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
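
/*
 * Editor's illustrative sketch (not part of the original file): how the
 * spinwait return codes documented above are typically interpreted.  In
 * this file the real caller is the assembly slow path; the wrapper below is
 * a hypothetical C rendering for illustration only.  Guarded out of the
 * build.
 */
#if 0	/* example only */
static void
example_interpret_spinwait(lck_mtx_t *mutex)
{
	switch (lck_mtx_lock_spinwait_x86(mutex)) {
	case 0:
		/* mutex acquired while spinning; nothing more to do */
		break;
	case 1:
		/*
		 * spun up to the deadline without getting it: fall back to
		 * blocking via lck_mtx_lock_wait_x86() under the interlock
		 */
		break;
	case 2:
		/* holder not running, so spinning was skipped: block right away */
		break;
	}
}
#endif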