/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#include <mach_ldebug.h>
#include <debug.h>

#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach_debug/lockgroup_info.h>

#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <string.h>


#include <sys/kdebug.h>

#if	CONFIG_DTRACE
/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#endif

#define	LCK_MTX_SLEEP_CODE		0
#define	LCK_MTX_SLEEP_DEADLINE_CODE	1
#define	LCK_MTX_LCK_WAIT_CODE		2
#define	LCK_MTX_UNLCK_WAKEUP_CODE	3


static queue_head_t	lck_grp_queue;
static unsigned int	lck_grp_cnt;

decl_lck_mtx_data(static,lck_grp_lock)
static lck_mtx_ext_t lck_grp_lock_ext;

lck_grp_attr_t	LockDefaultGroupAttr;
lck_grp_t	LockCompatGroup;
lck_attr_t	LockDefaultLckAttr;

/*
 * Routine:	lck_mod_init
 */

void
lck_mod_init(
	void)
{
	/*
	 * Obtain "lcks" options: this currently controls lock statistics
	 */
	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
		LcksOpts = 0;

	queue_init(&lck_grp_queue);

	/*
	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
	 * grabbing the lck_grp_lock before it is initialized.
	 */

	bzero(&LockCompatGroup, sizeof(lck_grp_t));
	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);

	if (LcksOpts & enaLkStat)
		LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;

	LockCompatGroup.lck_grp_refcnt = 1;

	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
	lck_grp_cnt = 1;

	lck_grp_attr_setdefault(&LockDefaultGroupAttr);
	lck_attr_setdefault(&LockDefaultLckAttr);

	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);

}

/*
 * Routine:	lck_grp_attr_alloc_init
 */

lck_grp_attr_t *
lck_grp_attr_alloc_init(
	void)
{
	lck_grp_attr_t	*attr;

	if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
		lck_grp_attr_setdefault(attr);

	return(attr);
}


/*
 * Routine:	lck_grp_attr_setdefault
 */

void
lck_grp_attr_setdefault(
	lck_grp_attr_t	*attr)
{
	if (LcksOpts & enaLkStat)
		attr->grp_attr_val = LCK_GRP_ATTR_STAT;
	else
		attr->grp_attr_val = 0;
}


/*
 * Routine:	lck_grp_attr_setstat
 */

void
lck_grp_attr_setstat(
	lck_grp_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
}


/*
 * Routine:	lck_grp_attr_free
 */

void
lck_grp_attr_free(
	lck_grp_attr_t	*attr)
{
	kfree(attr, sizeof(lck_grp_attr_t));
}


/*
 * Routine:	lck_grp_alloc_init
 */

lck_grp_t *
lck_grp_alloc_init(
	const char*	grp_name,
	lck_grp_attr_t	*attr)
{
	lck_grp_t	*grp;

	if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
		lck_grp_init(grp, grp_name, attr);

	return(grp);
}


/*
 * Routine:	lck_grp_init
 */

void
lck_grp_init(
	lck_grp_t	*grp,
	const char*	grp_name,
	lck_grp_attr_t	*attr)
{
	bzero((void *)grp, sizeof(lck_grp_t));

	(void) strncpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);

	if (attr != LCK_GRP_ATTR_NULL)
		grp->lck_grp_attr = attr->grp_attr_val;
	else if (LcksOpts & enaLkStat)
		grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		grp->lck_grp_attr = LCK_ATTR_NONE;

	grp->lck_grp_refcnt = 1;

	lck_mtx_lock(&lck_grp_lock);
	enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
	lck_grp_cnt++;
	lck_mtx_unlock(&lck_grp_lock);

}

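/*
 * Illustrative sketch (not part of the original file): a typical client of
 * the group/attribute routines above allocates a group and attributes,
 * initializes a lock against them, and tears everything down in reverse
 * order.  The my_* identifiers below are hypothetical; the lck_* calls are
 * the ones declared in <kern/locks.h> and defined here or in the
 * per-architecture lock code.
 */
#if 0	/* example only -- not compiled */
static lck_grp_attr_t	*my_grp_attr;
static lck_grp_t	*my_grp;
static lck_attr_t	*my_attr;
static lck_mtx_t	my_mtx;

static void
my_subsystem_lock_init(void)
{
	my_grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setstat(my_grp_attr);		/* opt in to lock statistics */
	my_grp = lck_grp_alloc_init("my_subsystem", my_grp_attr);

	my_attr = lck_attr_alloc_init();
	lck_mtx_init(&my_mtx, my_grp, my_attr);
}

static void
my_subsystem_lock_fini(void)
{
	lck_mtx_destroy(&my_mtx, my_grp);
	lck_attr_free(my_attr);
	lck_grp_free(my_grp);				/* drops the group's reference */
	lck_grp_attr_free(my_grp_attr);
}
#endif
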
/*
 * Routine:	lck_grp_free
 */

void
lck_grp_free(
	lck_grp_t	*grp)
{
	lck_mtx_lock(&lck_grp_lock);
	lck_grp_cnt--;
	(void)remque((queue_entry_t)grp);
	lck_mtx_unlock(&lck_grp_lock);
	lck_grp_deallocate(grp);
}


/*
 * Routine:	lck_grp_reference
 */

void
lck_grp_reference(
	lck_grp_t	*grp)
{
	(void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
}


/*
 * Routine:	lck_grp_deallocate
 */

void
lck_grp_deallocate(
	lck_grp_t	*grp)
{
	if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
		kfree(grp, sizeof(lck_grp_t));
}

/*
 * Routine:	lck_grp_lckcnt_incr
 */

void
lck_grp_lckcnt_incr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
	}

	(void)hw_atomic_add(lckcnt, 1);
}

/*
 * Routine:	lck_grp_lckcnt_decr
 */

void
lck_grp_lckcnt_decr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		return panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
	}

	(void)hw_atomic_sub(lckcnt, 1);
}

/*
 * Routine:	lck_attr_alloc_init
 */

lck_attr_t *
lck_attr_alloc_init(
	void)
{
	lck_attr_t	*attr;

	if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
		lck_attr_setdefault(attr);

	return(attr);
}


/*
 * Routine:	lck_attr_setdefault
 */

void
lck_attr_setdefault(
	lck_attr_t	*attr)
{
#if __i386__ || __x86_64__
#if	!DEBUG
	if (LcksOpts & enaLkDeb)
		attr->lck_attr_val = LCK_ATTR_DEBUG;
	else
		attr->lck_attr_val = LCK_ATTR_NONE;
#else
	attr->lck_attr_val = LCK_ATTR_DEBUG;
#endif	/* !DEBUG */
#else
#error Unknown architecture.
#endif	/* __i386__ || __x86_64__ */
}


/*
 * Routine:	lck_attr_setdebug
 */
void
lck_attr_setdebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
}

/*
 * Routine:	lck_attr_cleardebug
 */
void
lck_attr_cleardebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
}


/*
 * Routine:	lck_attr_rw_shared_priority
 */
void
lck_attr_rw_shared_priority(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
}


/*
 * Routine:	lck_attr_free
 */
void
lck_attr_free(
	lck_attr_t	*attr)
{
	kfree(attr, sizeof(lck_attr_t));
}


/*
 * Routine:	lck_spin_sleep
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}


/*
 * Routine:	lck_spin_sleep_deadline
 */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}


/*
 * Routine:	lck_mtx_sleep
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
		     (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We overload the RW lock promotion to give us a priority ceiling
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

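/*
 * Illustrative sketch (not part of the original file): lck_mtx_sleep() is
 * typically used as a condition-wait primitive -- check a predicate under
 * the mutex, sleep on an event word while it does not hold, and re-check
 * after waking, since the mutex is re-acquired (absent LCK_SLEEP_UNLOCK)
 * before the call returns.  The names my_mtx/my_flag are hypothetical.
 */
#if 0	/* example only -- not compiled */
static lck_mtx_t	my_mtx;
static int		my_flag;

static void
my_wait_for_flag(void)
{
	lck_mtx_lock(&my_mtx);
	while (my_flag == 0) {
		(void) lck_mtx_sleep(&my_mtx, LCK_SLEEP_DEFAULT,
		    (event_t)&my_flag, THREAD_UNINT);
	}
	lck_mtx_unlock(&my_mtx);
}

static void
my_set_flag(void)
{
	lck_mtx_lock(&my_mtx);
	my_flag = 1;
	thread_wakeup((event_t)&my_flag);
	lck_mtx_unlock(&my_mtx);
}
#endif
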

/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		     (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

/*
 * Routine:	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t	self = current_thread();
	lck_mtx_t	*mutex;
	integer_t	priority;
	spl_t		s = splsched();
#if	CONFIG_DTRACE
	uint64_t	sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_flags |= TH_SFLAG_PROMOTED;
	if (mutex->lck_mtx_pri < priority &&
	    holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
			holder->sched_pri, priority, holder, lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the Dtrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}

/*
 * Routine:	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t	*lck)
{
	thread_t	thread = current_thread();
	lck_mtx_t	*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters > 0) {
		integer_t	priority = mutex->lck_mtx_pri;
		spl_t		s = splsched();

		thread_lock(thread);
		thread->promotions++;
		thread->sched_flags |= TH_SFLAG_PROMOTED;
		if (thread->sched_pri < priority) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
				thread->sched_pri, priority, 0, lck, 0);
			/* Do not promote past promotion ceiling */
			assert(priority <= MAXPRI_PROMOTE);
			set_sched_pri(thread, priority);
		}
		thread_unlock(thread);
		splx(s);
	}
	else
		mutex->lck_mtx_pri = 0;

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif
	return (mutex->lck_mtx_waiters);
}

/*
 * Routine:	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t	thread = current_thread();
	lck_mtx_t	*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	if (thread->promotions > 0) {
		spl_t	s = splsched();

		thread_lock(thread);
		if (--thread->promotions == 0 &&
		    (thread->sched_flags & TH_SFLAG_PROMOTED)) {
			thread->sched_flags &= ~TH_SFLAG_PROMOTED;

			if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
				/* Thread still has a RW lock promotion */
			} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
					thread->sched_pri, DEPRESSPRI, 0, lck, 0);

				set_sched_pri(thread, DEPRESSPRI);
			}
			else {
				if (thread->priority < thread->sched_pri) {
					KERNEL_DEBUG_CONSTANT(
						MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
						thread->sched_pri, thread->priority, 0, lck, 0);
				}

				SCHED(compute_priority)(thread, FALSE);
			}
		}
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t	*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, (int)lck, 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}


/*
 * Routine:	mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define MAX_COLLISION_COUNTS	32
#define MAX_COLLISION		8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};


void
mutex_pause(uint32_t collisions)
{
	wait_result_t wait_result;
	uint32_t back_off;

	if (collisions >= MAX_COLLISION_COUNTS)
		collisions = MAX_COLLISION_COUNTS - 1;
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION)
		collisions = MAX_COLLISION - 1;
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}

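/*
 * Illustrative sketch (not part of the original file): mutex_pause() provides
 * a bounded backoff, indexed by how many times the caller has already
 * collided (10us up to 1000us per the table above).  A lock-ordering-
 * constrained path that can only try-acquire a second lock might use it like
 * this; my_a/my_b and the retry structure are hypothetical.
 */
#if 0	/* example only -- not compiled */
static lck_mtx_t	my_a, my_b;

static void
my_lock_both(void)
{
	uint32_t collisions = 0;

	for (;;) {
		lck_mtx_lock(&my_a);
		if (lck_mtx_try_lock(&my_b))
			break;				/* both locks held */
		lck_mtx_unlock(&my_a);
		mutex_pause(collisions++);		/* back off before retrying */
	}
}
#endif
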

unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

void
lck_mtx_yield(
	lck_mtx_t	*lck)
{
	int	waiters;

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
	else
		waiters = lck->lck_mtx_waiters;

	if ( !waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
}


/*
 * Routine:	lck_rw_sleep
 */
wait_result_t
lck_rw_sleep(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock. In
		 * this situation, make sure that the thread is boosted to the
		 * RW lock ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}


/*
 * Routine:	lck_rw_sleep_deadline
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}

/*
 * Reader-writer lock promotion
 *
 * We support a limited form of reader-writer
 * lock promotion whose effects are:
 *
 *   * Qualifying threads have decay disabled
 *   * Scheduler priority is reset to a floor of
 *     their statically assigned priority
 *     or BASEPRI_BACKGROUND
 *
 * The rationale is that lck_rw_ts do not have
 * a single owner, so we cannot apply a directed
 * priority boost from all waiting threads
 * to all holding threads without maintaining
 * lists of all shared owners and all waiting
 * threads for every lock.
 *
 * Instead (and to preserve the uncontended fast-
 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive mode increments
 * a per-thread counter. Only if that thread stops
 * making forward progress (for instance blocking
 * on a mutex, or being preempted) do we consult
 * the counter and apply the priority floor.
 * When the thread becomes runnable again (or in
 * the case of preemption it never stopped being
 * runnable), it has the priority boost and should
 * be in a good position to run on the CPU and
 * release all RW locks (at which point the priority
 * boost is cleared).
 *
 * Care must be taken to ensure that priority
 * boosts are not retained indefinitely, since unlike
 * mutex priority boosts (where the boost is tied
 * to the mutex lifecycle), the boost is tied
 * to the thread and independent of any particular
 * lck_rw_t. Assertions are in place on return
 * to userspace so that the boost is not held
 * indefinitely.
 *
 * The routines that increment/decrement the
 * per-thread counter should err on the side of
 * incrementing any time a preemption is possible
 * and the lock would be visible to the rest of the
 * system as held (so it should be incremented before
 * interlocks are dropped/preemption is enabled, or
 * before a CAS is executed to acquire the lock).
 *
 */
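/*
 * Illustrative sketch (not part of the original file): the counting
 * discipline described above, reduced to its skeleton.  A real acquire path
 * (see the per-architecture lck_rw_* implementations) bumps the per-thread
 * count before the lock can be observed as held, and the release path that
 * drops the last RW lock undoes any promotion via lck_rw_clear_promotion().
 * my_rw_acquire_path/my_rw_release_path are hypothetical placeholders, not
 * actual lock implementations.
 */
#if 0	/* example only -- not compiled */
static void
my_rw_acquire_path(lck_rw_t *lck)
{
	thread_t thread = current_thread();

	thread->rwlock_count++;	/* before lck becomes visible as held */
	/* ... interlocked update / CAS that actually takes lck ... */
}

static void
my_rw_release_path(lck_rw_t *lck)
{
	thread_t thread = current_thread();

	/* ... interlocked update that actually drops lck ... */
	if ((thread->rwlock_count-- == 1 /* field now 0 */) &&
	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* last RW lock released; drop any priority floor */
		lck_rw_clear_promotion(thread);
	}
}
#endif
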
/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void lck_rw_clear_promotion(thread_t thread)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();

	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED) {
			/* Thread still has a mutex promotion */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      thread->sched_pri, DEPRESSPRI, 0, 0, 0);

			set_sched_pri(thread, DEPRESSPRI);
		} else {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      thread->sched_pri, thread->priority, 0, 0, 0);

			SCHED(compute_priority)(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}

kern_return_t
host_lockgroup_info(
	host_t				host,
	lockgroup_info_array_t		*lockgroup_infop,
	mach_msg_type_number_t		*lockgroup_infoCntp)
{
	lockgroup_info_t	*lockgroup_info_base;
	lockgroup_info_t	*lockgroup_info;
	vm_offset_t		lockgroup_info_addr;
	vm_size_t		lockgroup_info_size;
	lck_grp_t		*lck_grp;
	unsigned int		i;
	vm_size_t		used;
	vm_map_copy_t		copy;
	kern_return_t		kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	lck_mtx_lock(&lck_grp_lock);

	lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info);
	kr = kmem_alloc_pageable(ipc_kernel_map,
				 &lockgroup_info_addr, lockgroup_info_size);
	if (kr != KERN_SUCCESS) {
		lck_mtx_unlock(&lck_grp_lock);
		return(kr);
	}

	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
	lockgroup_info = lockgroup_info_base;

	for (i = 0; i < lck_grp_cnt; i++) {

		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
		lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
		lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
		lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;

		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
		lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
		lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
		lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
		lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;

		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
		lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
		lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
		lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
		lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
		lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
		lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
		lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
		lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;

		(void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);

		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
		lockgroup_info++;
	}

	*lockgroup_infoCntp = lck_grp_cnt;
	lck_mtx_unlock(&lck_grp_lock);

	used = (*lockgroup_infoCntp) * sizeof *lockgroup_info;

	if (used != lockgroup_info_size)
		bzero((char *) lockgroup_info, lockgroup_info_size - used);

	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
			   (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
	assert(kr == KERN_SUCCESS);

	*lockgroup_infop = (lockgroup_info_t *) copy;

	return(KERN_SUCCESS);
}