1/* 2 * Copyright (c) 1993-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Timer interrupt callout module. 30 */ 31 32#include <mach/mach_types.h> 33 34#include <kern/clock.h> 35#include <kern/processor.h> 36#include <kern/timer_call.h> 37#include <kern/timer_queue.h> 38#include <kern/call_entry.h> 39#include <kern/thread.h> 40 41#include <sys/kdebug.h> 42 43#if CONFIG_DTRACE 44#include <mach/sdt.h> 45#endif 46 47 48#if DEBUG 49#define TIMER_ASSERT 1 50#endif 51 52//#define TIMER_ASSERT 1 53//#define TIMER_DBG 1 54 55#if TIMER_DBG 56#define DBG(x...) kprintf("DBG: " x); 57#else 58#define DBG(x...) 59#endif 60 61#if TIMER_TRACE 62#define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST 63#else 64#define TIMER_KDEBUG_TRACE(x...) 65#endif 66 67 68lck_grp_t timer_call_lck_grp; 69lck_attr_t timer_call_lck_attr; 70lck_grp_attr_t timer_call_lck_grp_attr; 71 72lck_grp_t timer_longterm_lck_grp; 73lck_attr_t timer_longterm_lck_attr; 74lck_grp_attr_t timer_longterm_lck_grp_attr; 75 76 77#define timer_queue_lock_spin(queue) \ 78 lck_mtx_lock_spin_always(&queue->lock_data) 79 80#define timer_queue_unlock(queue) \ 81 lck_mtx_unlock_always(&queue->lock_data) 82 83 84#define QUEUE(x) ((queue_t)(x)) 85#define MPQUEUE(x) ((mpqueue_head_t *)(x)) 86#define TIMER_CALL(x) ((timer_call_t)(x)) 87 88/* 89 * The longterm timer object is a global structure holding all timers 90 * beyond the short-term, local timer queue threshold. The boot processor 91 * is responsible for moving each timer to its local timer queue 92 * if and when that timer becomes due within the threshold. 93 */ 94#define TIMER_LONGTERM_NONE EndOfAllTime 95#if defined(__x86_64__) 96#define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) 97#else 98#define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE 99#endif 100 101typedef struct { 102 uint64_t interval; /* longterm timer interval */ 103 uint64_t margin; /* fudge factor (10% of interval */ 104 uint64_t deadline; /* first/soonest longterm deadline */ 105 uint64_t preempted; /* sooner timer has pre-empted */ 106 timer_call_t call; /* first/soonest longterm timer call */ 107 uint64_t deadline_set; /* next timer set */ 108 timer_call_data_t timer; /* timer used by threshold management */ 109 /* Stats: */ 110 uint64_t scans; /* num threshold timer scans */ 111 uint64_t preempts; /* num threshold reductions */ 112 uint64_t latency; /* average threshold latency */ 113 uint64_t latency_min; /* minimum threshold latency */ 114 uint64_t latency_max; /* maximum threshold latency */ 115} threshold_t; 116 117typedef struct { 118 mpqueue_head_t queue; /* longterm timer list */ 119 uint64_t enqueues; /* num timers queued */ 120 uint64_t dequeues; /* num timers dequeued */ 121 uint64_t escalates; /* num timers becoming shortterm */ 122 uint64_t scan_time; /* last time the list was scanned */ 123 threshold_t threshold; /* longterm timer threshold */ 124} timer_longterm_t; 125 126timer_longterm_t timer_longterm; 127 128static mpqueue_head_t *timer_longterm_queue = NULL; 129 130static void timer_longterm_init(void); 131static void timer_longterm_callout( 132 timer_call_param_t p0, 133 timer_call_param_t p1); 134extern void timer_longterm_scan( 135 timer_longterm_t *tlp, 136 uint64_t now); 137static void timer_longterm_update( 138 timer_longterm_t *tlp); 139static void timer_longterm_update_locked( 140 timer_longterm_t *tlp); 141static mpqueue_head_t * timer_longterm_enqueue_unlocked( 142 timer_call_t call, 143 uint64_t now, 144 uint64_t deadline, 145 mpqueue_head_t ** old_queue); 146static void timer_longterm_dequeued_locked( 147 timer_call_t call); 148 149uint64_t past_deadline_timers; 150uint64_t past_deadline_deltas; 151uint64_t past_deadline_longest; 152uint64_t past_deadline_shortest = ~0ULL; 153enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000}; 154 155uint64_t past_deadline_timer_adjustment; 156 157static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited); 158boolean_t mach_timer_coalescing_enabled = TRUE; 159 160mpqueue_head_t *timer_call_enqueue_deadline_unlocked( 161 timer_call_t call, 162 mpqueue_head_t *queue, 163 uint64_t deadline); 164 165mpqueue_head_t *timer_call_dequeue_unlocked( 166 timer_call_t call); 167 168 169void 170timer_call_init(void) 171{ 172 lck_attr_setdefault(&timer_call_lck_attr); 173 lck_grp_attr_setdefault(&timer_call_lck_grp_attr); 174 lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr); 175 nanotime_to_absolutetime(0, PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); 176 177 timer_longterm_init(); 178} 179 180 181void 182timer_call_queue_init(mpqueue_head_t *queue) 183{ 184 DBG("timer_call_queue_init(%p)\n", queue); 185 mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr); 186} 187 188 189void 190timer_call_setup( 191 timer_call_t call, 192 timer_call_func_t func, 193 timer_call_param_t param0) 194{ 195 DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0); 196 call_entry_setup(CE(call), func, param0); 197 simple_lock_init(&(call)->lock, 0); 198 call->async_dequeue = FALSE; 199} 200 201/* 202 * Timer call entry locking model 203 * ============================== 204 * 205 * Timer call entries are linked on per-cpu timer queues which are protected 206 * by the queue lock and the call entry lock. The locking protocol is: 207 * 208 * 0) The canonical locking order is timer call entry followed by queue. 209 * 210 * 1) With only the entry lock held, entry.queue is valid: 211 * 1a) NULL: the entry is not queued, or 212 * 1b) non-NULL: this queue must be locked before the entry is modified. 213 * After locking the queue, the call.async_dequeue flag must be checked: 214 * 1c) TRUE: the entry was removed from the queue by another thread 215 * and we must NULL the entry.queue and reset this flag, or 216 * 1d) FALSE: (ie. queued), the entry can be manipulated. 217 * 218 * 2) If a queue lock is obtained first, the queue is stable: 219 * 2a) If a try-lock of a queued entry succeeds, the call can be operated on 220 * and dequeued. 221 * 2b) If a try-lock fails, it indicates that another thread is attempting 222 * to change the entry and move it to a different position in this queue 223 * or to different queue. The entry can be dequeued but it should not be 224 * operated upon since it is being changed. Furthermore, we don't null 225 * the entry.queue pointer (protected by the entry lock we don't own). 226 * Instead, we set the async_dequeue flag -- see (1c). 227 * 2c) Same as 2b but occurring when a longterm timer is matured. 228 */ 229 230/* 231 * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline() 232 * cast between pointer types (mpqueue_head_t *) and (queue_t) so that 233 * we can use the call_entry_dequeue() and call_entry_enqueue_deadline() 234 * methods to operate on timer_call structs as if they are call_entry structs. 235 * These structures are identical except for their queue head pointer fields. 236 * 237 * In the debug case, we assert that the timer call locking protocol 238 * is being obeyed. 239 */ 240#if TIMER_ASSERT 241static __inline__ mpqueue_head_t * 242timer_call_entry_dequeue( 243 timer_call_t entry) 244{ 245 mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); 246 247 if (!hw_lock_held((hw_lock_t)&entry->lock)) 248 panic("_call_entry_dequeue() " 249 "entry %p is not locked\n", entry); 250 /* 251 * XXX The queue lock is actually a mutex in spin mode 252 * but there's no way to test for it being held 253 * so we pretend it's a spinlock! 254 */ 255 if (!hw_lock_held((hw_lock_t)&old_queue->lock_data)) 256 panic("_call_entry_dequeue() " 257 "queue %p is not locked\n", old_queue); 258 259 call_entry_dequeue(CE(entry)); 260 old_queue->count--; 261 262 return (old_queue); 263} 264 265static __inline__ mpqueue_head_t * 266timer_call_entry_enqueue_deadline( 267 timer_call_t entry, 268 mpqueue_head_t *queue, 269 uint64_t deadline) 270{ 271 mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); 272 273 if (!hw_lock_held((hw_lock_t)&entry->lock)) 274 panic("_call_entry_enqueue_deadline() " 275 "entry %p is not locked\n", entry); 276 /* XXX More lock pretense: */ 277 if (!hw_lock_held((hw_lock_t)&queue->lock_data)) 278 panic("_call_entry_enqueue_deadline() " 279 "queue %p is not locked\n", queue); 280 if (old_queue != NULL && old_queue != queue) 281 panic("_call_entry_enqueue_deadline() " 282 "old_queue %p != queue", old_queue); 283 284 call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline); 285 286/* For efficiency, track the earliest soft deadline on the queue, so that 287 * fuzzy decisions can be made without lock acquisitions. 288 */ 289 queue->earliest_soft_deadline = ((timer_call_t)queue_first(&queue->head))->soft_deadline; 290 291 if (old_queue) 292 old_queue->count--; 293 queue->count++; 294 295 return (old_queue); 296} 297 298#else 299 300static __inline__ mpqueue_head_t * 301timer_call_entry_dequeue( 302 timer_call_t entry) 303{ 304 mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); 305 306 call_entry_dequeue(CE(entry)); 307 old_queue->count--; 308 309 return old_queue; 310} 311 312static __inline__ mpqueue_head_t * 313timer_call_entry_enqueue_deadline( 314 timer_call_t entry, 315 mpqueue_head_t *queue, 316 uint64_t deadline) 317{ 318 mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); 319 320 call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline); 321 322 /* For efficiency, track the earliest soft deadline on the queue, 323 * so that fuzzy decisions can be made without lock acquisitions. 324 */ 325 queue->earliest_soft_deadline = ((timer_call_t)queue_first(&queue->head))->soft_deadline; 326 327 if (old_queue) 328 old_queue->count--; 329 queue->count++; 330 331 return old_queue; 332} 333 334#endif 335 336static __inline__ void 337timer_call_entry_enqueue_tail( 338 timer_call_t entry, 339 mpqueue_head_t *queue) 340{ 341 call_entry_enqueue_tail(CE(entry), QUEUE(queue)); 342 queue->count++; 343 return; 344} 345 346/* 347 * Remove timer entry from its queue but don't change the queue pointer 348 * and set the async_dequeue flag. This is locking case 2b. 349 */ 350static __inline__ void 351timer_call_entry_dequeue_async( 352 timer_call_t entry) 353{ 354 mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); 355 if (old_queue) { 356 old_queue->count--; 357 (void) remque(qe(entry)); 358 entry->async_dequeue = TRUE; 359 } 360 return; 361} 362 363#if TIMER_ASSERT 364unsigned timer_call_enqueue_deadline_unlocked_async1; 365unsigned timer_call_enqueue_deadline_unlocked_async2; 366#endif 367/* 368 * Assumes call_entry and queues unlocked, interrupts disabled. 369 */ 370__inline__ mpqueue_head_t * 371timer_call_enqueue_deadline_unlocked( 372 timer_call_t call, 373 mpqueue_head_t *queue, 374 uint64_t deadline) 375{ 376 call_entry_t entry = CE(call); 377 mpqueue_head_t *old_queue; 378 379 DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue); 380 381 simple_lock(&call->lock); 382 old_queue = MPQUEUE(entry->queue); 383 if (old_queue != NULL) { 384 timer_queue_lock_spin(old_queue); 385 if (call->async_dequeue) { 386 /* collision (1c): timer already dequeued, clear flag */ 387#if TIMER_ASSERT 388 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 389 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 390 call, 391 call->async_dequeue, 392 CE(call)->queue, 393 0x1c, 0); 394 timer_call_enqueue_deadline_unlocked_async1++; 395#endif 396 call->async_dequeue = FALSE; 397 entry->queue = NULL; 398 } else if (old_queue != queue) { 399 timer_call_entry_dequeue(call); 400#if TIMER_ASSERT 401 timer_call_enqueue_deadline_unlocked_async2++; 402#endif 403 } 404 if (old_queue == timer_longterm_queue) 405 timer_longterm_dequeued_locked(call); 406 if (old_queue != queue) { 407 timer_queue_unlock(old_queue); 408 timer_queue_lock_spin(queue); 409 } 410 } else { 411 timer_queue_lock_spin(queue); 412 } 413 414 timer_call_entry_enqueue_deadline(call, queue, deadline); 415 timer_queue_unlock(queue); 416 simple_unlock(&call->lock); 417 418 return (old_queue); 419} 420 421#if TIMER_ASSERT 422unsigned timer_call_dequeue_unlocked_async1; 423unsigned timer_call_dequeue_unlocked_async2; 424#endif 425mpqueue_head_t * 426timer_call_dequeue_unlocked( 427 timer_call_t call) 428{ 429 call_entry_t entry = CE(call); 430 mpqueue_head_t *old_queue; 431 432 DBG("timer_call_dequeue_unlocked(%p)\n", call); 433 434 simple_lock(&call->lock); 435 old_queue = MPQUEUE(entry->queue); 436#if TIMER_ASSERT 437 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 438 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 439 call, 440 call->async_dequeue, 441 CE(call)->queue, 442 0, 0); 443#endif 444 if (old_queue != NULL) { 445 timer_queue_lock_spin(old_queue); 446 if (call->async_dequeue) { 447 /* collision (1c): timer already dequeued, clear flag */ 448#if TIMER_ASSERT 449 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 450 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 451 call, 452 call->async_dequeue, 453 CE(call)->queue, 454 0x1c, 0); 455 timer_call_dequeue_unlocked_async1++; 456#endif 457 call->async_dequeue = FALSE; 458 entry->queue = NULL; 459 } else { 460 timer_call_entry_dequeue(call); 461 } 462 if (old_queue == timer_longterm_queue) 463 timer_longterm_dequeued_locked(call); 464 timer_queue_unlock(old_queue); 465 } 466 simple_unlock(&call->lock); 467 return (old_queue); 468} 469 470static boolean_t 471timer_call_enter_internal( 472 timer_call_t call, 473 timer_call_param_t param1, 474 uint64_t deadline, 475 uint64_t leeway, 476 uint32_t flags, 477 boolean_t ratelimited) 478{ 479 mpqueue_head_t *queue = NULL; 480 mpqueue_head_t *old_queue; 481 spl_t s; 482 uint64_t slop; 483 uint32_t urgency; 484 485 s = splclock(); 486 487 call->soft_deadline = deadline; 488 call->flags = flags; 489 490 uint64_t ctime = mach_absolute_time(); 491 492 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 493 DECR_TIMER_ENTER | DBG_FUNC_START, 494 call, 495 param1, deadline, flags, 0); 496 497 urgency = (flags & TIMER_CALL_URGENCY_MASK); 498 499 boolean_t slop_ratelimited = FALSE; 500 slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited); 501 502 if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) 503 slop = leeway; 504 505 if (UINT64_MAX - deadline <= slop) { 506 deadline = UINT64_MAX; 507 } else { 508 deadline += slop; 509 } 510 511 if (__improbable(deadline < ctime)) { 512 uint64_t delta = (ctime - deadline); 513 514 past_deadline_timers++; 515 past_deadline_deltas += delta; 516 if (delta > past_deadline_longest) 517 past_deadline_longest = deadline; 518 if (delta < past_deadline_shortest) 519 past_deadline_shortest = delta; 520 521 deadline = ctime + past_deadline_timer_adjustment; 522 call->soft_deadline = deadline; 523 } 524 525 /* Bit 0 of the "soft" deadline indicates that 526 * this particular timer call requires rate-limiting 527 * behaviour. Maintain the invariant deadline >= soft_deadline by 528 * setting bit 0 of "deadline". 529 */ 530 531 deadline |= 1; 532 if (ratelimited || slop_ratelimited) { 533 call->soft_deadline |= 1ULL; 534 } else { 535 call->soft_deadline &= ~0x1ULL; 536 } 537 538 call->ttd = call->soft_deadline - ctime; 539 540#if CONFIG_DTRACE 541 DTRACE_TMR7(callout__create, timer_call_func_t, CE(call)->func, 542 timer_call_param_t, CE(call)->param0, uint32_t, call->flags, 543 (deadline - call->soft_deadline), 544 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call); 545#endif 546 547 if (!ratelimited && !slop_ratelimited) { 548 queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue); 549 } 550 551 if (queue == NULL) { 552 queue = timer_queue_assign(deadline); 553 old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline); 554 } 555 556 CE(call)->param1 = param1; 557#if TIMER_TRACE 558 CE(call)->entry_time = ctime; 559#endif 560 561 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 562 DECR_TIMER_ENTER | DBG_FUNC_END, 563 call, 564 (old_queue != NULL), call->soft_deadline, queue->count, 0); 565 566 splx(s); 567 568 return (old_queue != NULL); 569} 570 571/* 572 * timer_call_*() 573 * return boolean indicating whether the call was previously queued. 574 */ 575boolean_t 576timer_call_enter( 577 timer_call_t call, 578 uint64_t deadline, 579 uint32_t flags) 580{ 581 return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE); 582} 583 584boolean_t 585timer_call_enter1( 586 timer_call_t call, 587 timer_call_param_t param1, 588 uint64_t deadline, 589 uint32_t flags) 590{ 591 return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE); 592} 593 594boolean_t 595timer_call_enter_with_leeway( 596 timer_call_t call, 597 timer_call_param_t param1, 598 uint64_t deadline, 599 uint64_t leeway, 600 uint32_t flags, 601 boolean_t ratelimited) 602{ 603 return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited); 604} 605 606boolean_t 607timer_call_cancel( 608 timer_call_t call) 609{ 610 mpqueue_head_t *old_queue; 611 spl_t s; 612 613 s = splclock(); 614 615 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 616 DECR_TIMER_CANCEL | DBG_FUNC_START, 617 call, 618 CE(call)->deadline, call->soft_deadline, call->flags, 0); 619 620 old_queue = timer_call_dequeue_unlocked(call); 621 622 if (old_queue != NULL) { 623 timer_queue_lock_spin(old_queue); 624 if (!queue_empty(&old_queue->head)) { 625 timer_queue_cancel(old_queue, CE(call)->deadline, CE(queue_first(&old_queue->head))->deadline); 626 old_queue->earliest_soft_deadline = ((timer_call_t)queue_first(&old_queue->head))->soft_deadline; 627 } 628 else { 629 timer_queue_cancel(old_queue, CE(call)->deadline, UINT64_MAX); 630 old_queue->earliest_soft_deadline = UINT64_MAX; 631 } 632 timer_queue_unlock(old_queue); 633 } 634 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 635 DECR_TIMER_CANCEL | DBG_FUNC_END, 636 call, 637 old_queue, 638 CE(call)->deadline - mach_absolute_time(), 639 CE(call)->deadline - CE(call)->entry_time, 0); 640 splx(s); 641 642#if CONFIG_DTRACE 643 DTRACE_TMR6(callout__cancel, timer_call_func_t, CE(call)->func, 644 timer_call_param_t, CE(call)->param0, uint32_t, call->flags, 0, 645 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); 646#endif 647 648 return (old_queue != NULL); 649} 650 651uint32_t timer_queue_shutdown_lock_skips; 652void 653timer_queue_shutdown( 654 mpqueue_head_t *queue) 655{ 656 timer_call_t call; 657 mpqueue_head_t *new_queue; 658 spl_t s; 659 660 DBG("timer_queue_shutdown(%p)\n", queue); 661 662 s = splclock(); 663 664 /* Note comma operator in while expression re-locking each iteration */ 665 while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) { 666 call = TIMER_CALL(queue_first(&queue->head)); 667 if (!simple_lock_try(&call->lock)) { 668 /* 669 * case (2b) lock order inversion, dequeue and skip 670 * Don't change the call_entry queue back-pointer 671 * but set the async_dequeue field. 672 */ 673 timer_queue_shutdown_lock_skips++; 674 timer_call_entry_dequeue_async(call); 675#if TIMER_ASSERT 676 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 677 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 678 call, 679 call->async_dequeue, 680 CE(call)->queue, 681 0x2b, 0); 682#endif 683 timer_queue_unlock(queue); 684 continue; 685 } 686 687 /* remove entry from old queue */ 688 timer_call_entry_dequeue(call); 689 timer_queue_unlock(queue); 690 691 /* and queue it on new */ 692 new_queue = timer_queue_assign(CE(call)->deadline); 693 timer_queue_lock_spin(new_queue); 694 timer_call_entry_enqueue_deadline( 695 call, new_queue, CE(call)->deadline); 696 timer_queue_unlock(new_queue); 697 698 simple_unlock(&call->lock); 699 } 700 701 timer_queue_unlock(queue); 702 splx(s); 703} 704 705uint32_t timer_queue_expire_lock_skips; 706uint64_t 707timer_queue_expire_with_options( 708 mpqueue_head_t *queue, 709 uint64_t deadline, 710 boolean_t rescan) 711{ 712 timer_call_t call = NULL; 713 uint32_t tc_iterations = 0; 714 DBG("timer_queue_expire(%p,)\n", queue); 715 716 uint64_t cur_deadline = deadline; 717 timer_queue_lock_spin(queue); 718 719 while (!queue_empty(&queue->head)) { 720 /* Upon processing one or more timer calls, refresh the 721 * deadline to account for time elapsed in the callout 722 */ 723 if (++tc_iterations > 1) 724 cur_deadline = mach_absolute_time(); 725 726 if (call == NULL) 727 call = TIMER_CALL(queue_first(&queue->head)); 728 729 if (call->soft_deadline <= cur_deadline) { 730 timer_call_func_t func; 731 timer_call_param_t param0, param1; 732 733 TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0); 734 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 735 DECR_TIMER_EXPIRE | DBG_FUNC_NONE, 736 call, 737 call->soft_deadline, 738 CE(call)->deadline, 739 CE(call)->entry_time, 0); 740 741 /* Bit 0 of the "soft" deadline indicates that 742 * this particular timer call is rate-limited 743 * and hence shouldn't be processed before its 744 * hard deadline. 745 */ 746 if ((call->soft_deadline & 0x1) && 747 (CE(call)->deadline > cur_deadline)) { 748 if (rescan == FALSE) 749 break; 750 } 751 752 if (!simple_lock_try(&call->lock)) { 753 /* case (2b) lock inversion, dequeue and skip */ 754 timer_queue_expire_lock_skips++; 755 timer_call_entry_dequeue_async(call); 756 call = NULL; 757 continue; 758 } 759 760 timer_call_entry_dequeue(call); 761 762 func = CE(call)->func; 763 param0 = CE(call)->param0; 764 param1 = CE(call)->param1; 765 766 simple_unlock(&call->lock); 767 timer_queue_unlock(queue); 768 769 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 770 DECR_TIMER_CALLOUT | DBG_FUNC_START, 771 call, VM_KERNEL_UNSLIDE(func), param0, param1, 0); 772 773#if CONFIG_DTRACE 774 DTRACE_TMR7(callout__start, timer_call_func_t, func, 775 timer_call_param_t, param0, unsigned, call->flags, 776 0, (call->ttd >> 32), 777 (unsigned) (call->ttd & 0xFFFFFFFF), call); 778#endif 779 /* Maintain time-to-deadline in per-processor data 780 * structure for thread wakeup deadline statistics. 781 */ 782 uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd)); 783 *ttdp = call->ttd; 784 (*func)(param0, param1); 785 *ttdp = 0; 786#if CONFIG_DTRACE 787 DTRACE_TMR4(callout__end, timer_call_func_t, func, 788 param0, param1, call); 789#endif 790 791 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 792 DECR_TIMER_CALLOUT | DBG_FUNC_END, 793 call, VM_KERNEL_UNSLIDE(func), param0, param1, 0); 794 call = NULL; 795 timer_queue_lock_spin(queue); 796 } else { 797 if (__probable(rescan == FALSE)) { 798 break; 799 } else { 800 int64_t skew = CE(call)->deadline - call->soft_deadline; 801 assert(CE(call)->deadline >= call->soft_deadline); 802 803 /* DRK: On a latency quality-of-service level change, 804 * re-sort potentially rate-limited timers. The platform 805 * layer determines which timers require 806 * this. In the absence of the per-callout 807 * synchronization requirement, a global resort could 808 * be more efficient. The re-sort effectively 809 * annuls all timer adjustments, i.e. the "soft 810 * deadline" is the sort key. 811 */ 812 813 if (timer_resort_threshold(skew)) { 814 if (__probable(simple_lock_try(&call->lock))) { 815 timer_call_entry_dequeue(call); 816 timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline); 817 simple_unlock(&call->lock); 818 call = NULL; 819 } 820 } 821 if (call) { 822 call = TIMER_CALL(queue_next(qe(call))); 823 if (queue_end(&queue->head, qe(call))) 824 break; 825 } 826 } 827 } 828 } 829 830 if (!queue_empty(&queue->head)) { 831 call = TIMER_CALL(queue_first(&queue->head)); 832 cur_deadline = CE(call)->deadline; 833 queue->earliest_soft_deadline = call->soft_deadline; 834 } else { 835 queue->earliest_soft_deadline = cur_deadline = UINT64_MAX; 836 } 837 838 timer_queue_unlock(queue); 839 840 return (cur_deadline); 841} 842 843uint64_t 844timer_queue_expire( 845 mpqueue_head_t *queue, 846 uint64_t deadline) 847{ 848 return timer_queue_expire_with_options(queue, deadline, FALSE); 849} 850 851extern int serverperfmode; 852uint32_t timer_queue_migrate_lock_skips; 853/* 854 * timer_queue_migrate() is called by timer_queue_migrate_cpu() 855 * to move timer requests from the local processor (queue_from) 856 * to a target processor's (queue_to). 857 */ 858int 859timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) 860{ 861 timer_call_t call; 862 timer_call_t head_to; 863 int timers_migrated = 0; 864 865 DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to); 866 867 assert(!ml_get_interrupts_enabled()); 868 assert(queue_from != queue_to); 869 870 if (serverperfmode) { 871 /* 872 * if we're running a high end server 873 * avoid migrations... they add latency 874 * and don't save us power under typical 875 * server workloads 876 */ 877 return -4; 878 } 879 880 /* 881 * Take both local (from) and target (to) timer queue locks while 882 * moving the timers from the local queue to the target processor. 883 * We assume that the target is always the boot processor. 884 * But only move if all of the following is true: 885 * - the target queue is non-empty 886 * - the local queue is non-empty 887 * - the local queue's first deadline is later than the target's 888 * - the local queue contains no non-migrateable "local" call 889 * so that we need not have the target resync. 890 */ 891 892 timer_queue_lock_spin(queue_to); 893 894 head_to = TIMER_CALL(queue_first(&queue_to->head)); 895 if (queue_empty(&queue_to->head)) { 896 timers_migrated = -1; 897 goto abort1; 898 } 899 900 timer_queue_lock_spin(queue_from); 901 902 if (queue_empty(&queue_from->head)) { 903 timers_migrated = -2; 904 goto abort2; 905 } 906 907 call = TIMER_CALL(queue_first(&queue_from->head)); 908 if (CE(call)->deadline < CE(head_to)->deadline) { 909 timers_migrated = 0; 910 goto abort2; 911 } 912 913 /* perform scan for non-migratable timers */ 914 do { 915 if (call->flags & TIMER_CALL_LOCAL) { 916 timers_migrated = -3; 917 goto abort2; 918 } 919 call = TIMER_CALL(queue_next(qe(call))); 920 } while (!queue_end(&queue_from->head, qe(call))); 921 922 /* migration loop itself -- both queues are locked */ 923 while (!queue_empty(&queue_from->head)) { 924 call = TIMER_CALL(queue_first(&queue_from->head)); 925 if (!simple_lock_try(&call->lock)) { 926 /* case (2b) lock order inversion, dequeue only */ 927#ifdef TIMER_ASSERT 928 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 929 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 930 call, 931 CE(call)->queue, 932 call->lock.interlock.lock_data, 933 0x2b, 0); 934#endif 935 timer_queue_migrate_lock_skips++; 936 timer_call_entry_dequeue_async(call); 937 continue; 938 } 939 timer_call_entry_dequeue(call); 940 timer_call_entry_enqueue_deadline( 941 call, queue_to, CE(call)->deadline); 942 timers_migrated++; 943 simple_unlock(&call->lock); 944 } 945 queue_from->earliest_soft_deadline = UINT64_MAX; 946abort2: 947 timer_queue_unlock(queue_from); 948abort1: 949 timer_queue_unlock(queue_to); 950 951 return timers_migrated; 952} 953 954void 955timer_queue_trace_cpu(int ncpu) 956{ 957 timer_call_nosync_cpu( 958 ncpu, 959 (void(*)())timer_queue_trace, 960 (void*) timer_queue_cpu(ncpu)); 961} 962 963void 964timer_queue_trace( 965 mpqueue_head_t *queue) 966{ 967 timer_call_t call; 968 spl_t s; 969 970 if (!kdebug_enable) 971 return; 972 973 s = splclock(); 974 timer_queue_lock_spin(queue); 975 976 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 977 DECR_TIMER_QUEUE | DBG_FUNC_START, 978 queue->count, mach_absolute_time(), 0, 0, 0); 979 980 if (!queue_empty(&queue->head)) { 981 call = TIMER_CALL(queue_first(&queue->head)); 982 do { 983 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 984 DECR_TIMER_QUEUE | DBG_FUNC_NONE, 985 call->soft_deadline, 986 CE(call)->deadline, 987 CE(call)->entry_time, 988 CE(call)->func, 989 0); 990 call = TIMER_CALL(queue_next(qe(call))); 991 } while (!queue_end(&queue->head, qe(call))); 992 } 993 994 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 995 DECR_TIMER_QUEUE | DBG_FUNC_END, 996 queue->count, mach_absolute_time(), 0, 0, 0); 997 998 timer_queue_unlock(queue); 999 splx(s); 1000} 1001 1002void 1003timer_longterm_dequeued_locked(timer_call_t call) 1004{ 1005 timer_longterm_t *tlp = &timer_longterm; 1006 1007 tlp->dequeues++; 1008 if (call == tlp->threshold.call) 1009 tlp->threshold.call = NULL; 1010} 1011 1012/* 1013 * Place a timer call in the longterm list 1014 * and adjust the next timer callout deadline if the new timer is first. 1015 */ 1016mpqueue_head_t * 1017timer_longterm_enqueue_unlocked(timer_call_t call, 1018 uint64_t now, 1019 uint64_t deadline, 1020 mpqueue_head_t **old_queue) 1021{ 1022 timer_longterm_t *tlp = &timer_longterm; 1023 boolean_t update_required = FALSE; 1024 uint64_t longterm_threshold; 1025 1026 longterm_threshold = now + tlp->threshold.interval; 1027 1028 /* 1029 * Return NULL without doing anything if: 1030 * - this timer is local, or 1031 * - the longterm mechanism is disabled, or 1032 * - this deadline is too short. 1033 */ 1034 if (__probable((call->flags & TIMER_CALL_LOCAL) != 0 || 1035 (tlp->threshold.interval == TIMER_LONGTERM_NONE) || 1036 (deadline <= longterm_threshold))) 1037 return NULL; 1038 1039 /* 1040 * Remove timer from its current queue, if any. 1041 */ 1042 *old_queue = timer_call_dequeue_unlocked(call); 1043 1044 /* 1045 * Lock the longterm queue, queue timer and determine 1046 * whether an update is necessary. 1047 */ 1048 assert(!ml_get_interrupts_enabled()); 1049 simple_lock(&call->lock); 1050 timer_queue_lock_spin(timer_longterm_queue); 1051 timer_call_entry_enqueue_tail(call, timer_longterm_queue); 1052 CE(call)->deadline = deadline; 1053 1054 tlp->enqueues++; 1055 1056 /* 1057 * We'll need to update the currently set threshold timer 1058 * if the new deadline is sooner and no sooner update is in flight. 1059 */ 1060 if (deadline < tlp->threshold.deadline && 1061 deadline < tlp->threshold.preempted) { 1062 tlp->threshold.preempted = deadline; 1063 tlp->threshold.call = call; 1064 update_required = TRUE; 1065 } 1066 timer_queue_unlock(timer_longterm_queue); 1067 simple_unlock(&call->lock); 1068 1069 if (update_required) { 1070 timer_call_nosync_cpu( 1071 master_cpu, 1072 (void (*)(void *)) timer_longterm_update, 1073 (void *)tlp); 1074 } 1075 1076 return timer_longterm_queue; 1077} 1078 1079/* 1080 * Scan for timers below the longterm threshold. 1081 * Move these to the local timer queue (of the boot processor on which the 1082 * calling thread is running). 1083 * Both the local (boot) queue and the longterm queue are locked. 1084 * The scan is similar to the timer migrate sequence but is performed by 1085 * successively examining each timer on the longterm queue: 1086 * - if within the short-term threshold 1087 * - enter on the local queue (unless being deleted), 1088 * - otherwise: 1089 * - if sooner, deadline becomes the next threshold deadline. 1090 */ 1091void 1092timer_longterm_scan(timer_longterm_t *tlp, 1093 uint64_t now) 1094{ 1095 queue_entry_t qe; 1096 timer_call_t call; 1097 uint64_t threshold; 1098 uint64_t deadline; 1099 mpqueue_head_t *timer_master_queue; 1100 1101 assert(!ml_get_interrupts_enabled()); 1102 assert(cpu_number() == master_cpu); 1103 1104 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) 1105 threshold = now + tlp->threshold.interval; 1106 else 1107 threshold = TIMER_LONGTERM_NONE; 1108 1109 tlp->threshold.deadline = TIMER_LONGTERM_NONE; 1110 tlp->threshold.call = NULL; 1111 1112 if (queue_empty(&timer_longterm_queue->head)) 1113 return; 1114 1115 timer_master_queue = timer_queue_cpu(master_cpu); 1116 timer_queue_lock_spin(timer_master_queue); 1117 1118 qe = queue_first(&timer_longterm_queue->head); 1119 while (!queue_end(&timer_longterm_queue->head, qe)) { 1120 call = TIMER_CALL(qe); 1121 deadline = call->soft_deadline; 1122 qe = queue_next(qe); 1123 if (!simple_lock_try(&call->lock)) { 1124 /* case (2c) lock order inversion, dequeue only */ 1125#ifdef TIMER_ASSERT 1126 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1127 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 1128 call, 1129 CE(call)->queue, 1130 call->lock.interlock.lock_data, 1131 0x2c, 0); 1132#endif 1133 timer_call_entry_dequeue_async(call); 1134 continue; 1135 } 1136 if (deadline < threshold) { 1137 /* 1138 * This timer needs moving (escalating) 1139 * to the local (boot) processor's queue. 1140 */ 1141#ifdef TIMER_ASSERT 1142 if (deadline < now) 1143 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1144 DECR_TIMER_OVERDUE | DBG_FUNC_NONE, 1145 call, 1146 deadline, 1147 now, 1148 threshold, 1149 0); 1150#endif 1151 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1152 DECR_TIMER_ESCALATE | DBG_FUNC_NONE, 1153 call, 1154 CE(call)->deadline, 1155 CE(call)->entry_time, 1156 CE(call)->func, 1157 0); 1158 tlp->escalates++; 1159 timer_call_entry_dequeue(call); 1160 timer_call_entry_enqueue_deadline( 1161 call, timer_master_queue, CE(call)->deadline); 1162 /* 1163 * A side-effect of the following call is to update 1164 * the actual hardware deadline if required. 1165 */ 1166 (void) timer_queue_assign(deadline); 1167 } else { 1168 if (deadline < tlp->threshold.deadline) { 1169 tlp->threshold.deadline = deadline; 1170 tlp->threshold.call = call; 1171 } 1172 } 1173 simple_unlock(&call->lock); 1174 } 1175 1176 timer_queue_unlock(timer_master_queue); 1177} 1178 1179void 1180timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1) 1181{ 1182 timer_longterm_t *tlp = (timer_longterm_t *) p0; 1183 1184 timer_longterm_update(tlp); 1185} 1186 1187void 1188timer_longterm_update_locked(timer_longterm_t *tlp) 1189{ 1190 uint64_t latency; 1191 1192 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1193 DECR_TIMER_UPDATE | DBG_FUNC_START, 1194 &tlp->queue, 1195 tlp->threshold.deadline, 1196 tlp->threshold.preempted, 1197 tlp->queue.count, 0); 1198 1199 tlp->scan_time = mach_absolute_time(); 1200 if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) { 1201 tlp->threshold.preempts++; 1202 tlp->threshold.deadline = tlp->threshold.preempted; 1203 tlp->threshold.preempted = TIMER_LONGTERM_NONE; 1204 /* 1205 * Note: in the unlikely event that a pre-empted timer has 1206 * itself been cancelled, we'll simply re-scan later at the 1207 * time of the preempted/cancelled timer. 1208 */ 1209 } else { 1210 tlp->threshold.scans++; 1211 1212 /* 1213 * Maintain a moving average of our wakeup latency. 1214 * Clamp latency to 0 and ignore above threshold interval. 1215 */ 1216 if (tlp->scan_time > tlp->threshold.deadline_set) 1217 latency = tlp->scan_time - tlp->threshold.deadline_set; 1218 else 1219 latency = 0; 1220 if (latency < tlp->threshold.interval) { 1221 tlp->threshold.latency_min = 1222 MIN(tlp->threshold.latency_min, latency); 1223 tlp->threshold.latency_max = 1224 MAX(tlp->threshold.latency_max, latency); 1225 tlp->threshold.latency = 1226 (tlp->threshold.latency*99 + latency) / 100; 1227 } 1228 1229 timer_longterm_scan(tlp, tlp->scan_time); 1230 } 1231 1232 tlp->threshold.deadline_set = tlp->threshold.deadline; 1233 /* The next deadline timer to be set is adjusted */ 1234 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { 1235 tlp->threshold.deadline_set -= tlp->threshold.margin; 1236 tlp->threshold.deadline_set -= tlp->threshold.latency; 1237 } 1238 1239 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1240 DECR_TIMER_UPDATE | DBG_FUNC_END, 1241 &tlp->queue, 1242 tlp->threshold.deadline, 1243 tlp->threshold.scans, 1244 tlp->queue.count, 0); 1245} 1246 1247void 1248timer_longterm_update(timer_longterm_t *tlp) 1249{ 1250 spl_t s = splclock(); 1251 1252 timer_queue_lock_spin(timer_longterm_queue); 1253 1254 if (cpu_number() != master_cpu) 1255 panic("timer_longterm_update_master() on non-boot cpu"); 1256 1257 timer_longterm_update_locked(tlp); 1258 1259 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) 1260 timer_call_enter( 1261 &tlp->threshold.timer, 1262 tlp->threshold.deadline_set, 1263 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); 1264 1265 timer_queue_unlock(timer_longterm_queue); 1266 splx(s); 1267} 1268 1269void 1270timer_longterm_init(void) 1271{ 1272 uint32_t longterm; 1273 timer_longterm_t *tlp = &timer_longterm; 1274 1275 DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue); 1276 1277 /* 1278 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD 1279 * or TIMER_LONGTERM_NONE (disabled) for server; 1280 * overridden longterm boot-arg 1281 */ 1282 tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE 1283 : TIMER_LONGTERM_THRESHOLD; 1284 if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) { 1285 tlp->threshold.interval = (longterm == 0) ? 1286 TIMER_LONGTERM_NONE : 1287 longterm * NSEC_PER_MSEC; 1288 } 1289 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { 1290 printf("Longterm timer threshold: %llu ms\n", 1291 tlp->threshold.interval / NSEC_PER_MSEC); 1292 kprintf("Longterm timer threshold: %llu ms\n", 1293 tlp->threshold.interval / NSEC_PER_MSEC); 1294 nanoseconds_to_absolutetime(tlp->threshold.interval, 1295 &tlp->threshold.interval); 1296 tlp->threshold.margin = tlp->threshold.interval / 10; 1297 tlp->threshold.latency_min = EndOfAllTime; 1298 tlp->threshold.latency_max = 0; 1299 } 1300 1301 tlp->threshold.preempted = TIMER_LONGTERM_NONE; 1302 tlp->threshold.deadline = TIMER_LONGTERM_NONE; 1303 1304 lck_attr_setdefault(&timer_longterm_lck_attr); 1305 lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr); 1306 lck_grp_init(&timer_longterm_lck_grp, 1307 "timer_longterm", &timer_longterm_lck_grp_attr); 1308 mpqueue_init(&tlp->queue, 1309 &timer_longterm_lck_grp, &timer_longterm_lck_attr); 1310 1311 timer_call_setup(&tlp->threshold.timer, 1312 timer_longterm_callout, (timer_call_param_t) tlp); 1313 1314 timer_longterm_queue = &tlp->queue; 1315} 1316 1317enum { 1318 THRESHOLD, QCOUNT, 1319 ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, 1320 LATENCY, LATENCY_MIN, LATENCY_MAX 1321}; 1322uint64_t 1323timer_sysctl_get(int oid) 1324{ 1325 timer_longterm_t *tlp = &timer_longterm; 1326 1327 switch (oid) { 1328 case THRESHOLD: 1329 return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ? 1330 0 : tlp->threshold.interval / NSEC_PER_MSEC; 1331 case QCOUNT: 1332 return tlp->queue.count; 1333 case ENQUEUES: 1334 return tlp->enqueues; 1335 case DEQUEUES: 1336 return tlp->dequeues; 1337 case ESCALATES: 1338 return tlp->escalates; 1339 case SCANS: 1340 return tlp->threshold.scans; 1341 case PREEMPTS: 1342 return tlp->threshold.preempts; 1343 case LATENCY: 1344 return tlp->threshold.latency; 1345 case LATENCY_MIN: 1346 return tlp->threshold.latency_min; 1347 case LATENCY_MAX: 1348 return tlp->threshold.latency_max; 1349 default: 1350 return 0; 1351 } 1352} 1353 1354/* 1355 * timer_master_scan() is the inverse of timer_longterm_scan() 1356 * since it un-escalates timers to the longterm queue. 1357 */ 1358static void 1359timer_master_scan(timer_longterm_t *tlp, 1360 uint64_t now) 1361{ 1362 queue_entry_t qe; 1363 timer_call_t call; 1364 uint64_t threshold; 1365 uint64_t deadline; 1366 mpqueue_head_t *timer_master_queue; 1367 1368 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) 1369 threshold = now + tlp->threshold.interval; 1370 else 1371 threshold = TIMER_LONGTERM_NONE; 1372 1373 timer_master_queue = timer_queue_cpu(master_cpu); 1374 timer_queue_lock_spin(timer_master_queue); 1375 1376 qe = queue_first(&timer_master_queue->head); 1377 while (!queue_end(&timer_master_queue->head, qe)) { 1378 call = TIMER_CALL(qe); 1379 deadline = CE(call)->deadline; 1380 qe = queue_next(qe); 1381 if ((call->flags & TIMER_CALL_LOCAL) != 0) 1382 continue; 1383 if (!simple_lock_try(&call->lock)) { 1384 /* case (2c) lock order inversion, dequeue only */ 1385 timer_call_entry_dequeue_async(call); 1386 continue; 1387 } 1388 if (deadline > threshold) { 1389 /* move from master to longterm */ 1390 timer_call_entry_dequeue(call); 1391 timer_call_entry_enqueue_tail(call, timer_longterm_queue); 1392 if (deadline < tlp->threshold.deadline) { 1393 tlp->threshold.deadline = deadline; 1394 tlp->threshold.call = call; 1395 } 1396 } 1397 simple_unlock(&call->lock); 1398 } 1399 timer_queue_unlock(timer_master_queue); 1400} 1401 1402static void 1403timer_sysctl_set_threshold(uint64_t value) 1404{ 1405 timer_longterm_t *tlp = &timer_longterm; 1406 spl_t s = splclock(); 1407 boolean_t threshold_increase; 1408 1409 timer_queue_lock_spin(timer_longterm_queue); 1410 1411 timer_call_cancel(&tlp->threshold.timer); 1412 1413 /* 1414 * Set the new threshold and note whther it's increasing. 1415 */ 1416 if (value == 0) { 1417 tlp->threshold.interval = TIMER_LONGTERM_NONE; 1418 threshold_increase = TRUE; 1419 timer_call_cancel(&tlp->threshold.timer); 1420 } else { 1421 uint64_t old_interval = tlp->threshold.interval; 1422 tlp->threshold.interval = value * NSEC_PER_MSEC; 1423 nanoseconds_to_absolutetime(tlp->threshold.interval, 1424 &tlp->threshold.interval); 1425 tlp->threshold.margin = tlp->threshold.interval / 10; 1426 if (old_interval == TIMER_LONGTERM_NONE) 1427 threshold_increase = FALSE; 1428 else 1429 threshold_increase = (tlp->threshold.interval > old_interval); 1430 } 1431 1432 if (threshold_increase /* or removal */) { 1433 /* Escalate timers from the longterm queue */ 1434 timer_longterm_scan(tlp, mach_absolute_time()); 1435 } else /* decrease or addition */ { 1436 /* 1437 * We scan the local/master queue for timers now longterm. 1438 * To be strictly correct, we should scan all processor queues 1439 * but timer migration results in most timers gravitating to the 1440 * master processor in any case. 1441 */ 1442 timer_master_scan(tlp, mach_absolute_time()); 1443 } 1444 1445 /* Set new timer accordingly */ 1446 tlp->threshold.deadline_set = tlp->threshold.deadline; 1447 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { 1448 tlp->threshold.deadline_set -= tlp->threshold.margin; 1449 tlp->threshold.deadline_set -= tlp->threshold.latency; 1450 timer_call_enter( 1451 &tlp->threshold.timer, 1452 tlp->threshold.deadline_set, 1453 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); 1454 } 1455 1456 /* Reset stats */ 1457 tlp->enqueues = 0; 1458 tlp->dequeues = 0; 1459 tlp->escalates = 0; 1460 tlp->threshold.scans = 0; 1461 tlp->threshold.preempts = 0; 1462 tlp->threshold.latency = 0; 1463 tlp->threshold.latency_min = EndOfAllTime; 1464 tlp->threshold.latency_max = 0; 1465 1466 timer_queue_unlock(timer_longterm_queue); 1467 splx(s); 1468} 1469 1470int 1471timer_sysctl_set(int oid, uint64_t value) 1472{ 1473 switch (oid) { 1474 case THRESHOLD: 1475 timer_call_cpu( 1476 master_cpu, 1477 (void (*)(void *)) timer_sysctl_set_threshold, 1478 (void *) value); 1479 return KERN_SUCCESS; 1480 default: 1481 return KERN_INVALID_ARGUMENT; 1482 } 1483} 1484