/*
 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Timer interrupt callout module.
 */

#include <mach/mach_types.h>

#include <kern/clock.h>
#include <kern/processor.h>
#include <kern/timer_call.h>
#include <kern/timer_queue.h>
#include <kern/call_entry.h>
#include <kern/thread.h>

#include <sys/kdebug.h>

#if CONFIG_DTRACE
#include <mach/sdt.h>
#endif


#if DEBUG
#define TIMER_ASSERT	1
#endif

//#define TIMER_ASSERT	1
//#define TIMER_DBG	1

#if TIMER_DBG
#define DBG(x...) kprintf("DBG: " x);
#else
#define DBG(x...)
#endif

#if TIMER_TRACE
#define TIMER_KDEBUG_TRACE	KERNEL_DEBUG_CONSTANT_IST
#else
#define TIMER_KDEBUG_TRACE(x...)
#endif


lck_grp_t		timer_call_lck_grp;
lck_attr_t		timer_call_lck_attr;
lck_grp_attr_t		timer_call_lck_grp_attr;

lck_grp_t		timer_longterm_lck_grp;
lck_attr_t		timer_longterm_lck_attr;
lck_grp_attr_t		timer_longterm_lck_grp_attr;


#define timer_queue_lock_spin(queue)	\
	lck_mtx_lock_spin_always(&queue->lock_data)

#define timer_queue_unlock(queue)	\
	lck_mtx_unlock_always(&queue->lock_data)


#define QUEUE(x)	((queue_t)(x))
#define MPQUEUE(x)	((mpqueue_head_t *)(x))
#define TIMER_CALL(x)	((timer_call_t)(x))
#define TCE(x)		(&(x->call_entry))

/*
 * The longterm timer object is a global structure holding all timers
 * beyond the short-term, local timer queue threshold. The boot processor
 * is responsible for moving each timer to its local timer queue
 * if and when that timer becomes due within the threshold.
 */
#define TIMER_LONGTERM_NONE		EndOfAllTime
#if defined(__x86_64__)
#define TIMER_LONGTERM_THRESHOLD	(1ULL * NSEC_PER_SEC)
#else
#define TIMER_LONGTERM_THRESHOLD	TIMER_LONGTERM_NONE
#endif
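
/*
 * Illustrative sketch (not compiled into this module): the classification
 * that timer_longterm_enqueue_unlocked() below performs against the
 * threshold above.  A callout whose deadline lies beyond
 * "now + threshold interval" is parked on the global longterm queue;
 * anything sooner stays on a per-processor queue.  The helper name here
 * is hypothetical.
 */
#if 0
static boolean_t
deadline_is_longterm(uint64_t now, uint64_t deadline, uint64_t threshold_interval)
{
	/* A disabled threshold (TIMER_LONGTERM_NONE) never classifies as longterm */
	if (threshold_interval == TIMER_LONGTERM_NONE)
		return FALSE;
	return (deadline > now + threshold_interval);
}
#endif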

typedef struct {
	uint64_t	interval;	/* longterm timer interval */
	uint64_t	margin;		/* fudge factor (10% of interval) */
	uint64_t	deadline;	/* first/soonest longterm deadline */
	uint64_t	preempted;	/* sooner timer has pre-empted */
	timer_call_t	call;		/* first/soonest longterm timer call */
	uint64_t	deadline_set;	/* next timer set */
	timer_call_data_t timer;	/* timer used by threshold management */
					/* Stats: */
	uint64_t	scans;		/*   num threshold timer scans */
	uint64_t	preempts;	/*   num threshold reductions */
	uint64_t	latency;	/*   average threshold latency */
	uint64_t	latency_min;	/*   minimum threshold latency */
	uint64_t	latency_max;	/*   maximum threshold latency */
} threshold_t;

typedef struct {
	mpqueue_head_t	queue;		/* longterm timer list */
	uint64_t	enqueues;	/* num timers queued */
	uint64_t	dequeues;	/* num timers dequeued */
	uint64_t	escalates;	/* num timers becoming shortterm */
	uint64_t	scan_time;	/* last time the list was scanned */
	threshold_t	threshold;	/* longterm timer threshold */
} timer_longterm_t;

timer_longterm_t		timer_longterm;

static mpqueue_head_t		*timer_longterm_queue = NULL;

static void			timer_longterm_init(void);
static void			timer_longterm_callout(
					timer_call_param_t	p0,
					timer_call_param_t	p1);
extern void			timer_longterm_scan(
					timer_longterm_t	*tlp,
					uint64_t		now);
static void			timer_longterm_update(
					timer_longterm_t	*tlp);
static void			timer_longterm_update_locked(
					timer_longterm_t	*tlp);
static mpqueue_head_t *		timer_longterm_enqueue_unlocked(
					timer_call_t		call,
					uint64_t		now,
					uint64_t		deadline,
					mpqueue_head_t		**old_queue,
					uint64_t		soft_deadline,
					uint64_t		ttd,
					timer_call_param_t	param1,
					uint32_t		callout_flags);
static void			timer_longterm_dequeued_locked(
					timer_call_t		call);

uint64_t past_deadline_timers;
uint64_t past_deadline_deltas;
uint64_t past_deadline_longest;
uint64_t past_deadline_shortest = ~0ULL;
enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};

uint64_t past_deadline_timer_adjustment;

static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
boolean_t	mach_timer_coalescing_enabled = TRUE;

mpqueue_head_t	*timer_call_enqueue_deadline_unlocked(
			timer_call_t		call,
			mpqueue_head_t		*queue,
			uint64_t		deadline,
			uint64_t		soft_deadline,
			uint64_t		ttd,
			timer_call_param_t	param1,
			uint32_t		flags);

mpqueue_head_t	*timer_call_dequeue_unlocked(
			timer_call_t		call);

timer_coalescing_priority_params_t tcoal_prio_params;

#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) (x++)
#else
#define TCOAL_PRIO_STAT(x)
#endif

static void
timer_call_init_abstime(void)
{
	int i;
	uint64_t result;
	timer_coalescing_priority_params_ns_t *tcoal_prio_params_init = timer_call_get_priority_params();

	nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns,
	    &result);
	tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
	nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
	tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
	tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
	tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
	tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
	tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
	tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
	tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;

	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
	    &tcoal_prio_params.timer_coalesce_rt_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
	    &tcoal_prio_params.timer_coalesce_bg_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
	    &tcoal_prio_params.timer_coalesce_kt_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
	    &tcoal_prio_params.timer_coalesce_fp_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
	    &tcoal_prio_params.timer_coalesce_ts_abstime_max);

	for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
		tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
		nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
		    &tcoal_prio_params.latency_qos_abstime_max[i]);
		tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
	}
}


void
timer_call_init(void)
{
	lck_attr_setdefault(&timer_call_lck_attr);
	lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
	lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);

	timer_longterm_init();
	timer_call_init_abstime();
}


void
timer_call_queue_init(mpqueue_head_t *queue)
{
	DBG("timer_call_queue_init(%p)\n", queue);
	mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
}


void
timer_call_setup(
	timer_call_t		call,
	timer_call_func_t	func,
	timer_call_param_t	param0)
{
	DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
	call_entry_setup(TCE(call), func, param0);
	simple_lock_init(&(call)->lock, 0);
	call->async_dequeue = FALSE;
}
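
/*
 * Illustrative usage sketch (not compiled into this module): the typical
 * client pattern for the interface above.  The callout storage, handler,
 * interval and cookie below are hypothetical; real clients embed a
 * timer_call_data_t in their own data structures.
 */
#if 0
static timer_call_data_t	example_callout;

static void
example_handler(timer_call_param_t p0, timer_call_param_t p1)
{
	/* Invoked from interrupt context once the (possibly coalesced) deadline fires. */
}

static void
example_arm(void *cookie)
{
	uint64_t deadline;

	timer_call_setup(&example_callout, example_handler, cookie);	/* one-time init */
	clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);	/* 10 ms from now */
	timer_call_enter1(&example_callout, NULL, deadline, TIMER_CALL_SYS_NORMAL);

	/* timer_call_cancel() returns TRUE if the callout was still pending. */
	(void) timer_call_cancel(&example_callout);
}
#endif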
#if TIMER_ASSERT
static __inline__ mpqueue_head_t *
timer_call_entry_dequeue(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	if (!hw_lock_held((hw_lock_t)&entry->lock))
		panic("_call_entry_dequeue() "
			"entry %p is not locked\n", entry);
	/*
	 * XXX The queue lock is actually a mutex in spin mode
	 *     but there's no way to test for it being held
	 *     so we pretend it's a spinlock!
	 */
	if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
		panic("_call_entry_dequeue() "
			"queue %p is not locked\n", old_queue);

	call_entry_dequeue(TCE(entry));
	old_queue->count--;

	return (old_queue);
}

static __inline__ mpqueue_head_t *
timer_call_entry_enqueue_deadline(
	timer_call_t		entry,
	mpqueue_head_t		*queue,
	uint64_t		deadline)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	if (!hw_lock_held((hw_lock_t)&entry->lock))
		panic("_call_entry_enqueue_deadline() "
			"entry %p is not locked\n", entry);
	/* XXX More lock pretense: */
	if (!hw_lock_held((hw_lock_t)&queue->lock_data))
		panic("_call_entry_enqueue_deadline() "
			"queue %p is not locked\n", queue);
	if (old_queue != NULL && old_queue != queue)
		panic("_call_entry_enqueue_deadline() "
			"old_queue %p != queue", old_queue);

	call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);

	/* For efficiency, track the earliest soft deadline on the queue, so that
	 * fuzzy decisions can be made without lock acquisitions.
	 */
	timer_call_t thead = (timer_call_t)queue_first(&queue->head);

	queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;

	if (old_queue)
		old_queue->count--;
	queue->count++;

	return (old_queue);
}

#else

static __inline__ mpqueue_head_t *
timer_call_entry_dequeue(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	call_entry_dequeue(TCE(entry));
	old_queue->count--;

	return old_queue;
}

static __inline__ mpqueue_head_t *
timer_call_entry_enqueue_deadline(
	timer_call_t		entry,
	mpqueue_head_t		*queue,
	uint64_t		deadline)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);

	/* For efficiency, track the earliest soft deadline on the queue,
	 * so that fuzzy decisions can be made without lock acquisitions.
	 */
	timer_call_t thead = (timer_call_t)queue_first(&queue->head);
	queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;

	if (old_queue)
		old_queue->count--;
	queue->count++;

	return old_queue;
}

#endif

static __inline__ void
timer_call_entry_enqueue_tail(
	timer_call_t		entry,
	mpqueue_head_t		*queue)
{
	call_entry_enqueue_tail(TCE(entry), QUEUE(queue));
	queue->count++;
	return;
}

/*
 * Remove timer entry from its queue but don't change the queue pointer
 * and set the async_dequeue flag. This is locking case 2b.
 */
static __inline__ void
timer_call_entry_dequeue_async(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);
	if (old_queue) {
		old_queue->count--;
		(void) remque(qe(entry));
		entry->async_dequeue = TRUE;
	}
	return;
}

#if TIMER_ASSERT
unsigned timer_call_enqueue_deadline_unlocked_async1;
unsigned timer_call_enqueue_deadline_unlocked_async2;
#endif
/*
 * Assumes call_entry and queues unlocked, interrupts disabled.
 */
__inline__ mpqueue_head_t *
timer_call_enqueue_deadline_unlocked(
	timer_call_t		call,
	mpqueue_head_t		*queue,
	uint64_t		deadline,
	uint64_t		soft_deadline,
	uint64_t		ttd,
	timer_call_param_t	param1,
	uint32_t		callout_flags)
{
	call_entry_t	entry = TCE(call);
	mpqueue_head_t	*old_queue;

	DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);

	simple_lock(&call->lock);

	old_queue = MPQUEUE(entry->queue);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (call->async_dequeue) {
			/* collision (1c): timer already dequeued, clear flag */
#if TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				call,
				call->async_dequeue,
				TCE(call)->queue,
				0x1c, 0);
			timer_call_enqueue_deadline_unlocked_async1++;
#endif
			call->async_dequeue = FALSE;
			entry->queue = NULL;
		} else if (old_queue != queue) {
			timer_call_entry_dequeue(call);
#if TIMER_ASSERT
			timer_call_enqueue_deadline_unlocked_async2++;
#endif
		}
		if (old_queue == timer_longterm_queue)
			timer_longterm_dequeued_locked(call);
		if (old_queue != queue) {
			timer_queue_unlock(old_queue);
			timer_queue_lock_spin(queue);
		}
	} else {
		timer_queue_lock_spin(queue);
	}

	call->soft_deadline = soft_deadline;
	call->flags = callout_flags;
	TCE(call)->param1 = param1;
	call->ttd = ttd;

	timer_call_entry_enqueue_deadline(call, queue, deadline);
	timer_queue_unlock(queue);
	simple_unlock(&call->lock);

	return (old_queue);
}

#if TIMER_ASSERT
unsigned timer_call_dequeue_unlocked_async1;
unsigned timer_call_dequeue_unlocked_async2;
#endif
mpqueue_head_t *
timer_call_dequeue_unlocked(
	timer_call_t		call)
{
	call_entry_t	entry = TCE(call);
	mpqueue_head_t	*old_queue;

	DBG("timer_call_dequeue_unlocked(%p)\n", call);

	simple_lock(&call->lock);
	old_queue = MPQUEUE(entry->queue);
#if TIMER_ASSERT
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
		call,
		call->async_dequeue,
		TCE(call)->queue,
		0, 0);
#endif
	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (call->async_dequeue) {
			/* collision (1c): timer already dequeued, clear flag */
#if TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				call,
				call->async_dequeue,
				TCE(call)->queue,
				0x1c, 0);
			timer_call_dequeue_unlocked_async1++;
#endif
			call->async_dequeue = FALSE;
			entry->queue = NULL;
		} else {
			timer_call_entry_dequeue(call);
		}
		if (old_queue == timer_longterm_queue)
			timer_longterm_dequeued_locked(call);
		timer_queue_unlock(old_queue);
	}
	simple_unlock(&call->lock);
	return (old_queue);
}


/*
 * Timer call entry locking model
 * ==============================
 *
 * Timer call entries are linked on per-cpu timer queues which are protected
 * by the queue lock and the call entry lock. The locking protocol is:
 *
 *  0) The canonical locking order is timer call entry followed by queue.
 *
 *  1) With only the entry lock held, entry.queue is valid:
 *    1a) NULL: the entry is not queued, or
 *    1b) non-NULL: this queue must be locked before the entry is modified.
 *        After locking the queue, the call.async_dequeue flag must be checked:
 *    1c) TRUE: the entry was removed from the queue by another thread
 *        and we must NULL the entry.queue and reset this flag, or
 *    1d) FALSE (i.e. queued): the entry can be manipulated.
 *
 *  2) If a queue lock is obtained first, the queue is stable:
 *    2a) If a try-lock of a queued entry succeeds, the call can be operated on
 *        and dequeued.
 *    2b) If a try-lock fails, it indicates that another thread is attempting
 *        to change the entry and move it to a different position in this queue
 *        or to a different queue. The entry can be dequeued but it should not
 *        be operated upon since it is being changed. Furthermore, we don't
 *        null the entry.queue pointer (protected by the entry lock we don't
 *        own). Instead, we set the async_dequeue flag -- see (1c).
 *    2c) Same as 2b but occurring when a longterm timer is matured.
 *  3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
 *     should be manipulated with the appropriate timer queue lock held,
 *     to prevent queue traversals from observing inconsistent updates to an
 *     in-flight callout.
 */
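
/*
 * Illustrative sketch (not compiled into this module) of case (2b) above:
 * a queue-side traversal that cannot take the entry lock unlinks the entry
 * but leaves entry.queue intact and raises async_dequeue, which the
 * entry-lock holder later reconciles as case (1c).  The walker function
 * below is hypothetical; the real traversals appear in timer_queue_shutdown()
 * and timer_queue_expire_with_options().
 */
#if 0
static void
example_queue_walk(mpqueue_head_t *queue)
{
	timer_call_t	call;

	timer_queue_lock_spin(queue);
	while (!queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));
		if (!simple_lock_try(&call->lock)) {
			/* case (2b): unlink only; the entry-lock owner clears async_dequeue */
			timer_call_entry_dequeue_async(call);
			continue;
		}
		/* entry and queue locks both held: the callout may be operated on */
		timer_call_entry_dequeue(call);
		simple_unlock(&call->lock);
	}
	timer_queue_unlock(queue);
}
#endif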

/*
 * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
 * cast between pointer types (mpqueue_head_t *) and (queue_t) so that
 * we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
 * methods to operate on timer_call structs as if they are call_entry structs.
 * These structures are identical except for their queue head pointer fields.
 *
 * In the debug case, we assert that the timer call locking protocol
 * is being obeyed.
 */

static boolean_t
timer_call_enter_internal(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint64_t		leeway,
	uint32_t		flags,
	boolean_t		ratelimited)
{
	mpqueue_head_t	*queue = NULL;
	mpqueue_head_t	*old_queue;
	spl_t		s;
	uint64_t	slop;
	uint32_t	urgency;
	uint64_t	sdeadline, ttd;

	s = splclock();

	sdeadline = deadline;
	uint64_t ctime = mach_absolute_time();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ENTER | DBG_FUNC_START,
		call,
		param1, deadline, flags, 0);

	urgency = (flags & TIMER_CALL_URGENCY_MASK);

	boolean_t slop_ratelimited = FALSE;
	slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);

	if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop)
		slop = leeway;

	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (__improbable(deadline < ctime)) {
		uint64_t delta = (ctime - deadline);

		past_deadline_timers++;
		past_deadline_deltas += delta;
		if (delta > past_deadline_longest)
			past_deadline_longest = delta;
		if (delta < past_deadline_shortest)
			past_deadline_shortest = delta;

		deadline = ctime + past_deadline_timer_adjustment;
		sdeadline = deadline;
	}

	if (ratelimited || slop_ratelimited) {
		flags |= TIMER_CALL_RATELIMITED;
	} else {
		flags &= ~TIMER_CALL_RATELIMITED;
	}

	ttd = sdeadline - ctime;
#if CONFIG_DTRACE
	DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
	    timer_call_param_t, TCE(call)->param0, uint32_t, flags,
	    (deadline - sdeadline),
	    (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
#endif

	/* Program timer callout parameters under the appropriate per-CPU or
	 * longterm queue lock. The callout may have been previously enqueued
	 * and in-flight on this or another timer queue.
	 */
	if (!ratelimited && !slop_ratelimited) {
		queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
	}

	if (queue == NULL) {
		queue = timer_queue_assign(deadline);
		old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
	}

#if TIMER_TRACE
	TCE(call)->entry_time = ctime;
#endif

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ENTER | DBG_FUNC_END,
		call,
		(old_queue != NULL), deadline, queue->count, 0);

	splx(s);

	return (old_queue != NULL);
}

/*
 * timer_call_*()
 *	return boolean indicating whether the call was previously queued.
 */
boolean_t
timer_call_enter(
	timer_call_t		call,
	uint64_t		deadline,
	uint32_t		flags)
{
	return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
}

boolean_t
timer_call_enter1(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint32_t		flags)
{
	return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
}

boolean_t
timer_call_enter_with_leeway(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint64_t		leeway,
	uint32_t		flags,
	boolean_t		ratelimited)
{
	return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
}

boolean_t
timer_call_cancel(
	timer_call_t		call)
{
	mpqueue_head_t	*old_queue;
	spl_t		s;

	s = splclock();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_CANCEL | DBG_FUNC_START,
		call,
		TCE(call)->deadline, call->soft_deadline, call->flags, 0);

	old_queue = timer_call_dequeue_unlocked(call);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (!queue_empty(&old_queue->head)) {
			timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
			timer_call_t thead = (timer_call_t)queue_first(&old_queue->head);
			old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
		}
		else {
			timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX);
			old_queue->earliest_soft_deadline = UINT64_MAX;
		}
		timer_queue_unlock(old_queue);
	}
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_CANCEL | DBG_FUNC_END,
		call,
		old_queue,
		TCE(call)->deadline - mach_absolute_time(),
		TCE(call)->deadline - TCE(call)->entry_time, 0);
	splx(s);

#if CONFIG_DTRACE
	DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
	    timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
	    (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
#endif

	return (old_queue != NULL);
}

static uint32_t	timer_queue_shutdown_lock_skips;
static uint32_t	timer_queue_shutdown_discarded;

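/*
 * timer_queue_shutdown() empties a per-processor timer queue, for use when
 * that processor is taken offline: non-LOCAL callouts are re-queued on
 * whichever queue timer_queue_assign() currently selects, while LOCAL
 * callouts are discarded (only this thread's quantum-expiration timer is
 * expected to remain, per the assertion below).
 */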
void
timer_queue_shutdown(
	mpqueue_head_t		*queue)
{
	timer_call_t	call;
	mpqueue_head_t	*new_queue;
	spl_t		s;


	DBG("timer_queue_shutdown(%p)\n", queue);

	s = splclock();

	/* Note comma operator in while expression re-locking each iteration */
	while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));

		if (!simple_lock_try(&call->lock)) {
			/*
			 * case (2b) lock order inversion, dequeue and skip
			 * Don't change the call_entry queue back-pointer
			 * but set the async_dequeue field.
			 */
			timer_queue_shutdown_lock_skips++;
			timer_call_entry_dequeue_async(call);
#if TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				call,
				call->async_dequeue,
				TCE(call)->queue,
				0x2b, 0);
#endif
			timer_queue_unlock(queue);
			continue;
		}

		boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0);

		/* remove entry from old queue */
		timer_call_entry_dequeue(call);
		timer_queue_unlock(queue);

		if (call_local == FALSE) {
			/* and queue it on new, discarding LOCAL timers */
			new_queue = timer_queue_assign(TCE(call)->deadline);
			timer_queue_lock_spin(new_queue);
			timer_call_entry_enqueue_deadline(
				call, new_queue, TCE(call)->deadline);
			timer_queue_unlock(new_queue);
		} else {
			timer_queue_shutdown_discarded++;
		}

		/* The only lingering LOCAL timer should be this thread's
		 * quantum expiration timer.
		 */
		assert((call_local == FALSE) ||
		    (TCE(call)->func == thread_quantum_expire));

		simple_unlock(&call->lock);
	}

	timer_queue_unlock(queue);
	splx(s);
}

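/*
 * timer_queue_expire_with_options() runs every callout on the queue whose
 * soft deadline has passed, invoking each handler with the queue unlocked.
 * When rescan is TRUE, rate-limited callouts whose soft deadlines have
 * passed but whose hard deadlines have not are re-sorted by soft deadline,
 * effectively annulling their coalescing adjustment.  The return value is
 * the deadline of the earliest remaining callout, or UINT64_MAX if the
 * queue is left empty.
 */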
static uint32_t	timer_queue_expire_lock_skips;
uint64_t
timer_queue_expire_with_options(
	mpqueue_head_t	*queue,
	uint64_t	deadline,
	boolean_t	rescan)
{
	timer_call_t	call = NULL;
	uint32_t	tc_iterations = 0;
	DBG("timer_queue_expire(%p,)\n", queue);

	uint64_t cur_deadline = deadline;
	timer_queue_lock_spin(queue);

	while (!queue_empty(&queue->head)) {
		/* Upon processing one or more timer calls, refresh the
		 * deadline to account for time elapsed in the callout
		 */
		if (++tc_iterations > 1)
			cur_deadline = mach_absolute_time();

		if (call == NULL)
			call = TIMER_CALL(queue_first(&queue->head));

		if (call->soft_deadline <= cur_deadline) {
			timer_call_func_t	func;
			timer_call_param_t	param0, param1;

			TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
				call,
				call->soft_deadline,
				TCE(call)->deadline,
				TCE(call)->entry_time, 0);

			if ((call->flags & TIMER_CALL_RATELIMITED) &&
			    (TCE(call)->deadline > cur_deadline)) {
				if (rescan == FALSE)
					break;
			}

			if (!simple_lock_try(&call->lock)) {
				/* case (2b) lock inversion, dequeue and skip */
				timer_queue_expire_lock_skips++;
				timer_call_entry_dequeue_async(call);
				call = NULL;
				continue;
			}

			timer_call_entry_dequeue(call);

			func = TCE(call)->func;
			param0 = TCE(call)->param0;
			param1 = TCE(call)->param1;

			simple_unlock(&call->lock);
			timer_queue_unlock(queue);

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_CALLOUT | DBG_FUNC_START,
				call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);

#if CONFIG_DTRACE
			DTRACE_TMR7(callout__start, timer_call_func_t, func,
			    timer_call_param_t, param0, unsigned, call->flags,
			    0, (call->ttd >> 32),
			    (unsigned) (call->ttd & 0xFFFFFFFF), call);
#endif
			/* Maintain time-to-deadline in per-processor data
			 * structure for thread wakeup deadline statistics.
			 */
			uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
			*ttdp = call->ttd;
			(*func)(param0, param1);
			*ttdp = 0;
#if CONFIG_DTRACE
			DTRACE_TMR4(callout__end, timer_call_func_t, func,
			    param0, param1, call);
#endif

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_CALLOUT | DBG_FUNC_END,
				call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
			call = NULL;
			timer_queue_lock_spin(queue);
		} else {
			if (__probable(rescan == FALSE)) {
				break;
			} else {
				int64_t skew = TCE(call)->deadline - call->soft_deadline;
				assert(TCE(call)->deadline >= call->soft_deadline);

				/* DRK: On a latency quality-of-service level change,
				 * re-sort potentially rate-limited timers. The platform
				 * layer determines which timers require
				 * this. In the absence of the per-callout
				 * synchronization requirement, a global resort could
				 * be more efficient. The re-sort effectively
				 * annuls all timer adjustments, i.e. the "soft
				 * deadline" is the sort key.
				 */

				if (timer_resort_threshold(skew)) {
					if (__probable(simple_lock_try(&call->lock))) {
						timer_call_entry_dequeue(call);
						timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline);
						simple_unlock(&call->lock);
						call = NULL;
					}
				}
				if (call) {
					call = TIMER_CALL(queue_next(qe(call)));
					if (queue_end(&queue->head, qe(call)))
						break;
				}
			}
		}
	}

	if (!queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));
		cur_deadline = TCE(call)->deadline;
		queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline : call->soft_deadline;
	} else {
		queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
	}

	timer_queue_unlock(queue);

	return (cur_deadline);
}

uint64_t
timer_queue_expire(
	mpqueue_head_t	*queue,
	uint64_t	deadline)
{
	return timer_queue_expire_with_options(queue, deadline, FALSE);
}

extern int serverperfmode;
static uint32_t	timer_queue_migrate_lock_skips;
/*
 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
 * to move timer requests from the local processor (queue_from)
 * to a target processor's queue (queue_to).
 */
int
timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
{
	timer_call_t	call;
	timer_call_t	head_to;
	int		timers_migrated = 0;

	DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);

	assert(!ml_get_interrupts_enabled());
	assert(queue_from != queue_to);

	if (serverperfmode) {
		/*
		 * if we're running a high end server
		 * avoid migrations... they add latency
		 * and don't save us power under typical
		 * server workloads
		 */
		return -4;
	}

	/*
	 * Take both local (from) and target (to) timer queue locks while
	 * moving the timers from the local queue to the target processor.
	 * We assume that the target is always the boot processor.
	 * But only move if all of the following is true:
	 *  - the target queue is non-empty
	 *  - the local queue is non-empty
	 *  - the local queue's first deadline is later than the target's
	 *  - the local queue contains no non-migrateable "local" call
	 *    so that we need not have the target resync.
	 */

	timer_queue_lock_spin(queue_to);

	head_to = TIMER_CALL(queue_first(&queue_to->head));
	if (queue_empty(&queue_to->head)) {
		timers_migrated = -1;
		goto abort1;
	}

	timer_queue_lock_spin(queue_from);

	if (queue_empty(&queue_from->head)) {
		timers_migrated = -2;
		goto abort2;
	}

	call = TIMER_CALL(queue_first(&queue_from->head));
	if (TCE(call)->deadline < TCE(head_to)->deadline) {
		timers_migrated = 0;
		goto abort2;
	}

	/* perform scan for non-migratable timers */
	do {
		if (call->flags & TIMER_CALL_LOCAL) {
			timers_migrated = -3;
			goto abort2;
		}
		call = TIMER_CALL(queue_next(qe(call)));
	} while (!queue_end(&queue_from->head, qe(call)));

	/* migration loop itself -- both queues are locked */
	while (!queue_empty(&queue_from->head)) {
		call = TIMER_CALL(queue_first(&queue_from->head));
		if (!simple_lock_try(&call->lock)) {
			/* case (2b) lock order inversion, dequeue only */
#ifdef TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				call,
				TCE(call)->queue,
				call->lock.interlock.lock_data,
				0x2b, 0);
#endif
			timer_queue_migrate_lock_skips++;
			timer_call_entry_dequeue_async(call);
			continue;
		}
		timer_call_entry_dequeue(call);
		timer_call_entry_enqueue_deadline(
			call, queue_to, TCE(call)->deadline);
		timers_migrated++;
		simple_unlock(&call->lock);
	}
	queue_from->earliest_soft_deadline = UINT64_MAX;
abort2:
	timer_queue_unlock(queue_from);
abort1:
	timer_queue_unlock(queue_to);

	return timers_migrated;
}

void
timer_queue_trace_cpu(int ncpu)
{
	timer_call_nosync_cpu(
		ncpu,
		(void(*)())timer_queue_trace,
		(void*) timer_queue_cpu(ncpu));
}

void
timer_queue_trace(
	mpqueue_head_t	*queue)
{
	timer_call_t	call;
	spl_t		s;

	if (!kdebug_enable)
		return;

	s = splclock();
	timer_queue_lock_spin(queue);

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_QUEUE | DBG_FUNC_START,
		queue->count, mach_absolute_time(), 0, 0, 0);

	if (!queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));
		do {
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_QUEUE | DBG_FUNC_NONE,
				call->soft_deadline,
				TCE(call)->deadline,
				TCE(call)->entry_time,
				TCE(call)->func,
				0);
			call = TIMER_CALL(queue_next(qe(call)));
		} while (!queue_end(&queue->head, qe(call)));
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_QUEUE | DBG_FUNC_END,
		queue->count, mach_absolute_time(), 0, 0, 0);

	timer_queue_unlock(queue);
	splx(s);
}

void
timer_longterm_dequeued_locked(timer_call_t call)
{
	timer_longterm_t	*tlp = &timer_longterm;

	tlp->dequeues++;
	if (call == tlp->threshold.call)
		tlp->threshold.call = NULL;
}

/*
 * Place a timer call in the longterm list
 * and adjust the next timer callout deadline if the new timer is first.
 */
mpqueue_head_t *
timer_longterm_enqueue_unlocked(timer_call_t	call,
				uint64_t	now,
				uint64_t	deadline,
				mpqueue_head_t	**old_queue,
				uint64_t	soft_deadline,
				uint64_t	ttd,
				timer_call_param_t	param1,
				uint32_t	callout_flags)
{
	timer_longterm_t	*tlp = &timer_longterm;
	boolean_t		update_required = FALSE;
	uint64_t		longterm_threshold;

	longterm_threshold = now + tlp->threshold.interval;

	/*
	 * Return NULL without doing anything if:
	 *  - this timer is local, or
	 *  - the longterm mechanism is disabled, or
	 *  - this deadline is too short.
	 */
	if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
	    (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
	    (deadline <= longterm_threshold))
		return NULL;

	/*
	 * Remove timer from its current queue, if any.
	 */
	*old_queue = timer_call_dequeue_unlocked(call);

	/*
	 * Lock the longterm queue, queue timer and determine
	 * whether an update is necessary.
	 */
	assert(!ml_get_interrupts_enabled());
	simple_lock(&call->lock);
	timer_queue_lock_spin(timer_longterm_queue);
	TCE(call)->deadline = deadline;
	TCE(call)->param1 = param1;
	call->ttd = ttd;
	call->soft_deadline = soft_deadline;
	call->flags = callout_flags;
	timer_call_entry_enqueue_tail(call, timer_longterm_queue);

	tlp->enqueues++;

	/*
	 * We'll need to update the currently set threshold timer
	 * if the new deadline is sooner and no sooner update is in flight.
	 */
	if (deadline < tlp->threshold.deadline &&
	    deadline < tlp->threshold.preempted) {
		tlp->threshold.preempted = deadline;
		tlp->threshold.call = call;
		update_required = TRUE;
	}
	timer_queue_unlock(timer_longterm_queue);
	simple_unlock(&call->lock);

	if (update_required) {
		/*
		 * Note: this call expects that calling the master cpu
		 * alone does not involve locking the topo lock.
		 */
		timer_call_nosync_cpu(
			master_cpu,
			(void (*)(void *)) timer_longterm_update,
			(void *)tlp);
	}

	return timer_longterm_queue;
}

/*
 * Scan for timers below the longterm threshold.
 * Move these to the local timer queue (of the boot processor on which the
 * calling thread is running).
 * Both the local (boot) queue and the longterm queue are locked.
 * The scan is similar to the timer migrate sequence but is performed by
 * successively examining each timer on the longterm queue:
 *  - if within the short-term threshold
 *    - enter on the local queue (unless being deleted),
 *  - otherwise:
 *    - if sooner, deadline becomes the next threshold deadline.
 */
void
timer_longterm_scan(timer_longterm_t	*tlp,
		    uint64_t		now)
{
	queue_entry_t	qe;
	timer_call_t	call;
	uint64_t	threshold;
	uint64_t	deadline;
	mpqueue_head_t	*timer_master_queue;

	assert(!ml_get_interrupts_enabled());
	assert(cpu_number() == master_cpu);

	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
		threshold = now + tlp->threshold.interval;
	else
		threshold = TIMER_LONGTERM_NONE;

	tlp->threshold.deadline = TIMER_LONGTERM_NONE;
	tlp->threshold.call = NULL;

	if (queue_empty(&timer_longterm_queue->head))
		return;

	timer_master_queue = timer_queue_cpu(master_cpu);
	timer_queue_lock_spin(timer_master_queue);

	qe = queue_first(&timer_longterm_queue->head);
	while (!queue_end(&timer_longterm_queue->head, qe)) {
		call = TIMER_CALL(qe);
		deadline = call->soft_deadline;
		qe = queue_next(qe);
		if (!simple_lock_try(&call->lock)) {
			/* case (2c) lock order inversion, dequeue only */
#ifdef TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				call,
				TCE(call)->queue,
				call->lock.interlock.lock_data,
				0x2c, 0);
#endif
			timer_call_entry_dequeue_async(call);
			continue;
		}
		if (deadline < threshold) {
			/*
			 * This timer needs moving (escalating)
			 * to the local (boot) processor's queue.
			 */
#ifdef TIMER_ASSERT
			if (deadline < now)
				TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
					DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
					call,
					deadline,
					now,
					threshold,
					0);
#endif
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
				call,
				TCE(call)->deadline,
				TCE(call)->entry_time,
				TCE(call)->func,
				0);
			tlp->escalates++;
			timer_call_entry_dequeue(call);
			timer_call_entry_enqueue_deadline(
				call, timer_master_queue, TCE(call)->deadline);
			/*
			 * A side-effect of the following call is to update
			 * the actual hardware deadline if required.
			 */
			(void) timer_queue_assign(deadline);
		} else {
			if (deadline < tlp->threshold.deadline) {
				tlp->threshold.deadline = deadline;
				tlp->threshold.call = call;
			}
		}
		simple_unlock(&call->lock);
	}

	timer_queue_unlock(timer_master_queue);
}

void
timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
{
	timer_longterm_t	*tlp = (timer_longterm_t *) p0;

	timer_longterm_update(tlp);
}

void
timer_longterm_update_locked(timer_longterm_t *tlp)
{
	uint64_t	latency;

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_UPDATE | DBG_FUNC_START,
		&tlp->queue,
		tlp->threshold.deadline,
		tlp->threshold.preempted,
		tlp->queue.count, 0);

	tlp->scan_time = mach_absolute_time();
	if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
		tlp->threshold.preempts++;
		tlp->threshold.deadline = tlp->threshold.preempted;
		tlp->threshold.preempted = TIMER_LONGTERM_NONE;
		/*
		 * Note: in the unlikely event that a pre-empted timer has
		 * itself been cancelled, we'll simply re-scan later at the
		 * time of the preempted/cancelled timer.
		 */
	} else {
		tlp->threshold.scans++;

		/*
		 * Maintain a moving average of our wakeup latency.
		 * Clamp latency to 0 and ignore above threshold interval.
		 */
		if (tlp->scan_time > tlp->threshold.deadline_set)
			latency = tlp->scan_time - tlp->threshold.deadline_set;
		else
			latency = 0;
		if (latency < tlp->threshold.interval) {
			tlp->threshold.latency_min =
				MIN(tlp->threshold.latency_min, latency);
			tlp->threshold.latency_max =
				MAX(tlp->threshold.latency_max, latency);
			tlp->threshold.latency =
				(tlp->threshold.latency*99 + latency) / 100;
		}

		timer_longterm_scan(tlp, tlp->scan_time);
	}

	tlp->threshold.deadline_set = tlp->threshold.deadline;
	/* The next deadline timer to be set is adjusted */
	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
		tlp->threshold.deadline_set -= tlp->threshold.margin;
		tlp->threshold.deadline_set -= tlp->threshold.latency;
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_UPDATE | DBG_FUNC_END,
		&tlp->queue,
		tlp->threshold.deadline,
		tlp->threshold.scans,
		tlp->queue.count, 0);
}

void
timer_longterm_update(timer_longterm_t *tlp)
{
	spl_t	s = splclock();

	timer_queue_lock_spin(timer_longterm_queue);

	if (cpu_number() != master_cpu)
		panic("timer_longterm_update_master() on non-boot cpu");

	timer_longterm_update_locked(tlp);

	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE)
		timer_call_enter(
			&tlp->threshold.timer,
			tlp->threshold.deadline_set,
			TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);

	timer_queue_unlock(timer_longterm_queue);
	splx(s);
}

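/*
 * Worked example of the threshold arming above (illustrative numbers only):
 * if the soonest longterm deadline is 1 s away, the margin is 100 ms
 * (10% of a 1 s threshold interval) and the smoothed wakeup latency is
 * 2 ms, the threshold timer is programmed 102 ms early, i.e. at
 * deadline - margin - latency, so the escalation scan completes before
 * the escalated timer actually becomes due.  The latency term itself is
 * the 99:1 exponentially weighted moving average of observed scan wakeups
 * maintained in timer_longterm_update_locked().
 */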
void
timer_longterm_init(void)
{
	uint32_t		longterm;
	timer_longterm_t	*tlp = &timer_longterm;

	DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);

	/*
	 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD,
	 * or to TIMER_LONGTERM_NONE (disabled) for servers;
	 * it can be overridden with the "longterm" boot-arg (in milliseconds).
	 */
	tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
						 : TIMER_LONGTERM_THRESHOLD;
	if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) {
		tlp->threshold.interval = (longterm == 0) ?
						TIMER_LONGTERM_NONE :
						longterm * NSEC_PER_MSEC;
	}
	if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
		printf("Longterm timer threshold: %llu ms\n",
			tlp->threshold.interval / NSEC_PER_MSEC);
		kprintf("Longterm timer threshold: %llu ms\n",
			tlp->threshold.interval / NSEC_PER_MSEC);
		nanoseconds_to_absolutetime(tlp->threshold.interval,
					    &tlp->threshold.interval);
		tlp->threshold.margin = tlp->threshold.interval / 10;
		tlp->threshold.latency_min = EndOfAllTime;
		tlp->threshold.latency_max = 0;
	}

	tlp->threshold.preempted = TIMER_LONGTERM_NONE;
	tlp->threshold.deadline = TIMER_LONGTERM_NONE;

	lck_attr_setdefault(&timer_longterm_lck_attr);
	lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr);
	lck_grp_init(&timer_longterm_lck_grp,
		     "timer_longterm", &timer_longterm_lck_grp_attr);
	mpqueue_init(&tlp->queue,
		     &timer_longterm_lck_grp, &timer_longterm_lck_attr);

	timer_call_setup(&tlp->threshold.timer,
			 timer_longterm_callout, (timer_call_param_t) tlp);

	timer_longterm_queue = &tlp->queue;
}

enum {
	THRESHOLD, QCOUNT,
	ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
	LATENCY, LATENCY_MIN, LATENCY_MAX
};
uint64_t
timer_sysctl_get(int oid)
{
	timer_longterm_t	*tlp = &timer_longterm;

	switch (oid) {
	case THRESHOLD:
		return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
			0 : tlp->threshold.interval / NSEC_PER_MSEC;
	case QCOUNT:
		return tlp->queue.count;
	case ENQUEUES:
		return tlp->enqueues;
	case DEQUEUES:
		return tlp->dequeues;
	case ESCALATES:
		return tlp->escalates;
	case SCANS:
		return tlp->threshold.scans;
	case PREEMPTS:
		return tlp->threshold.preempts;
	case LATENCY:
		return tlp->threshold.latency;
	case LATENCY_MIN:
		return tlp->threshold.latency_min;
	case LATENCY_MAX:
		return tlp->threshold.latency_max;
	default:
		return 0;
	}
}

/*
 * timer_master_scan() is the inverse of timer_longterm_scan()
 * since it un-escalates timers to the longterm queue.
 */
static void
timer_master_scan(timer_longterm_t	*tlp,
		  uint64_t		now)
{
	queue_entry_t	qe;
	timer_call_t	call;
	uint64_t	threshold;
	uint64_t	deadline;
	mpqueue_head_t	*timer_master_queue;

	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
		threshold = now + tlp->threshold.interval;
	else
		threshold = TIMER_LONGTERM_NONE;

	timer_master_queue = timer_queue_cpu(master_cpu);
	timer_queue_lock_spin(timer_master_queue);

	qe = queue_first(&timer_master_queue->head);
	while (!queue_end(&timer_master_queue->head, qe)) {
		call = TIMER_CALL(qe);
		deadline = TCE(call)->deadline;
		qe = queue_next(qe);
		if ((call->flags & TIMER_CALL_LOCAL) != 0)
			continue;
		if (!simple_lock_try(&call->lock)) {
			/* case (2c) lock order inversion, dequeue only */
			timer_call_entry_dequeue_async(call);
			continue;
		}
		if (deadline > threshold) {
			/* move from master to longterm */
			timer_call_entry_dequeue(call);
			timer_call_entry_enqueue_tail(call, timer_longterm_queue);
			if (deadline < tlp->threshold.deadline) {
				tlp->threshold.deadline = deadline;
				tlp->threshold.call = call;
			}
		}
		simple_unlock(&call->lock);
	}
	timer_queue_unlock(timer_master_queue);
}

static void
timer_sysctl_set_threshold(uint64_t value)
{
	timer_longterm_t	*tlp = &timer_longterm;
	spl_t			s = splclock();
	boolean_t		threshold_increase;

	timer_queue_lock_spin(timer_longterm_queue);

	timer_call_cancel(&tlp->threshold.timer);

	/*
	 * Set the new threshold and note whether it's increasing.
	 */
	if (value == 0) {
		tlp->threshold.interval = TIMER_LONGTERM_NONE;
		threshold_increase = TRUE;
		timer_call_cancel(&tlp->threshold.timer);
	} else {
		uint64_t	old_interval = tlp->threshold.interval;
		tlp->threshold.interval = value * NSEC_PER_MSEC;
		nanoseconds_to_absolutetime(tlp->threshold.interval,
					    &tlp->threshold.interval);
		tlp->threshold.margin = tlp->threshold.interval / 10;
		if (old_interval == TIMER_LONGTERM_NONE)
			threshold_increase = FALSE;
		else
			threshold_increase = (tlp->threshold.interval > old_interval);
	}

	if (threshold_increase /* or removal */) {
		/* Escalate timers from the longterm queue */
		timer_longterm_scan(tlp, mach_absolute_time());
	} else /* decrease or addition */ {
		/*
		 * We scan the local/master queue for timers now longterm.
		 * To be strictly correct, we should scan all processor queues
		 * but timer migration results in most timers gravitating to the
		 * master processor in any case.
		 */
		timer_master_scan(tlp, mach_absolute_time());
	}

	/* Set new timer accordingly */
	tlp->threshold.deadline_set = tlp->threshold.deadline;
	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
		tlp->threshold.deadline_set -= tlp->threshold.margin;
		tlp->threshold.deadline_set -= tlp->threshold.latency;
		timer_call_enter(
			&tlp->threshold.timer,
			tlp->threshold.deadline_set,
			TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
	}

	/* Reset stats */
	tlp->enqueues = 0;
	tlp->dequeues = 0;
	tlp->escalates = 0;
	tlp->threshold.scans = 0;
	tlp->threshold.preempts = 0;
	tlp->threshold.latency = 0;
	tlp->threshold.latency_min = EndOfAllTime;
	tlp->threshold.latency_max = 0;

	timer_queue_unlock(timer_longterm_queue);
	splx(s);
}

int
timer_sysctl_set(int oid, uint64_t value)
{
	switch (oid) {
	case THRESHOLD:
		timer_call_cpu(
			master_cpu,
			(void (*)(void *)) timer_sysctl_set_threshold,
			(void *) value);
		return KERN_SUCCESS;
	default:
		return KERN_INVALID_ARGUMENT;
	}
}


/* Select timer coalescing window based on per-task quality-of-service hints */
static boolean_t
tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited)
{
	uint32_t latency_qos;
	boolean_t adjusted = FALSE;
	task_t ctask = t->task;

	if (ctask) {
		latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);

		assert(latency_qos <= NUM_LATENCY_QOS_TIERS);

		if (latency_qos) {
			*tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
			*tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1];
			*pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
			adjusted = TRUE;
		}
	}
	return adjusted;
}


/* Adjust timer deadlines based on priority of the thread and the
 * urgency value provided at timeout establishment. With this mechanism,
 * timers are no longer necessarily sorted in order of soft deadline
 * on a given timer queue, i.e. they may be differentially skewed.
 * In the current scheme, this could lead to fewer pending timers
 * processed than is technically possible when the HW deadline arrives.
 */
static void
timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited)
{
	int16_t tpri = cthread->sched_pri;
	if ((urgency & TIMER_CALL_USER_MASK) != 0) {
		if (tpri >= BASEPRI_RTQUEUES ||
		    urgency == TIMER_CALL_USER_CRITICAL) {
			*tshift = tcoal_prio_params.timer_coalesce_rt_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
			TCOAL_PRIO_STAT(rt_tcl);
		} else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) ||
		    (urgency == TIMER_CALL_USER_BACKGROUND)) {
			/* Determine if timer should be subjected to a lower QoS */
			if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
				if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
					return;
				} else {
					*pratelimited = FALSE;
				}
			}
			*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
			TCOAL_PRIO_STAT(bg_tcl);
		} else if (tpri >= MINPRI_KERNEL) {
			*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
			TCOAL_PRIO_STAT(kt_tcl);
		} else if (cthread->sched_mode == TH_MODE_FIXED) {
			*tshift = tcoal_prio_params.timer_coalesce_fp_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
			TCOAL_PRIO_STAT(fp_tcl);
		} else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
			TCOAL_PRIO_STAT(qos_tcl);
		} else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
			*tshift = tcoal_prio_params.timer_coalesce_ts_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
			TCOAL_PRIO_STAT(ts_tcl);
		} else {
			TCOAL_PRIO_STAT(nc_tcl);
		}
	} else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
		*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
		*tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
		TCOAL_PRIO_STAT(bg_tcl);
	} else {
		*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
		*tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
		TCOAL_PRIO_STAT(kt_tcl);
	}
}


int timer_user_idle_level;

uint64_t
timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
{
	int32_t tcs_shift = 0;
	uint64_t tcs_max_abstime = 0;
	uint64_t adjval;
	uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);

	if (mach_timer_coalescing_enabled &&
	    (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
		timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);

		if (tcs_shift >= 0)
			adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
		else
			adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
		/* Apply adjustments derived from "user idle level" heuristic */
		adjval += (adjval * timer_user_idle_level) >> 7;
		return adjval;
	} else {
		return 0;
	}
}

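/*
 * Worked example of the slop computation above (illustrative values; the
 * per-tier shift and cap actually come from tcoal_prio_params at boot):
 * with a coalescing shift of 3 and a generous cap, a timer 80 ms in the
 * future receives (80 ms >> 3) = 10 ms of leeway; with timer_user_idle_level
 * at 64, this grows by (10 ms * 64) >> 7 = 5 ms, so the hard deadline ends
 * up 15 ms beyond the requested soft deadline, subject to the tier's
 * tcs_max_abstime cap.
 */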
int
timer_get_user_idle_level(void)
{
	return timer_user_idle_level;
}

kern_return_t
timer_set_user_idle_level(int ilevel)
{
	boolean_t do_reeval = FALSE;

	if ((ilevel < 0) || (ilevel > 128))
		return KERN_INVALID_ARGUMENT;

	if (ilevel < timer_user_idle_level) {
		do_reeval = TRUE;
	}

	timer_user_idle_level = ilevel;

	if (do_reeval)
		ml_timer_evaluate();

	return KERN_SUCCESS;
}