/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * @APPLE_FREE_COPYRIGHT@
 */
/*
 *	File:		timer.c
 *	Purpose:	Routines for handling the machine-independent timer.
 */

#include <mach/mach_types.h>

#include <kern/timer_queue.h>
#include <kern/timer_call.h>
#include <kern/clock.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/macro_help.h>
#include <kern/spl.h>
#include <kern/pms.h>

#include <machine/commpage.h>
#include <machine/machine_routines.h>

#include <sys/kdebug.h>
#include <i386/cpu_data.h>
#include <i386/cpu_topology.h>
#include <i386/cpu_threads.h>

uint32_t spurious_timers;

/*
 * Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier
 * must not set deadlines that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and
 * iterate over them.
 */
void
timer_intr(int		user_mode,
	   uint64_t	rip)
{
	uint64_t	abstime;
	rtclock_timer_t	*mytimer;
	cpu_data_t	*pp;
	int64_t		latency;
	uint64_t	pmdeadline;
	boolean_t	timer_processed = FALSE;

	pp = current_cpu_datap();

	SCHED_STATS_TIMER_POP(current_processor());

	abstime = mach_absolute_time();		/* Get the time now */

	/* has a pending clock timer expired? */
	mytimer = &pp->rtclock_timer;		/* Point to the event timer */

	if ((timer_processed = ((mytimer->deadline <= abstime) ||
		    (abstime >= (mytimer->queue.earliest_soft_deadline))))) {
		/*
		 * Log interrupt service latency (negative value expected
		 * by the latency tool); a non-PM event is expected next.
		 * The requested deadline may be earlier than when it was
		 * set - use MAX to avoid reporting bogus latencies.
		 */
		latency = (int64_t) (abstime - MAX(mytimer->deadline,
						   mytimer->when_set));
		/*
		 * Log zero timer latencies when opportunistically
		 * processing coalesced timers.
		 */
		if (latency < 0) {
			TCOAL_DEBUG(0xEEEE0000, abstime,
			    mytimer->queue.earliest_soft_deadline,
			    abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
			latency = 0;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_TRAP_LATENCY | DBG_FUNC_NONE,
			-latency,
			((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
			user_mode, 0, 0);

		mytimer->has_expired = TRUE;	/* Remember that we popped */
		mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
		mytimer->has_expired = FALSE;

		/* Get the time again since we ran a bit */
		abstime = mach_absolute_time();
		mytimer->when_set = abstime;
	}

	/* is it time for power management state change? */
	if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_PM_DEADLINE | DBG_FUNC_START,
			0, 0, 0, 0, 0);
		pmCPUDeadline(pp);
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_PM_DEADLINE | DBG_FUNC_END,
			0, 0, 0, 0, 0);
		timer_processed = TRUE;
	}

	/* schedule our next deadline */
	x86_lcpu()->rtcDeadline = EndOfAllTime;
	timer_resync_deadlines();

	if (__improbable(timer_processed == FALSE))
		spurious_timers++;
}
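/*
 * Illustrative sketch only (not part of the original file, not compiled):
 * the XXX note above timer_intr() suggests a set of generic callouts
 * iterated in one pass. A minimal shape of that idea might look like the
 * following; `timer_callout_t', `NCALLOUTS' and `callouts' are hypothetical
 * names that do not exist in this file.
 */
#if 0
typedef struct {
	/* returns TRUE if this event source has a pending expiry */
	boolean_t	(*pending)(cpu_data_t *pp, uint64_t abstime);
	/* services the event source */
	void		(*service)(cpu_data_t *pp, uint64_t abstime);
} timer_callout_t;

static timer_callout_t	callouts[NCALLOUTS];

static boolean_t
timer_intr_iterate(cpu_data_t *pp)
{
	boolean_t	processed = FALSE;
	uint64_t	abstime = mach_absolute_time();
	int		i;

	for (i = 0; i < NCALLOUTS; i++) {
		if (callouts[i].pending(pp, abstime)) {
			callouts[i].service(pp, abstime);
			processed = TRUE;
			/* servicing takes time, so re-read the clock */
			abstime = mach_absolute_time();
		}
	}
	return processed;
}
#endif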
/*
 * Set the clock deadline.
 */
void
timer_set_deadline(uint64_t deadline)
{
	rtclock_timer_t	*mytimer;
	spl_t		s;
	cpu_data_t	*pp;

	s = splclock();			/* no interruptions */
	pp = current_cpu_datap();

	mytimer = &pp->rtclock_timer;	/* Point to the timer itself */
	mytimer->deadline = deadline;	/* Set new expiration time */
	mytimer->when_set = mach_absolute_time();

	timer_resync_deadlines();

	splx(s);
}

/*
 * Re-evaluate the outstanding deadlines and select the most proximate.
 *
 * Should be called at splclock.
 */
void
timer_resync_deadlines(void)
{
	uint64_t	deadline = EndOfAllTime;
	uint64_t	pmdeadline;
	rtclock_timer_t	*mytimer;
	spl_t		s = splclock();
	cpu_data_t	*pp;
	uint32_t	decr;

	pp = current_cpu_datap();
	if (!pp->cpu_running)
		/* There's really nothing to do if this processor is down */
		return;

	/*
	 * If we have a clock timer set, pick that.
	 */
	mytimer = &pp->rtclock_timer;
	if (!mytimer->has_expired &&
	    0 < mytimer->deadline && mytimer->deadline < EndOfAllTime)
		deadline = mytimer->deadline;

	/*
	 * If we have a power management deadline, see if that's earlier.
	 */
	pmdeadline = pmCPUGetDeadline(pp);
	if (0 < pmdeadline && pmdeadline < deadline)
		deadline = pmdeadline;

	/*
	 * Go and set the "pop" event.
	 */
	decr = (uint32_t) setPop(deadline);

	/* Record non-PM deadline for latency tool */
	if (decr != 0 && deadline != pmdeadline) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_SET_DEADLINE | DBG_FUNC_NONE,
			decr, 2,
			deadline,
			mytimer->queue.count, 0);
	}
	splx(s);
}
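/*
 * Usage sketch (hypothetical caller, not compiled): timer_resync_deadlines()
 * expects to run at splclock; a caller that is not already there would
 * bracket the call the same way timer_set_deadline() does above.
 */
#if 0
static void
example_resync_at_splclock(void)
{
	spl_t	s = splclock();		/* block clock interrupts */

	timer_resync_deadlines();
	splx(s);			/* restore previous level */
}
#endif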
void
timer_queue_expire_local(
	__unused void	*arg)
{
	rtclock_timer_t	*mytimer;
	uint64_t	abstime;
	cpu_data_t	*pp;

	pp = current_cpu_datap();

	mytimer = &pp->rtclock_timer;
	abstime = mach_absolute_time();

	mytimer->has_expired = TRUE;
	mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
	mytimer->has_expired = FALSE;
	mytimer->when_set = mach_absolute_time();

	timer_resync_deadlines();
}

void
timer_queue_expire_rescan(
	__unused void	*arg)
{
	rtclock_timer_t	*mytimer;
	uint64_t	abstime;
	cpu_data_t	*pp;

	assert(ml_get_interrupts_enabled() == FALSE);
	pp = current_cpu_datap();

	mytimer = &pp->rtclock_timer;
	abstime = mach_absolute_time();

	mytimer->has_expired = TRUE;
	mytimer->deadline = timer_queue_expire_with_options(&mytimer->queue, abstime, TRUE);
	mytimer->has_expired = FALSE;
	mytimer->when_set = mach_absolute_time();

	timer_resync_deadlines();
}

/* N.B.: Max leeway values assume 1GHz timebase */
timer_coalescing_priority_params_t tcoal_prio_params =
{
	/* Deadline scale values for each thread attribute */
	0, -5, 3, 3, 3,
	/* Maximum leeway in abstime for each thread attribute */
	0ULL, 100*NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC,
	/* Deadline scale values for each latency QoS tier */
	{3, 2, 1, -2, -15, -15},
	/* Maximum leeway in abstime for each latency QoS tier */
	{1*NSEC_PER_MSEC, 5*NSEC_PER_MSEC, 20*NSEC_PER_MSEC, 75*NSEC_PER_MSEC,
	 10*NSEC_PER_SEC, 10*NSEC_PER_SEC},
	/* Signifies that the tier requires rate-limiting */
	{FALSE, FALSE, FALSE, FALSE, TRUE, TRUE}
};

#define TIMER_RESORT_THRESHOLD_ABSTIME (50 * NSEC_PER_MSEC)

#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) (x++)
#else
#define TCOAL_PRIO_STAT(x)
#endif

/* Select timer coalescing window based on per-task quality-of-service hints */
static boolean_t
tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited)
{
	uint32_t	latency_qos;
	boolean_t	adjusted = FALSE;
	task_t		ctask = t->task;

	if (ctask) {
		latency_qos = proc_get_effective_task_policy(ctask, TASK_POLICY_LATENCY_QOS);

		assert(latency_qos <= NUM_LATENCY_QOS_TIERS);

		if (latency_qos) {
			*tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
			*tmax = tcoal_prio_params.latency_qos_ns_max[latency_qos - 1];
			*pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
			adjusted = TRUE;
		}
	}
	return adjusted;
}
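/*
 * Worked example (illustrative, not compiled): for a task at latency QoS
 * tier 4, tcoal_qos_adjust() above indexes the QoS arrays at [4 - 1] and,
 * per tcoal_prio_params, yields a scale of -2 (i.e. a leeway of
 * (deadline - now) << 2, see timer_call_slop() below) capped at 75ms,
 * with no rate limiting.
 */
#if 0
static void
example_qos_tier4_params(void)
{
	int32_t		tshift = tcoal_prio_params.latency_qos_scale[3];
	uint64_t	tmax = tcoal_prio_params.latency_qos_ns_max[3];
	boolean_t	ratelimited =
	    tcoal_prio_params.latency_tier_rate_limited[3];

	assert(tshift == -2);
	assert(tmax == 75 * NSEC_PER_MSEC);
	assert(ratelimited == FALSE);
}
#endif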
/*
 * Adjust timer deadlines based on the priority of the thread and the
 * urgency value provided at timeout establishment. With this mechanism,
 * timers are no longer necessarily sorted in order of soft deadline
 * on a given timer queue, i.e. they may be differentially skewed.
 * In the current scheme, this could lead to fewer pending timers
 * processed than is technically possible when the HW deadline arrives.
 */
static void
timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited)
{
	int16_t tpri = cthread->sched_pri;

	if ((urgency & TIMER_CALL_USER_MASK) != 0) {
		if (tpri >= BASEPRI_RTQUEUES ||
		    urgency == TIMER_CALL_USER_CRITICAL) {
			*tshift = tcoal_prio_params.timer_coalesce_rt_shift;
			*tmax = tcoal_prio_params.timer_coalesce_rt_ns_max;
			TCOAL_PRIO_STAT(rt_tcl);
		} else if ((urgency == TIMER_CALL_USER_BACKGROUND) ||
		    proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG)) {
			/* Determine if timer should be subjected to a lower QoS */
			if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) {
				if (*tmax > tcoal_prio_params.timer_coalesce_bg_ns_max) {
					return;
				} else {
					*pratelimited = FALSE;
				}
			}
			*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
			*tmax = tcoal_prio_params.timer_coalesce_bg_ns_max;
			TCOAL_PRIO_STAT(bg_tcl);
		} else if (tpri >= MINPRI_KERNEL) {
			*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
			*tmax = tcoal_prio_params.timer_coalesce_kt_ns_max;
			TCOAL_PRIO_STAT(kt_tcl);
		} else if (cthread->sched_mode == TH_MODE_FIXED) {
			*tshift = tcoal_prio_params.timer_coalesce_fp_shift;
			*tmax = tcoal_prio_params.timer_coalesce_fp_ns_max;
			TCOAL_PRIO_STAT(fp_tcl);
		} else if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) {
			TCOAL_PRIO_STAT(qos_tcl);
		} else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
			*tshift = tcoal_prio_params.timer_coalesce_ts_shift;
			*tmax = tcoal_prio_params.timer_coalesce_ts_ns_max;
			TCOAL_PRIO_STAT(ts_tcl);
		} else {
			TCOAL_PRIO_STAT(nc_tcl);
		}
	} else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
		*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
		*tmax = tcoal_prio_params.timer_coalesce_bg_ns_max;
		TCOAL_PRIO_STAT(bg_tcl);
	} else {
		*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
		*tmax = tcoal_prio_params.timer_coalesce_kt_ns_max;
		TCOAL_PRIO_STAT(kt_tcl);
	}
}

int timer_user_idle_level;

uint64_t
timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
{
	int32_t tcs_shift = 0;
	uint64_t tcs_ns_max = 0;
	uint64_t adjval;
	uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);

	if (mach_timer_coalescing_enabled &&
	    (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
		timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_ns_max, pratelimited);

		if (tcs_shift >= 0)
			adjval = MIN((deadline - now) >> tcs_shift, tcs_ns_max);
		else
			adjval = MIN((deadline - now) << (-tcs_shift), tcs_ns_max);
		/* Apply adjustments derived from "user idle level" heuristic */
		adjval += (adjval * timer_user_idle_level) >> 7;
		return adjval;
	} else {
		return 0;
	}
}

boolean_t
timer_resort_threshold(uint64_t skew)
{
	if (skew >= TIMER_RESORT_THRESHOLD_ABSTIME)
		return TRUE;
	else
		return FALSE;
}

int
ml_timer_get_user_idle_level(void)
{
	return timer_user_idle_level;
}

kern_return_t
ml_timer_set_user_idle_level(int ilevel)
{
	boolean_t do_reeval = FALSE;

	if ((ilevel < 0) || (ilevel > 128))
		return KERN_INVALID_ARGUMENT;

	if (ilevel < timer_user_idle_level) {
		do_reeval = TRUE;
	}

	timer_user_idle_level = ilevel;

	if (do_reeval)
		ml_timer_evaluate();

	return KERN_SUCCESS;
}
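/*
 * Worked example (illustrative, not compiled): for a timeshare thread with
 * a deadline 100ms out, timer_call_slop() above computes
 * (100ms >> 3) = 12.5ms, capped at the ts maximum of 1ms per
 * tcoal_prio_params. With timer_user_idle_level at its maximum of 128,
 * the idle-level term (1ms * 128) >> 7 = 1ms doubles the leeway to 2ms --
 * which is why ml_timer_set_user_idle_level() bounds the level to [0, 128].
 */
#if 0
static uint64_t
example_ts_slop(void)
{
	uint64_t window = 100 * NSEC_PER_MSEC;			/* deadline - now */
	uint64_t adjval = MIN(window >> 3, NSEC_PER_MSEC);	/* ts shift/cap: 1ms */

	adjval += (adjval * 128) >> 7;	/* idle level 128: leeway doubles */
	return adjval;			/* 2ms */
}
#endif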
/*
 * Return the local timer queue for a running processor,
 * else return the boot processor's timer queue.
 */
mpqueue_head_t *
timer_queue_assign(
	uint64_t	deadline)
{
	cpu_data_t	*cdp = current_cpu_datap();
	mpqueue_head_t	*queue;

	if (cdp->cpu_running) {
		queue = &cdp->rtclock_timer.queue;

		if (deadline < cdp->rtclock_timer.deadline)
			timer_set_deadline(deadline);
	}
	else
		queue = &cpu_datap(master_cpu)->rtclock_timer.queue;

	return (queue);
}

void
timer_queue_cancel(
	mpqueue_head_t	*queue,
	uint64_t	deadline,
	uint64_t	new_deadline)
{
	if (queue == &current_cpu_datap()->rtclock_timer.queue) {
		if (deadline < new_deadline)
			timer_set_deadline(new_deadline);
	}
}

/*
 * timer_queue_migrate_cpu() is called from the Power-Management kext
 * when a logical processor goes idle (in a deep C-state) with a distant
 * deadline so that its timer queue can be moved to another processor.
 * This target processor should be the least idle (most busy) --
 * currently this is the primary processor for the calling thread's package.
 * Locking restrictions demand that the target cpu must be the boot cpu.
 */
uint32_t
timer_queue_migrate_cpu(int target_cpu)
{
	cpu_data_t	*target_cdp = cpu_datap(target_cpu);
	cpu_data_t	*cdp = current_cpu_datap();
	int		ntimers_moved;

	assert(!ml_get_interrupts_enabled());
	assert(target_cpu != cdp->cpu_number);
	assert(target_cpu == master_cpu);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		DECR_TIMER_MIGRATE | DBG_FUNC_START,
		target_cpu,
		cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >> 32),
		0, 0);

	/*
	 * Move timer requests from the local queue to the target processor's.
	 * The return value is the number of requests moved. If this is 0,
	 * it indicates that the first (i.e. earliest) timer is earlier than
	 * the earliest for the target processor. Since this would force a
	 * resync, the move of this and all later requests is aborted.
	 */
	ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
					    &target_cdp->rtclock_timer.queue);

	/*
	 * Assuming we moved stuff, clear local deadline.
	 */
	if (ntimers_moved > 0) {
		cdp->rtclock_timer.deadline = EndOfAllTime;
		setPop(EndOfAllTime);
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		DECR_TIMER_MIGRATE | DBG_FUNC_END,
		target_cpu, ntimers_moved, 0, 0, 0);

	return ntimers_moved;
}

mpqueue_head_t *
timer_queue_cpu(int cpu)
{
	return &cpu_datap(cpu)->rtclock_timer.queue;
}

void
timer_call_cpu(int cpu, void (*fn)(void *), void *arg)
{
	mp_cpus_call(cpu_to_cpumask(cpu), SYNC, fn, arg);
}

void
timer_call_nosync_cpu(int cpu, void (*fn)(void *), void *arg)
{
	/* XXX Needs error checking and retry */
	mp_cpus_call(cpu_to_cpumask(cpu), NOSYNC, fn, arg);
}
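/*
 * Usage sketch (hypothetical, not compiled): timer_call_cpu() above issues
 * a synchronous cross-call, so `fn' has run on `cpu' by the time the call
 * returns; for example, forcing the boot processor to drain its local
 * timer queue:
 */
#if 0
static void
example_expire_boot_cpu_queue(void)
{
	/* runs timer_queue_expire_local() on the boot cpu and waits */
	timer_call_cpu(master_cpu, timer_queue_expire_local, NULL);
}
#endif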