kern_timeout.c revision 247777
1/*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
*
 * From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_timeout.c 247777 2013-03-04 11:09:56Z davide $");

#include "opt_callout_profiling.h"
#include "opt_kdtrace.h"
#if defined(__arm__)
#include "opt_timer.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>

#ifdef SMP
#include <machine/cpu.h>
#endif

#ifndef NO_EVENTTIMERS
/* Read by callout_reset_sbt_on() as the time base for coarse timeouts. */
DPCPU_DECLARE(sbintime_t, hardclocktime);
#endif

/* DTrace probes fired immediately before and after each callout handler. */
SDT_PROVIDER_DEFINE(callout_execute);
SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
    "struct callout *");
SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end);
SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
    "struct callout *");

#ifdef CALLOUT_PROFILING
/*
 * Running averages exported read-only under debug.*; each is updated as
 * avg += (sample * 1000 - avg) >> 8 by softclock() (the non-_dir counters)
 * or callout_process() (the _dir counters).
 */
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_lockcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
    "Average number of lock callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
static int avg_depth_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
    "Average number of direct callouts examined per callout_process call. "
    "Units = 1/1000");
static int avg_lockcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
    &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
    "callout_process call. Units = 1/1000");
static int avg_mpcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
    0, "Average number of MP direct callouts made per callout_process call. "
    "Units = 1/1000");
#endif
/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */
/*
 * Number of buckets in each callwheel (a power of two, computed in
 * kern_timeout_callwheel_alloc()) and the matching index mask (size - 1).
 */
u_int callwheelsize, callwheelmask;

/*
 * A callout cpu exec entity holds the information needed to describe the
 * callout currently running on a CPU, plus the information needed to
 * migrate that callout to a new callout cpu.  The first entry of the
 * cc_exec_entity array describes the callout running in SWI thread
 * context; the second describes the callout running directly from
 * hardware interrupt context.
 * The cached migration information is what allows a migration to be
 * deferred when the callout to migrate is already running.
 */
struct cc_exec {
	struct callout		*cc_next;	/* next callout to visit after cc_curr */
	struct callout		*cc_curr;	/* callout in progress, or NULL */
#ifdef SMP
	/* Deferred-migration request, filled in by callout_reset_sbt_on(). */
	void			(*ce_migration_func)(void *);
	void			*ce_migration_arg;
	int			ce_migration_cpu; /* CPUBLOCK when none pending */
	sbintime_t		ce_migration_time;
#endif
	boolean_t		cc_cancel;	/* see state notes before cc_cce_cleanup() */
	boolean_t		cc_waiting;	/* a thread sleeps until cc_curr completes */
};

/*
 * There is one struct callout_cpu per cpu, holding all relevant
 * state for the callout processing thread on the individual CPU.
136 */ 137struct callout_cpu { 138 struct mtx_padalign cc_lock; 139 struct cc_exec cc_exec_entity[2]; 140 struct callout *cc_callout; 141 struct callout_list *cc_callwheel; 142 struct callout_tailq cc_expireq; 143 struct callout_slist cc_callfree; 144 sbintime_t cc_firstevent; 145 sbintime_t cc_lastscan; 146 void *cc_cookie; 147 u_int cc_bucket; 148}; 149 150#define cc_exec_curr cc_exec_entity[0].cc_curr 151#define cc_exec_next cc_exec_entity[0].cc_next 152#define cc_exec_cancel cc_exec_entity[0].cc_cancel 153#define cc_exec_waiting cc_exec_entity[0].cc_waiting 154#define cc_exec_curr_dir cc_exec_entity[1].cc_curr 155#define cc_exec_next_dir cc_exec_entity[1].cc_next 156#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel 157#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting 158 159#ifdef SMP 160#define cc_migration_func cc_exec_entity[0].ce_migration_func 161#define cc_migration_arg cc_exec_entity[0].ce_migration_arg 162#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu 163#define cc_migration_time cc_exec_entity[0].ce_migration_time 164#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func 165#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg 166#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu 167#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time 168 169struct callout_cpu cc_cpu[MAXCPU]; 170#define CPUBLOCK MAXCPU 171#define CC_CPU(cpu) (&cc_cpu[(cpu)]) 172#define CC_SELF() CC_CPU(PCPU_GET(cpuid)) 173#else 174struct callout_cpu cc_cpu; 175#define CC_CPU(cpu) &cc_cpu 176#define CC_SELF() &cc_cpu 177#endif 178#define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) 179#define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) 180#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) 181 182static int timeout_cpu; 183 184static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, 185#ifdef CALLOUT_PROFILING 186 int *mpcalls, int *lockcalls, int *gcalls, 187#endif 188 int direct); 189 
190static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); 191 192/** 193 * Locked by cc_lock: 194 * cc_curr - If a callout is in progress, it is cc_curr. 195 * If cc_curr is non-NULL, threads waiting in 196 * callout_drain() will be woken up as soon as the 197 * relevant callout completes. 198 * cc_cancel - Changing to 1 with both callout_lock and cc_lock held 199 * guarantees that the current callout will not run. 200 * The softclock() function sets this to 0 before it 201 * drops callout_lock to acquire c_lock, and it calls 202 * the handler only if curr_cancelled is still 0 after 203 * cc_lock is successfully acquired. 204 * cc_waiting - If a thread is waiting in callout_drain(), then 205 * callout_wait is nonzero. Set only when 206 * cc_curr is non-NULL. 207 */ 208 209/* 210 * Resets the execution entity tied to a specific callout cpu. 211 */ 212static void 213cc_cce_cleanup(struct callout_cpu *cc, int direct) 214{ 215 216 cc->cc_exec_entity[direct].cc_curr = NULL; 217 cc->cc_exec_entity[direct].cc_next = NULL; 218 cc->cc_exec_entity[direct].cc_cancel = FALSE; 219 cc->cc_exec_entity[direct].cc_waiting = FALSE; 220#ifdef SMP 221 cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; 222 cc->cc_exec_entity[direct].ce_migration_time = 0; 223 cc->cc_exec_entity[direct].ce_migration_func = NULL; 224 cc->cc_exec_entity[direct].ce_migration_arg = NULL; 225#endif 226} 227 228/* 229 * Checks if migration is requested by a specific callout cpu. 230 */ 231static int 232cc_cce_migrating(struct callout_cpu *cc, int direct) 233{ 234 235#ifdef SMP 236 return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); 237#else 238 return (0); 239#endif 240} 241 242/* 243 * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization 244 * 245 * This code is called very early in the kernel initialization sequence, 246 * and may be called more then once. 
247 */ 248caddr_t 249kern_timeout_callwheel_alloc(caddr_t v) 250{ 251 struct callout_cpu *cc; 252 253 timeout_cpu = PCPU_GET(cpuid); 254 cc = CC_CPU(timeout_cpu); 255 /* 256 * Calculate callout wheel size, should be next power of two higher 257 * than 'ncallout'. 258 */ 259 callwheelsize = 1 << fls(ncallout); 260 callwheelmask = callwheelsize - 1; 261 262 cc->cc_callout = (struct callout *)v; 263 v = (caddr_t)(cc->cc_callout + ncallout); 264 cc->cc_callwheel = (struct callout_list *)v; 265 v = (caddr_t)(cc->cc_callwheel + callwheelsize); 266 return(v); 267} 268 269static void 270callout_cpu_init(struct callout_cpu *cc) 271{ 272 struct callout *c; 273 int i; 274 275 mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); 276 SLIST_INIT(&cc->cc_callfree); 277 for (i = 0; i < callwheelsize; i++) 278 LIST_INIT(&cc->cc_callwheel[i]); 279 TAILQ_INIT(&cc->cc_expireq); 280 cc->cc_firstevent = INT64_MAX; 281 for (i = 0; i < 2; i++) 282 cc_cce_cleanup(cc, i); 283 if (cc->cc_callout == NULL) 284 return; 285 for (i = 0; i < ncallout; i++) { 286 c = &cc->cc_callout[i]; 287 callout_init(c, 0); 288 c->c_flags = CALLOUT_LOCAL_ALLOC; 289 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 290 } 291} 292 293#ifdef SMP 294/* 295 * Switches the cpu tied to a specific callout. 296 * The function expects a locked incoming callout cpu and returns with 297 * locked outcoming callout cpu. 298 */ 299static struct callout_cpu * 300callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) 301{ 302 struct callout_cpu *new_cc; 303 304 MPASS(c != NULL && cc != NULL); 305 CC_LOCK_ASSERT(cc); 306 307 /* 308 * Avoid interrupts and preemption firing after the callout cpu 309 * is blocked in order to avoid deadlocks as the new thread 310 * may be willing to acquire the callout cpu lock. 
311 */ 312 c->c_cpu = CPUBLOCK; 313 spinlock_enter(); 314 CC_UNLOCK(cc); 315 new_cc = CC_CPU(new_cpu); 316 CC_LOCK(new_cc); 317 spinlock_exit(); 318 c->c_cpu = new_cpu; 319 return (new_cc); 320} 321#endif 322 323/* 324 * kern_timeout_callwheel_init() - initialize previously reserved callwheel 325 * space. 326 * 327 * This code is called just once, after the space reserved for the 328 * callout wheel has been finalized. 329 */ 330void 331kern_timeout_callwheel_init(void) 332{ 333 callout_cpu_init(CC_CPU(timeout_cpu)); 334} 335 336/* 337 * Start standard softclock thread. 338 */ 339static void 340start_softclock(void *dummy) 341{ 342 struct callout_cpu *cc; 343#ifdef SMP 344 int cpu; 345#endif 346 347 cc = CC_CPU(timeout_cpu); 348 if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, 349 INTR_MPSAFE, &cc->cc_cookie)) 350 panic("died while creating standard software ithreads"); 351#ifdef SMP 352 CPU_FOREACH(cpu) { 353 if (cpu == timeout_cpu) 354 continue; 355 cc = CC_CPU(cpu); 356 if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, 357 INTR_MPSAFE, &cc->cc_cookie)) 358 panic("died while creating standard software ithreads"); 359 cc->cc_callout = NULL; /* Only cpu0 handles timeout(). 
*/ 360 cc->cc_callwheel = malloc( 361 sizeof(struct callout_list) * callwheelsize, M_CALLOUT, 362 M_WAITOK); 363 callout_cpu_init(cc); 364 } 365#endif 366} 367 368SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); 369 370#define CC_HASH_SHIFT 8 371 372static inline u_int 373callout_hash(sbintime_t sbt) 374{ 375 376 return (sbt >> (32 - CC_HASH_SHIFT)); 377} 378 379static inline u_int 380callout_get_bucket(sbintime_t sbt) 381{ 382 383 return (callout_hash(sbt) & callwheelmask); 384} 385 386void 387callout_process(sbintime_t now) 388{ 389 struct callout *tmp, *tmpn; 390 struct callout_cpu *cc; 391 struct callout_list *sc; 392 sbintime_t first, last, max, tmp_max; 393 uint32_t lookahead; 394 u_int firstb, lastb, nowb; 395#ifdef CALLOUT_PROFILING 396 int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; 397#endif 398 399 cc = CC_SELF(); 400 mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); 401 402 /* Compute the buckets of the last scan and present times. */ 403 firstb = callout_hash(cc->cc_lastscan); 404 cc->cc_lastscan = now; 405 nowb = callout_hash(now); 406 407 /* Compute the last bucket and minimum time of the bucket after it. */ 408 if (nowb == firstb) 409 lookahead = (SBT_1S / 16); 410 else if (nowb - firstb == 1) 411 lookahead = (SBT_1S / 8); 412 else 413 lookahead = (SBT_1S / 2); 414 first = last = now; 415 first += (lookahead / 2); 416 last += lookahead; 417 last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); 418 lastb = callout_hash(last) - 1; 419 max = last; 420 421 /* 422 * Check if we wrapped around the entire wheel from the last scan. 423 * In case, we need to scan entirely the wheel for pending callouts. 424 */ 425 if (lastb - firstb >= callwheelsize) { 426 lastb = firstb + callwheelsize - 1; 427 if (nowb - firstb >= callwheelsize) 428 nowb = lastb; 429 } 430 431 /* Iterate callwheel from firstb to nowb and then up to lastb. 
*/ 432 do { 433 sc = &cc->cc_callwheel[firstb & callwheelmask]; 434 tmp = LIST_FIRST(sc); 435 while (tmp != NULL) { 436 /* Run the callout if present time within allowed. */ 437 if (tmp->c_time <= now) { 438 /* 439 * Consumer told us the callout may be run 440 * directly from hardware interrupt context. 441 */ 442 if (tmp->c_flags & CALLOUT_DIRECT) { 443#ifdef CALLOUT_PROFILING 444 ++depth_dir; 445#endif 446 cc->cc_exec_next_dir = 447 LIST_NEXT(tmp, c_links.le); 448 cc->cc_bucket = firstb & callwheelmask; 449 LIST_REMOVE(tmp, c_links.le); 450 softclock_call_cc(tmp, cc, 451#ifdef CALLOUT_PROFILING 452 &mpcalls_dir, &lockcalls_dir, NULL, 453#endif 454 1); 455 tmp = cc->cc_exec_next_dir; 456 } else { 457 tmpn = LIST_NEXT(tmp, c_links.le); 458 LIST_REMOVE(tmp, c_links.le); 459 TAILQ_INSERT_TAIL(&cc->cc_expireq, 460 tmp, c_links.tqe); 461 tmp->c_flags |= CALLOUT_PROCESSED; 462 tmp = tmpn; 463 } 464 continue; 465 } 466 /* Skip events from distant future. */ 467 if (tmp->c_time >= max) 468 goto next; 469 /* 470 * Event minimal time is bigger than present maximal 471 * time, so it cannot be aggregated. 472 */ 473 if (tmp->c_time > last) { 474 lastb = nowb; 475 goto next; 476 } 477 /* Update first and last time, respecting this event. */ 478 if (tmp->c_time < first) 479 first = tmp->c_time; 480 tmp_max = tmp->c_time + tmp->c_precision; 481 if (tmp_max < last) 482 last = tmp_max; 483next: 484 tmp = LIST_NEXT(tmp, c_links.le); 485 } 486 /* Proceed with the next bucket. */ 487 firstb++; 488 /* 489 * Stop if we looked after present time and found 490 * some event we can't execute at now. 491 * Stop if we looked far enough into the future. 
492 */ 493 } while (((int)(firstb - lastb)) <= 0); 494 cc->cc_firstevent = last; 495#ifndef NO_EVENTTIMERS 496 cpu_new_callout(curcpu, last, first); 497#endif 498#ifdef CALLOUT_PROFILING 499 avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; 500 avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; 501 avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; 502#endif 503 mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); 504 /* 505 * swi_sched acquires the thread lock, so we don't want to call it 506 * with cc_lock held; incorrect locking order. 507 */ 508 if (!TAILQ_EMPTY(&cc->cc_expireq)) 509 swi_sched(cc->cc_cookie, 0); 510} 511 512static struct callout_cpu * 513callout_lock(struct callout *c) 514{ 515 struct callout_cpu *cc; 516 int cpu; 517 518 for (;;) { 519 cpu = c->c_cpu; 520#ifdef SMP 521 if (cpu == CPUBLOCK) { 522 while (c->c_cpu == CPUBLOCK) 523 cpu_spinwait(); 524 continue; 525 } 526#endif 527 cc = CC_CPU(cpu); 528 CC_LOCK(cc); 529 if (cpu == c->c_cpu) 530 break; 531 CC_UNLOCK(cc); 532 } 533 return (cc); 534} 535 536static void 537callout_cc_add(struct callout *c, struct callout_cpu *cc, 538 sbintime_t sbt, sbintime_t precision, void (*func)(void *), 539 void *arg, int cpu, int flags) 540{ 541 int bucket; 542 543 CC_LOCK_ASSERT(cc); 544 if (sbt < cc->cc_lastscan) 545 sbt = cc->cc_lastscan; 546 c->c_arg = arg; 547 c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); 548 if (flags & C_DIRECT_EXEC) 549 c->c_flags |= CALLOUT_DIRECT; 550 c->c_flags &= ~CALLOUT_PROCESSED; 551 c->c_func = func; 552 c->c_time = sbt; 553 c->c_precision = precision; 554 bucket = callout_get_bucket(c->c_time); 555 CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", 556 c, (int)(c->c_precision >> 32), 557 (u_int)(c->c_precision & 0xffffffff)); 558 LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); 559 if (cc->cc_bucket == bucket) 560 cc->cc_exec_next_dir = c; 561#ifndef NO_EVENTTIMERS 562 /* 563 * Inform the eventtimers(4) subsystem there's a new 
callout 564 * that has been inserted, but only if really required. 565 */ 566 sbt = c->c_time + c->c_precision; 567 if (sbt < cc->cc_firstevent) { 568 cc->cc_firstevent = sbt; 569 cpu_new_callout(cpu, sbt, c->c_time); 570 } 571#endif 572} 573 574static void 575callout_cc_del(struct callout *c, struct callout_cpu *cc) 576{ 577 578 if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) 579 return; 580 c->c_func = NULL; 581 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 582} 583 584static void 585softclock_call_cc(struct callout *c, struct callout_cpu *cc, 586#ifdef CALLOUT_PROFILING 587 int *mpcalls, int *lockcalls, int *gcalls, 588#endif 589 int direct) 590{ 591 void (*c_func)(void *); 592 void *c_arg; 593 struct lock_class *class; 594 struct lock_object *c_lock; 595 int c_flags, sharedlock; 596#ifdef SMP 597 struct callout_cpu *new_cc; 598 void (*new_func)(void *); 599 void *new_arg; 600 int flags, new_cpu; 601 sbintime_t new_time; 602#endif 603#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 604 sbintime_t bt1, bt2; 605 struct timespec ts2; 606 static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ 607 static timeout_t *lastfunc; 608#endif 609 610 KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == 611 (CALLOUT_PENDING | CALLOUT_ACTIVE), 612 ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); 613 class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; 614 sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1; 615 c_lock = c->c_lock; 616 c_func = c->c_func; 617 c_arg = c->c_arg; 618 c_flags = c->c_flags; 619 if (c->c_flags & CALLOUT_LOCAL_ALLOC) 620 c->c_flags = CALLOUT_LOCAL_ALLOC; 621 else 622 c->c_flags &= ~CALLOUT_PENDING; 623 cc->cc_exec_entity[direct].cc_curr = c; 624 cc->cc_exec_entity[direct].cc_cancel = FALSE; 625 CC_UNLOCK(cc); 626 if (c_lock != NULL) { 627 class->lc_lock(c_lock, sharedlock); 628 /* 629 * The callout may have been cancelled 630 * while we switched locks. 
631 */ 632 if (cc->cc_exec_entity[direct].cc_cancel) { 633 class->lc_unlock(c_lock); 634 goto skip; 635 } 636 /* The callout cannot be stopped now. */ 637 cc->cc_exec_entity[direct].cc_cancel = TRUE; 638 if (c_lock == &Giant.lock_object) { 639#ifdef CALLOUT_PROFILING 640 (*gcalls)++; 641#endif 642 CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", 643 c, c_func, c_arg); 644 } else { 645#ifdef CALLOUT_PROFILING 646 (*lockcalls)++; 647#endif 648 CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", 649 c, c_func, c_arg); 650 } 651 } else { 652#ifdef CALLOUT_PROFILING 653 (*mpcalls)++; 654#endif 655 CTR3(KTR_CALLOUT, "callout %p func %p arg %p", 656 c, c_func, c_arg); 657 } 658#ifdef DIAGNOSTIC 659 sbt1 = sbinuptime(); 660#endif 661 THREAD_NO_SLEEPING(); 662 SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0); 663 c_func(c_arg); 664 SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0); 665 THREAD_SLEEPING_OK(); 666#ifdef DIAGNOSTIC 667 bt2 = sbinuptime(); 668 bt2 -= bt1; 669 if (bt2 > maxdt) { 670 if (lastfunc != c_func || bt2 > maxdt * 2) { 671 ts2 = sbttots(bt2); 672 printf( 673 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", 674 c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); 675 } 676 maxdt = bt2; 677 lastfunc = c_func; 678 } 679#endif 680 CTR1(KTR_CALLOUT, "callout %p finished", c); 681 if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) 682 class->lc_unlock(c_lock); 683skip: 684 CC_LOCK(cc); 685 KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); 686 cc->cc_exec_entity[direct].cc_curr = NULL; 687 if (cc->cc_exec_entity[direct].cc_waiting) { 688 /* 689 * There is someone waiting for the 690 * callout to complete. 691 * If the callout was scheduled for 692 * migration just cancel it. 693 */ 694 if (cc_cce_migrating(cc, direct)) { 695 cc_cce_cleanup(cc, direct); 696 697 /* 698 * It should be assert here that the callout is not 699 * destroyed but that is not easy. 
700 */ 701 c->c_flags &= ~CALLOUT_DFRMIGRATION; 702 } 703 cc->cc_exec_entity[direct].cc_waiting = FALSE; 704 CC_UNLOCK(cc); 705 wakeup(&cc->cc_exec_entity[direct].cc_waiting); 706 CC_LOCK(cc); 707 } else if (cc_cce_migrating(cc, direct)) { 708 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, 709 ("Migrating legacy callout %p", c)); 710#ifdef SMP 711 /* 712 * If the callout was scheduled for 713 * migration just perform it now. 714 */ 715 new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; 716 new_time = cc->cc_exec_entity[direct].ce_migration_time; 717 new_func = cc->cc_exec_entity[direct].ce_migration_func; 718 new_arg = cc->cc_exec_entity[direct].ce_migration_arg; 719 cc_cce_cleanup(cc, direct); 720 721 /* 722 * It should be assert here that the callout is not destroyed 723 * but that is not easy. 724 * 725 * As first thing, handle deferred callout stops. 726 */ 727 if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { 728 CTR3(KTR_CALLOUT, 729 "deferred cancelled %p func %p arg %p", 730 c, new_func, new_arg); 731 callout_cc_del(c, cc); 732 return; 733 } 734 c->c_flags &= ~CALLOUT_DFRMIGRATION; 735 736 new_cc = callout_cpu_switch(c, cc, new_cpu); 737 flags = (direct) ? C_DIRECT_EXEC : 0; 738 callout_cc_add(c, new_cc, new_time, c->c_precision, new_func, 739 new_arg, new_cpu, flags); 740 CC_UNLOCK(new_cc); 741 CC_LOCK(cc); 742#else 743 panic("migration should not happen"); 744#endif 745 } 746 /* 747 * If the current callout is locally allocated (from 748 * timeout(9)) then put it on the freelist. 749 * 750 * Note: we need to check the cached copy of c_flags because 751 * if it was not local, then it's not safe to deref the 752 * callout pointer. 753 */ 754 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || 755 c->c_flags == CALLOUT_LOCAL_ALLOC, 756 ("corrupted callout")); 757 if (c_flags & CALLOUT_LOCAL_ALLOC) 758 callout_cc_del(c, cc); 759} 760 761/* 762 * The callout mechanism is based on the work of Adam M. 
Costello and 763 * George Varghese, published in a technical report entitled "Redesigning 764 * the BSD Callout and Timer Facilities" and modified slightly for inclusion 765 * in FreeBSD by Justin T. Gibbs. The original work on the data structures 766 * used in this implementation was published by G. Varghese and T. Lauck in 767 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 768 * the Efficient Implementation of a Timer Facility" in the Proceedings of 769 * the 11th ACM Annual Symposium on Operating Systems Principles, 770 * Austin, Texas Nov 1987. 771 */ 772 773/* 774 * Software (low priority) clock interrupt. 775 * Run periodic events from timeout queue. 776 */ 777void 778softclock(void *arg) 779{ 780 struct callout_cpu *cc; 781 struct callout *c; 782#ifdef CALLOUT_PROFILING 783 int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; 784#endif 785 786 cc = (struct callout_cpu *)arg; 787 CC_LOCK(cc); 788 while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { 789 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 790 softclock_call_cc(c, cc, 791#ifdef CALLOUT_PROFILING 792 &mpcalls, &lockcalls, &gcalls, 793#endif 794 0); 795#ifdef CALLOUT_PROFILING 796 ++depth; 797#endif 798 } 799#ifdef CALLOUT_PROFILING 800 avg_depth += (depth * 1000 - avg_depth) >> 8; 801 avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; 802 avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; 803 avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; 804#endif 805 CC_UNLOCK(cc); 806} 807 808/* 809 * timeout -- 810 * Execute a function after a specified length of time. 811 * 812 * untimeout -- 813 * Cancel previous timeout function call. 814 * 815 * callout_handle_init -- 816 * Initialize a handle so that using it with untimeout is benign. 817 * 818 * See AT&T BCI Driver Reference Manual for specification. 
This 819 * implementation differs from that one in that although an 820 * identification value is returned from timeout, the original 821 * arguments to timeout as well as the identifier are used to 822 * identify entries for untimeout. 823 */ 824struct callout_handle 825timeout(ftn, arg, to_ticks) 826 timeout_t *ftn; 827 void *arg; 828 int to_ticks; 829{ 830 struct callout_cpu *cc; 831 struct callout *new; 832 struct callout_handle handle; 833 834 cc = CC_CPU(timeout_cpu); 835 CC_LOCK(cc); 836 /* Fill in the next free callout structure. */ 837 new = SLIST_FIRST(&cc->cc_callfree); 838 if (new == NULL) 839 /* XXX Attempt to malloc first */ 840 panic("timeout table full"); 841 SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); 842 callout_reset(new, to_ticks, ftn, arg); 843 handle.callout = new; 844 CC_UNLOCK(cc); 845 846 return (handle); 847} 848 849void 850untimeout(ftn, arg, handle) 851 timeout_t *ftn; 852 void *arg; 853 struct callout_handle handle; 854{ 855 struct callout_cpu *cc; 856 857 /* 858 * Check for a handle that was initialized 859 * by callout_handle_init, but never used 860 * for a real timeout. 861 */ 862 if (handle.callout == NULL) 863 return; 864 865 cc = callout_lock(handle.callout); 866 if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) 867 callout_stop(handle.callout); 868 CC_UNLOCK(cc); 869} 870 871void 872callout_handle_init(struct callout_handle *handle) 873{ 874 handle->callout = NULL; 875} 876 877/* 878 * New interface; clients allocate their own callout structures. 879 * 880 * callout_reset() - establish or change a timeout 881 * callout_stop() - disestablish a timeout 882 * callout_init() - initialize a callout structure so that it can 883 * safely be passed to callout_reset() and callout_stop() 884 * 885 * <sys/callout.h> defines three convenience macros: 886 * 887 * callout_active() - returns truth if callout has not been stopped, 888 * drained, or deactivated since the last time the callout was 889 * reset. 
* callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
/*
 * (Re)arm callout 'c' so that ftn(arg) runs on callout cpu 'cpu'.
 *
 * With C_ABSOLUTE in 'flags', 'sbt' is the expiry time itself; otherwise
 * it is an interval added to a time base chosen below (the last hardclock
 * time or a cheap uptime read for C_HARDCLOCK or long intervals, a precise
 * sbinuptime() read otherwise), and 'precision' is raised to at least the
 * interval shifted right by C_PRELGET(flags), or by tc_precexp when that
 * is negative.
 *
 * Returns nonzero when a previously pending instance was removed, or when
 * a running instance was cancelled before its handler started; 0
 * otherwise.  Nothing is rescheduled if a thread is draining the callout.
 * If the callout is running and has to change cpu, the request is stored
 * in the execution entity and performed later by softclock_call_cc().
 */
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
    void (*ftn)(void *), void *arg, int cpu, int flags)
{
	sbintime_t to_sbt, pr;
	struct callout_cpu *cc;
	int cancelled, direct;

	cancelled = 0;
	if (flags & C_ABSOLUTE) {
		to_sbt = sbt;
	} else {
		/* Tick-aligned requests never fire sooner than one tick. */
		if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
			sbt = tick_sbt;
		/*
		 * The condition below is completed by one of the two
		 * preprocessor branches; both open the same block.
		 */
		if ((flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
		    sbt >= sbt_timethreshold) {
			to_sbt = getsbinuptime();

			/* Add safety belt for the case of hz > 1000. */
			to_sbt += tc_tick_sbt - tick_sbt;
#else
		    sbt >= sbt_tickthreshold) {
			/*
			 * Obtain the time of the last hardclock() call on
			 * this CPU directly from the kern_clocksource.c.
			 * This value is per-CPU, but it is equal for all
			 * active ones.
			 */
#ifdef __LP64__
			to_sbt = DPCPU_GET(hardclocktime);
#else
			/* NOTE(review): presumably guards a non-atomic 64-bit read -- confirm. */
			spinlock_enter();
			to_sbt = DPCPU_GET(hardclocktime);
			spinlock_exit();
#endif
#endif
			if ((flags & C_HARDCLOCK) == 0)
				to_sbt += tick_sbt;
		} else
			to_sbt = sbinuptime();
		to_sbt += sbt;
		pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
		    sbt >> C_PRELGET(flags));
		if (pr > precision)
			precision = pr;
	}
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced.
	 */
	if (c->c_flags & CALLOUT_LOCAL_ALLOC)
		cpu = c->c_cpu;
	/* Entity is chosen from the callout's current flag, not from 'flags'. */
	direct = (c->c_flags & CALLOUT_DIRECT) != 0;
	KASSERT(!direct || c->c_lock == NULL,
	    ("%s: direct callout %p has lock", __func__, c));
	cc = callout_lock(c);
	if (cc->cc_exec_entity[direct].cc_curr == c) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress.  If there is a lock then we
		 * can cancel the callout if it has not really started.
		 */
		if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel)
			cancelled = cc->cc_exec_entity[direct].cc_cancel = TRUE;
		if (cc->cc_exec_entity[direct].cc_waiting) {
			/*
			 * Someone has called callout_drain to kill this
			 * callout.  Don't reschedule.
			 */
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled" : "failed to cancel",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (cancelled);
		}
	}
	if (c->c_flags & CALLOUT_PENDING) {
		/*
		 * Still queued: unlink it from the wheel, or from the expire
		 * queue if callout_process() already moved it there.
		 */
		if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
			/* Keep callout_process()'s saved successor valid. */
			if (cc->cc_exec_next_dir == c)
				cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		cancelled = 1;
		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
	}

#ifdef SMP
	/*
	 * If the callout must migrate try to perform it immediately.
	 * If the callout is currently running, just defer the migration
	 * to a more appropriate moment.
	 */
	if (c->c_cpu != cpu) {
		if (cc->cc_exec_entity[direct].cc_curr == c) {
			cc->cc_exec_entity[direct].ce_migration_cpu = cpu;
			cc->cc_exec_entity[direct].ce_migration_time
			    = to_sbt;
			cc->cc_exec_entity[direct].ce_migration_func = ftn;
			cc->cc_exec_entity[direct].ce_migration_arg = arg;
			c->c_flags |= CALLOUT_DFRMIGRATION;
			CTR6(KTR_CALLOUT,
		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
			    (u_int)(to_sbt & 0xffffffff), cpu);
			CC_UNLOCK(cc);
			return (cancelled);
		}
		/* Returns with the destination callout cpu locked. */
		cc = callout_cpu_switch(c, cc, cpu);
	}
#endif

	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
	    (u_int)(to_sbt & 0xffffffff));
	CC_UNLOCK(cc);

	return (cancelled);
}

/*
 * Common idioms that can be optimized in the future.
 */
/* Re-arm 'c' on 'cpu' with its current function and argument. */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

/* Re-arm 'c' on its current cpu with its current function and argument. */
int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

int
_callout_stop_safe(c, safe)
	struct	callout *c;
	int	safe;
{
	struct callout_cpu *cc, *old_cc;
	struct lock_class *class;
	int direct, sq_locked, use_lock;

	/*
	 * Some old subsystems don't hold Giant while running a callout_stop(),
	 * so just discard this check for the moment.
	 */
	if (!safe && c->c_lock != NULL) {
		if (c->c_lock == &Giant.lock_object)
			use_lock = mtx_owned(&Giant);
		else {
			use_lock = 1;
			class = LOCK_CLASS(c->c_lock);
			class->lc_assert(c->c_lock, LA_XLOCKED);
		}
	} else
		use_lock = 0;
	direct = (c->c_flags & CALLOUT_DIRECT) != 0;
	sq_locked = 0;
	old_cc = NULL;
again:
	cc = callout_lock(c);

	/*
	 * If the callout was migrating while the callout cpu lock was
	 * dropped, just drop the sleepqueue lock and check the states
	 * again.
	 */
	if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
		CC_UNLOCK(cc);
		sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting);
		sq_locked = 0;
		old_cc = NULL;
		goto again;
#else
		panic("migration should not happen");
#endif
	}

	/*
	 * If the callout isn't pending, it's not on the queue, so
	 * don't attempt to remove it from the queue.  We can try to
	 * stop it by other means however.
1080 */ 1081 if (!(c->c_flags & CALLOUT_PENDING)) { 1082 c->c_flags &= ~CALLOUT_ACTIVE; 1083 1084 /* 1085 * If it wasn't on the queue and it isn't the current 1086 * callout, then we can't stop it, so just bail. 1087 */ 1088 if (cc->cc_exec_entity[direct].cc_curr != c) { 1089 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1090 c, c->c_func, c->c_arg); 1091 CC_UNLOCK(cc); 1092 if (sq_locked) 1093 sleepq_release( 1094 &cc->cc_exec_entity[direct].cc_waiting); 1095 return (0); 1096 } 1097 1098 if (safe) { 1099 /* 1100 * The current callout is running (or just 1101 * about to run) and blocking is allowed, so 1102 * just wait for the current invocation to 1103 * finish. 1104 */ 1105 while (cc->cc_exec_entity[direct].cc_curr == c) { 1106 /* 1107 * Use direct calls to sleepqueue interface 1108 * instead of cv/msleep in order to avoid 1109 * a LOR between cc_lock and sleepqueue 1110 * chain spinlocks. This piece of code 1111 * emulates a msleep_spin() call actually. 1112 * 1113 * If we already have the sleepqueue chain 1114 * locked, then we can safely block. If we 1115 * don't already have it locked, however, 1116 * we have to drop the cc_lock to lock 1117 * it. This opens several races, so we 1118 * restart at the beginning once we have 1119 * both locks. If nothing has changed, then 1120 * we will end up back here with sq_locked 1121 * set. 1122 */ 1123 if (!sq_locked) { 1124 CC_UNLOCK(cc); 1125 sleepq_lock( 1126 &cc->cc_exec_entity[direct].cc_waiting); 1127 sq_locked = 1; 1128 old_cc = cc; 1129 goto again; 1130 } 1131 1132 /* 1133 * Migration could be cancelled here, but 1134 * as long as it is still not sure when it 1135 * will be packed up, just let softclock() 1136 * take care of it. 
1137 */ 1138 cc->cc_exec_entity[direct].cc_waiting = TRUE; 1139 DROP_GIANT(); 1140 CC_UNLOCK(cc); 1141 sleepq_add( 1142 &cc->cc_exec_entity[direct].cc_waiting, 1143 &cc->cc_lock.lock_object, "codrain", 1144 SLEEPQ_SLEEP, 0); 1145 sleepq_wait( 1146 &cc->cc_exec_entity[direct].cc_waiting, 1147 0); 1148 sq_locked = 0; 1149 old_cc = NULL; 1150 1151 /* Reacquire locks previously released. */ 1152 PICKUP_GIANT(); 1153 CC_LOCK(cc); 1154 } 1155 } else if (use_lock && 1156 !cc->cc_exec_entity[direct].cc_cancel) { 1157 /* 1158 * The current callout is waiting for its 1159 * lock which we hold. Cancel the callout 1160 * and return. After our caller drops the 1161 * lock, the callout will be skipped in 1162 * softclock(). 1163 */ 1164 cc->cc_exec_entity[direct].cc_cancel = TRUE; 1165 CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1166 c, c->c_func, c->c_arg); 1167 KASSERT(!cc_cce_migrating(cc, direct), 1168 ("callout wrongly scheduled for migration")); 1169 CC_UNLOCK(cc); 1170 KASSERT(!sq_locked, ("sleepqueue chain locked")); 1171 return (1); 1172 } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { 1173 c->c_flags &= ~CALLOUT_DFRMIGRATION; 1174 CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", 1175 c, c->c_func, c->c_arg); 1176 CC_UNLOCK(cc); 1177 return (1); 1178 } 1179 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1180 c, c->c_func, c->c_arg); 1181 CC_UNLOCK(cc); 1182 KASSERT(!sq_locked, ("sleepqueue chain still locked")); 1183 return (0); 1184 } 1185 if (sq_locked) 1186 sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); 1187 1188 c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); 1189 1190 CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1191 c, c->c_func, c->c_arg); 1192 if ((c->c_flags & CALLOUT_PROCESSED) == 0) { 1193 if (cc->cc_exec_next_dir == c) 1194 cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); 1195 LIST_REMOVE(c, c_links.le); 1196 } else 1197 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1198 callout_cc_del(c, cc); 1199 1200 CC_UNLOCK(cc); 
1201 return (1); 1202} 1203 1204void 1205callout_init(c, mpsafe) 1206 struct callout *c; 1207 int mpsafe; 1208{ 1209 bzero(c, sizeof *c); 1210 if (mpsafe) { 1211 c->c_lock = NULL; 1212 c->c_flags = CALLOUT_RETURNUNLOCKED; 1213 } else { 1214 c->c_lock = &Giant.lock_object; 1215 c->c_flags = 0; 1216 } 1217 c->c_cpu = timeout_cpu; 1218} 1219 1220void 1221_callout_init_lock(c, lock, flags) 1222 struct callout *c; 1223 struct lock_object *lock; 1224 int flags; 1225{ 1226 bzero(c, sizeof *c); 1227 c->c_lock = lock; 1228 KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, 1229 ("callout_init_lock: bad flags %d", flags)); 1230 KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, 1231 ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); 1232 KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & 1233 (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", 1234 __func__)); 1235 c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); 1236 c->c_cpu = timeout_cpu; 1237} 1238 1239#ifdef APM_FIXUP_CALLTODO 1240/* 1241 * Adjust the kernel calltodo timeout list. This routine is used after 1242 * an APM resume to recalculate the calltodo timer list values with the 1243 * number of hz's we have been sleeping. The next hardclock() will detect 1244 * that there are fired timers and run softclock() to execute them. 1245 * 1246 * Please note, I have not done an exhaustive analysis of what code this 1247 * might break. I am motivated to have my select()'s and alarm()'s that 1248 * have expired during suspend firing upon resume so that the applications 1249 * which set the timer can do the maintanence the timer was for as close 1250 * as possible to the originally intended time. Testing this code for a 1251 * week showed that resuming from a suspend resulted in 22 to 25 timers 1252 * firing, which seemed independant on whether the suspend was 2 hours or 1253 * 2 days. Your milage may vary. 
- Ken Key <key@cs.utk.edu> 1254 */ 1255void 1256adjust_timeout_calltodo(time_change) 1257 struct timeval *time_change; 1258{ 1259 register struct callout *p; 1260 unsigned long delta_ticks; 1261 1262 /* 1263 * How many ticks were we asleep? 1264 * (stolen from tvtohz()). 1265 */ 1266 1267 /* Don't do anything */ 1268 if (time_change->tv_sec < 0) 1269 return; 1270 else if (time_change->tv_sec <= LONG_MAX / 1000000) 1271 delta_ticks = (time_change->tv_sec * 1000000 + 1272 time_change->tv_usec + (tick - 1)) / tick + 1; 1273 else if (time_change->tv_sec <= LONG_MAX / hz) 1274 delta_ticks = time_change->tv_sec * hz + 1275 (time_change->tv_usec + (tick - 1)) / tick + 1; 1276 else 1277 delta_ticks = LONG_MAX; 1278 1279 if (delta_ticks > INT_MAX) 1280 delta_ticks = INT_MAX; 1281 1282 /* 1283 * Now rip through the timer calltodo list looking for timers 1284 * to expire. 1285 */ 1286 1287 /* don't collide with softclock() */ 1288 CC_LOCK(cc); 1289 for (p = calltodo.c_next; p != NULL; p = p->c_next) { 1290 p->c_time -= delta_ticks; 1291 1292 /* Break if the timer had more time on it than delta_ticks */ 1293 if (p->c_time > 0) 1294 break; 1295 1296 /* take back the ticks the timer didn't use (p->c_time <= 0) */ 1297 delta_ticks = -p->c_time; 1298 } 1299 CC_UNLOCK(cc); 1300 1301 return; 1302} 1303#endif /* APM_FIXUP_CALLTODO */ 1304 1305static int 1306flssbt(sbintime_t sbt) 1307{ 1308 1309 sbt += (uint64_t)sbt >> 1; 1310 if (sizeof(long) >= sizeof(sbintime_t)) 1311 return (flsl(sbt)); 1312 if (sbt >= SBT_1S) 1313 return (flsl(((uint64_t)sbt) >> 32) + 32); 1314 return (flsl(sbt)); 1315} 1316 1317/* 1318 * Dump immediate statistic snapshot of the scheduled callouts. 
1319 */ 1320static int 1321sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) 1322{ 1323 struct callout *tmp; 1324 struct callout_cpu *cc; 1325 struct callout_list *sc; 1326 sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; 1327 int ct[64], cpr[64], ccpbk[32]; 1328 int error, val, i, count, tcum, pcum, maxc, c, medc; 1329#ifdef SMP 1330 int cpu; 1331#endif 1332 1333 val = 0; 1334 error = sysctl_handle_int(oidp, &val, 0, req); 1335 if (error != 0 || req->newptr == NULL) 1336 return (error); 1337 count = maxc = 0; 1338 st = spr = maxt = maxpr = 0; 1339 bzero(ccpbk, sizeof(ccpbk)); 1340 bzero(ct, sizeof(ct)); 1341 bzero(cpr, sizeof(cpr)); 1342 now = sbinuptime(); 1343#ifdef SMP 1344 CPU_FOREACH(cpu) { 1345 cc = CC_CPU(cpu); 1346#else 1347 cc = CC_CPU(timeout_cpu); 1348#endif 1349 CC_LOCK(cc); 1350 for (i = 0; i < callwheelsize; i++) { 1351 sc = &cc->cc_callwheel[i]; 1352 c = 0; 1353 LIST_FOREACH(tmp, sc, c_links.le) { 1354 c++; 1355 t = tmp->c_time - now; 1356 if (t < 0) 1357 t = 0; 1358 st += t / SBT_1US; 1359 spr += tmp->c_precision / SBT_1US; 1360 if (t > maxt) 1361 maxt = t; 1362 if (tmp->c_precision > maxpr) 1363 maxpr = tmp->c_precision; 1364 ct[flssbt(t)]++; 1365 cpr[flssbt(tmp->c_precision)]++; 1366 } 1367 if (c > maxc) 1368 maxc = c; 1369 ccpbk[fls(c + c / 2)]++; 1370 count += c; 1371 } 1372 CC_UNLOCK(cc); 1373#ifdef SMP 1374 } 1375#endif 1376 1377 for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) 1378 tcum += ct[i]; 1379 medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1380 for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) 1381 pcum += cpr[i]; 1382 medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1383 for (i = 0, c = 0; i < 32 && c < count / 2; i++) 1384 c += ccpbk[i]; 1385 medc = (i >= 2) ? 
(1 << (i - 2)) : 0; 1386 1387 printf("Scheduled callouts statistic snapshot:\n"); 1388 printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", 1389 count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); 1390 printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", 1391 medc, 1392 count / callwheelsize / mp_ncpus, 1393 (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, 1394 maxc); 1395 printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1396 medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, 1397 (st / count) / 1000000, (st / count) % 1000000, 1398 maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); 1399 printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1400 medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, 1401 (spr / count) / 1000000, (spr / count) % 1000000, 1402 maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); 1403 printf(" Distribution: \tbuckets\t time\t tcum\t" 1404 " prec\t pcum\n"); 1405 for (i = 0, tcum = pcum = 0; i < 64; i++) { 1406 if (ct[i] == 0 && cpr[i] == 0) 1407 continue; 1408 t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; 1409 tcum += ct[i]; 1410 pcum += cpr[i]; 1411 printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", 1412 t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, 1413 i - 1 - (32 - CC_HASH_SHIFT), 1414 ct[i], tcum, cpr[i], pcum); 1415 } 1416 return (error); 1417} 1418SYSCTL_PROC(_kern, OID_AUTO, callout_stat, 1419 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1420 0, 0, sysctl_kern_callout_stat, "I", 1421 "Dump immediate statistic snapshot of the scheduled callouts"); 1422