kern_timeout.c revision 248032
14Srgrimes/*- 21690Sdg * Copyright (c) 1982, 1986, 1991, 1993 31690Sdg * The Regents of the University of California. All rights reserved. 41690Sdg * (c) UNIX System Laboratories, Inc. 54Srgrimes * All or some portions of this file are derived from material licensed 64Srgrimes * to the University of California by American Telephone and Telegraph 74Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 84Srgrimes * the permission of UNIX System Laboratories, Inc. 94Srgrimes * 104Srgrimes * Redistribution and use in source and binary forms, with or without 114Srgrimes * modification, are permitted provided that the following conditions 124Srgrimes * are met: 134Srgrimes * 1. Redistributions of source code must retain the above copyright 144Srgrimes * notice, this list of conditions and the following disclaimer. 154Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 164Srgrimes * notice, this list of conditions and the following disclaimer in the 174Srgrimes * documentation and/or other materials provided with the distribution. 184Srgrimes * 4. Neither the name of the University nor the names of its contributors 194Srgrimes * may be used to endorse or promote products derived from this software 204Srgrimes * without specific prior written permission. 214Srgrimes * 224Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 234Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 244Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 254Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 264Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 274Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 284Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 294Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 304Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 314Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 324Srgrimes * SUCH DAMAGE. 334Srgrimes * 344Srgrimes * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 354Srgrimes */ 364Srgrimes 37608Srgrimes#include <sys/cdefs.h> 384Srgrimes__FBSDID("$FreeBSD: head/sys/kern/kern_timeout.c 248032 2013-03-08 10:37:17Z andre $"); 394Srgrimes 40116182Sobrien#include "opt_callout_profiling.h" 41116182Sobrien#include "opt_kdtrace.h" 42116182Sobrien#if defined(__arm__) 43118240Speter#include "opt_timer.h" 44104338Srwatson#endif 4578983Sjhb 4671257Speter#include <sys/param.h> 4778983Sjhb#include <sys/systm.h> 48170640Sjeff#include <sys/bus.h> 4913203Swollman#include <sys/callout.h> 501549Srgrimes#include <sys/file.h> 5165557Sjasone#include <sys/interrupt.h> 521549Srgrimes#include <sys/kernel.h> 5378983Sjhb#include <sys/ktr.h> 5467365Sjhb#include <sys/lock.h> 5578983Sjhb#include <sys/malloc.h> 5699072Sjulian#include <sys/mutex.h> 5731389Sbde#include <sys/proc.h> 58104964Sjeff#include <sys/sdt.h> 5931389Sbde#include <sys/sleepqueue.h> 6078983Sjhb#include <sys/sysctl.h> 6112662Sdg#include <sys/smp.h> 62118240Speter 63118240Speter#ifdef SMP 64118240Speter#include <machine/cpu.h> 65118240Speter#endif 66118240Speter 671549Srgrimes#ifndef NO_EVENTTIMERS 6831389SbdeDPCPU_DECLARE(sbintime_t, hardclocktime); 691549Srgrimes#endif 70163606Srwatson 71163606SrwatsonSDT_PROVIDER_DEFINE(callout_execute); 7278983SjhbSDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, 
callout-start); 73167211SrwatsonSDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0, 74167211Srwatson "struct callout *"); 7578983SjhbSDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end); 7671527SjhbSDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0, 77155455Sphk "struct callout *"); 781690Sdg 7983366Sjulian#ifdef CALLOUT_PROFILING 80757Sdgstatic int avg_depth; 8199072SjulianSYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, 82173601Sjulian "Average number of items examined per softclock call. Units = 1/1000"); 83126661Srwatsonstatic int avg_gcalls; 84110190SjulianSYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, 8578636Sjhb "Average number of Giant callouts made per softclock call. Units = 1/1000"); 86170307Sjeffstatic int avg_lockcalls; 87112888SjeffSYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, 88111032Sjulian "Average number of lock callouts made per softclock call. Units = 1/1000"); 89102266Srwatsonstatic int avg_mpcalls; 90170307SjeffSYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, 9182585Sdillon "Average number of MP callouts made per softclock call. Units = 1/1000"); 9293793Sbdestatic int avg_depth_dir; 9328013SdysonSYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, 94152376Srwatson "Average number of direct callouts examined per callout_process call. " 95152376Srwatson "Units = 1/1000"); 96152376Srwatsonstatic int avg_lockcalls_dir; 97152376SrwatsonSYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, 9893793Sbde &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " 99136837Sphk "callout_process call. Units = 1/1000"); 100136837Sphkstatic int avg_mpcalls_dir; 101136837SphkSYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 102136837Sphk 0, "Average number of MP direct callouts made per callout_process call. 
" 103136837Sphk "Units = 1/1000"); 104136837Sphk#endif 105136837Sphk 106105974Sjulianstatic int ncallout; 107105974SjulianSYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0, 108105974Sjulian "Number of entries in callwheel and size of timeout() preallocation"); 109105974Sjulian 11099072Sjulian/* 111105974Sjulian * TODO: 112105974Sjulian * allocate more timeout table slots when table overflows. 113105974Sjulian */ 114105974Sjulianu_int callwheelsize, callwheelmask; 115105974Sjulian 116105974Sjulian/* 117163709Sjb * The callout cpu exec entities represent informations necessary for 118105974Sjulian * describing the state of callouts currently running on the CPU and the ones 119105974Sjulian * necessary for migrating callouts to the new callout cpu. In particular, 120105974Sjulian * the first entry of the array cc_exec_entity holds informations for callout 121126932Speter * running in SWI thread context, while the second one holds informations 122103838Sjulian * for callout running directly from hardware interrupt context. 123163709Sjb * The cached informations are very important for deferring migration when 124110190Sjulian * the migrating callout is already running. 125110190Sjulian */ 126110190Sjulianstruct cc_exec { 127110190Sjulian struct callout *cc_next; 128113874Sjhb struct callout *cc_curr; 129110190Sjulian#ifdef SMP 130155455Sphk void (*ce_migration_func)(void *); 131110190Sjulian void *ce_migration_arg; 132139452Sjhb int ce_migration_cpu; 133139324Sjeff sbintime_t ce_migration_time; 134139324Sjeff#endif 135139324Sjeff bool cc_cancel; 136139324Sjeff bool cc_waiting; 137144061Sjeff}; 138144061Sjeff 1391690Sdg/* 1401690Sdg * There is one struct callout_cpu per cpu, holding all relevant 1414Srgrimes * state for the callout processing thread on the individual CPU. 
14278983Sjhb */ 14378983Sjhbstruct callout_cpu { 14481493Sjhb struct mtx_padalign cc_lock; 1454Srgrimes struct cc_exec cc_exec_entity[2]; 146798Swollman struct callout *cc_callout; 14799072Sjulian struct callout_list *cc_callwheel; 14865557Sjasone struct callout_tailq cc_expireq; 149104297Sjhb struct callout_slist cc_callfree; 150104297Sjhb sbintime_t cc_firstevent; 15183366Sjulian sbintime_t cc_lastscan; 15293793Sbde void *cc_cookie; 15377015Sbde u_int cc_bucket; 15477015Sbde}; 155151316Sdavidxu 15677015Sbde#define cc_exec_curr cc_exec_entity[0].cc_curr 15765557Sjasone#define cc_exec_next cc_exec_entity[0].cc_next 158104297Sjhb#define cc_exec_cancel cc_exec_entity[0].cc_cancel 159104297Sjhb#define cc_exec_waiting cc_exec_entity[0].cc_waiting 160104378Sjmallett#define cc_exec_curr_dir cc_exec_entity[1].cc_curr 16199072Sjulian#define cc_exec_next_dir cc_exec_entity[1].cc_next 16299072Sjulian#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel 16372911Sjhb#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting 164111883Sjhb 16581493Sjhb#ifdef SMP 166170307Sjeff#define cc_migration_func cc_exec_entity[0].ce_migration_func 16793390Sjake#define cc_migration_arg cc_exec_entity[0].ce_migration_arg 168155455Sphk#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu 169104297Sjhb#define cc_migration_time cc_exec_entity[0].ce_migration_time 170163709Sjb#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func 171133340Sdavidxu#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg 172134571Sjulian#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu 173163709Sjb#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time 174135573Sjhb 17593390Sjakestruct callout_cpu cc_cpu[MAXCPU]; 176172207Sjeff#define CPUBLOCK MAXCPU 17793390Sjake#define CC_CPU(cpu) (&cc_cpu[(cpu)]) 17893390Sjake#define CC_SELF() CC_CPU(PCPU_GET(cpuid)) 179172207Sjeff#else 18093390Sjakestruct callout_cpu cc_cpu; 18193390Sjake#define CC_CPU(cpu) &cc_cpu 
182170307Sjeff#define CC_SELF() &cc_cpu 183170307Sjeff#endif 184112888Sjeff#define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) 185172207Sjeff#define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) 186172207Sjeff#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) 187170307Sjeff 188170292Sattiliostatic int timeout_cpu; 189135573Sjhb 19099072Sjulianstatic void callout_cpu_init(struct callout_cpu *cc); 19199072Sjulianstatic void softclock_call_cc(struct callout *c, struct callout_cpu *cc, 192164936Sjulian#ifdef CALLOUT_PROFILING 19399072Sjulian int *mpcalls, int *lockcalls, int *gcalls, 19499072Sjulian#endif 19599072Sjulian int direct); 19699072Sjulian 19793390Sjakestatic MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); 19893390Sjake 199132266Sjhb/** 200132266Sjhb * Locked by cc_lock: 201132266Sjhb * cc_curr - If a callout is in progress, it is cc_curr. 202132266Sjhb * If cc_curr is non-NULL, threads waiting in 203131437Sjhb * callout_drain() will be woken up as soon as the 204172207Sjeff * relevant callout completes. 20593390Sjake * cc_cancel - Changing to 1 with both callout_lock and cc_lock held 20693390Sjake * guarantees that the current callout will not run. 20793390Sjake * The softclock() function sets this to 0 before it 20893390Sjake * drops callout_lock to acquire c_lock, and it calls 20977015Sbde * the handler only if curr_cancelled is still 0 after 21093390Sjake * cc_lock is successfully acquired. 21193390Sjake * cc_waiting - If a thread is waiting in callout_drain(), then 21293390Sjake * callout_wait is nonzero. Set only when 21393390Sjake * cc_curr is non-NULL. 21493390Sjake */ 215151316Sdavidxu 216151316Sdavidxu/* 217151316Sdavidxu * Resets the execution entity tied to a specific callout cpu. 
218151316Sdavidxu */ 21977015Sbdestatic void 22093390Sjakecc_cce_cleanup(struct callout_cpu *cc, int direct) 22177015Sbde{ 222172207Sjeff 22393390Sjake cc->cc_exec_entity[direct].cc_curr = NULL; 22493390Sjake cc->cc_exec_entity[direct].cc_next = NULL; 22593390Sjake cc->cc_exec_entity[direct].cc_cancel = false; 22693390Sjake cc->cc_exec_entity[direct].cc_waiting = false; 227106655Srwatson#ifdef SMP 228172207Sjeff cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; 229106655Srwatson cc->cc_exec_entity[direct].ce_migration_time = 0; 230106655Srwatson cc->cc_exec_entity[direct].ce_migration_func = NULL; 231111032Sjulian cc->cc_exec_entity[direct].ce_migration_arg = NULL; 232118240Speter#endif 233118240Speter} 234119781Speter 235118240Speter/* 236170307Sjeff * Checks if migration is requested by a specific callout cpu. 237163709Sjb */ 238170307Sjeffstatic int 239131473Sjhbcc_cce_migrating(struct callout_cpu *cc, int direct) 240170307Sjeff{ 241118240Speter 242118240Speter#ifdef SMP 243119781Speter return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); 244118240Speter#else 24593793Sbde return (0); 246112888Sjeff#endif 24793793Sbde} 248114983Sjhb 249116963Sdavidxu/* 25093793Sbde * Kernel low level callwheel initialization 251114983Sjhb * called on cpu0 during kernel startup. 25293793Sbde */ 25393793Sbdestatic void 25465557Sjasonecallout_callwheel_init(void *dummy) 255155455Sphk{ 25681493Sjhb struct callout_cpu *cc; 25724691Speter 258 /* 259 * Calculate the size of the callout wheel and the preallocated 260 * timeout() structures. 261 */ 262 ncallout = imin(16 + maxproc + maxfiles, 18508); 263 TUNABLE_INT_FETCH("kern.ncallout", &ncallout); 264 265 /* 266 * Calculate callout wheel size, should be next power of two higher 267 * than 'ncallout'. 268 */ 269 callwheelsize = 1 << fls(ncallout); 270 callwheelmask = callwheelsize - 1; 271 272 /* 273 * Only cpu0 handles timeout(9) and receives a preallocation. 
274 * 275 * XXX: Once all timeout(9) consumers are converted this can 276 * be removed. 277 */ 278 timeout_cpu = PCPU_GET(cpuid); 279 cc = CC_CPU(timeout_cpu); 280 cc->cc_callout = malloc(ncallout * sizeof(struct callout), 281 M_CALLOUT, M_WAITOK); 282 callout_cpu_init(cc); 283} 284SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); 285 286/* 287 * Initialize the per-cpu callout structures. 288 */ 289static void 290callout_cpu_init(struct callout_cpu *cc) 291{ 292 struct callout *c; 293 int i; 294 295 mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); 296 SLIST_INIT(&cc->cc_callfree); 297 cc->cc_callwheel = malloc(sizeof(struct callout_tailq) * callwheelsize, 298 M_CALLOUT, M_WAITOK); 299 for (i = 0; i < callwheelsize; i++) 300 LIST_INIT(&cc->cc_callwheel[i]); 301 TAILQ_INIT(&cc->cc_expireq); 302 cc->cc_firstevent = INT64_MAX; 303 for (i = 0; i < 2; i++) 304 cc_cce_cleanup(cc, i); 305 if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ 306 return; 307 for (i = 0; i < ncallout; i++) { 308 c = &cc->cc_callout[i]; 309 callout_init(c, 0); 310 c->c_flags = CALLOUT_LOCAL_ALLOC; 311 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 312 } 313} 314 315#ifdef SMP 316/* 317 * Switches the cpu tied to a specific callout. 318 * The function expects a locked incoming callout cpu and returns with 319 * locked outcoming callout cpu. 320 */ 321static struct callout_cpu * 322callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) 323{ 324 struct callout_cpu *new_cc; 325 326 MPASS(c != NULL && cc != NULL); 327 CC_LOCK_ASSERT(cc); 328 329 /* 330 * Avoid interrupts and preemption firing after the callout cpu 331 * is blocked in order to avoid deadlocks as the new thread 332 * may be willing to acquire the callout cpu lock. 
333 */ 334 c->c_cpu = CPUBLOCK; 335 spinlock_enter(); 336 CC_UNLOCK(cc); 337 new_cc = CC_CPU(new_cpu); 338 CC_LOCK(new_cc); 339 spinlock_exit(); 340 c->c_cpu = new_cpu; 341 return (new_cc); 342} 343#endif 344 345/* 346 * Start standard softclock thread. 347 */ 348static void 349start_softclock(void *dummy) 350{ 351 struct callout_cpu *cc; 352#ifdef SMP 353 int cpu; 354#endif 355 356 cc = CC_CPU(timeout_cpu); 357 if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, 358 INTR_MPSAFE, &cc->cc_cookie)) 359 panic("died while creating standard software ithreads"); 360#ifdef SMP 361 CPU_FOREACH(cpu) { 362 if (cpu == timeout_cpu) 363 continue; 364 cc = CC_CPU(cpu); 365 cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ 366 callout_cpu_init(cc); 367 if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, 368 INTR_MPSAFE, &cc->cc_cookie)) 369 panic("died while creating standard software ithreads"); 370 } 371#endif 372} 373SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); 374 375#define CC_HASH_SHIFT 8 376 377static inline u_int 378callout_hash(sbintime_t sbt) 379{ 380 381 return (sbt >> (32 - CC_HASH_SHIFT)); 382} 383 384static inline u_int 385callout_get_bucket(sbintime_t sbt) 386{ 387 388 return (callout_hash(sbt) & callwheelmask); 389} 390 391void 392callout_process(sbintime_t now) 393{ 394 struct callout *tmp, *tmpn; 395 struct callout_cpu *cc; 396 struct callout_list *sc; 397 sbintime_t first, last, max, tmp_max; 398 uint32_t lookahead; 399 u_int firstb, lastb, nowb; 400#ifdef CALLOUT_PROFILING 401 int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; 402#endif 403 404 cc = CC_SELF(); 405 mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); 406 407 /* Compute the buckets of the last scan and present times. */ 408 firstb = callout_hash(cc->cc_lastscan); 409 cc->cc_lastscan = now; 410 nowb = callout_hash(now); 411 412 /* Compute the last bucket and minimum time of the bucket after it. 
*/ 413 if (nowb == firstb) 414 lookahead = (SBT_1S / 16); 415 else if (nowb - firstb == 1) 416 lookahead = (SBT_1S / 8); 417 else 418 lookahead = (SBT_1S / 2); 419 first = last = now; 420 first += (lookahead / 2); 421 last += lookahead; 422 last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); 423 lastb = callout_hash(last) - 1; 424 max = last; 425 426 /* 427 * Check if we wrapped around the entire wheel from the last scan. 428 * In case, we need to scan entirely the wheel for pending callouts. 429 */ 430 if (lastb - firstb >= callwheelsize) { 431 lastb = firstb + callwheelsize - 1; 432 if (nowb - firstb >= callwheelsize) 433 nowb = lastb; 434 } 435 436 /* Iterate callwheel from firstb to nowb and then up to lastb. */ 437 do { 438 sc = &cc->cc_callwheel[firstb & callwheelmask]; 439 tmp = LIST_FIRST(sc); 440 while (tmp != NULL) { 441 /* Run the callout if present time within allowed. */ 442 if (tmp->c_time <= now) { 443 /* 444 * Consumer told us the callout may be run 445 * directly from hardware interrupt context. 446 */ 447 if (tmp->c_flags & CALLOUT_DIRECT) { 448#ifdef CALLOUT_PROFILING 449 ++depth_dir; 450#endif 451 cc->cc_exec_next_dir = 452 LIST_NEXT(tmp, c_links.le); 453 cc->cc_bucket = firstb & callwheelmask; 454 LIST_REMOVE(tmp, c_links.le); 455 softclock_call_cc(tmp, cc, 456#ifdef CALLOUT_PROFILING 457 &mpcalls_dir, &lockcalls_dir, NULL, 458#endif 459 1); 460 tmp = cc->cc_exec_next_dir; 461 } else { 462 tmpn = LIST_NEXT(tmp, c_links.le); 463 LIST_REMOVE(tmp, c_links.le); 464 TAILQ_INSERT_TAIL(&cc->cc_expireq, 465 tmp, c_links.tqe); 466 tmp->c_flags |= CALLOUT_PROCESSED; 467 tmp = tmpn; 468 } 469 continue; 470 } 471 /* Skip events from distant future. */ 472 if (tmp->c_time >= max) 473 goto next; 474 /* 475 * Event minimal time is bigger than present maximal 476 * time, so it cannot be aggregated. 477 */ 478 if (tmp->c_time > last) { 479 lastb = nowb; 480 goto next; 481 } 482 /* Update first and last time, respecting this event. 
*/ 483 if (tmp->c_time < first) 484 first = tmp->c_time; 485 tmp_max = tmp->c_time + tmp->c_precision; 486 if (tmp_max < last) 487 last = tmp_max; 488next: 489 tmp = LIST_NEXT(tmp, c_links.le); 490 } 491 /* Proceed with the next bucket. */ 492 firstb++; 493 /* 494 * Stop if we looked after present time and found 495 * some event we can't execute at now. 496 * Stop if we looked far enough into the future. 497 */ 498 } while (((int)(firstb - lastb)) <= 0); 499 cc->cc_firstevent = last; 500#ifndef NO_EVENTTIMERS 501 cpu_new_callout(curcpu, last, first); 502#endif 503#ifdef CALLOUT_PROFILING 504 avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; 505 avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; 506 avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; 507#endif 508 mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); 509 /* 510 * swi_sched acquires the thread lock, so we don't want to call it 511 * with cc_lock held; incorrect locking order. 512 */ 513 if (!TAILQ_EMPTY(&cc->cc_expireq)) 514 swi_sched(cc->cc_cookie, 0); 515} 516 517static struct callout_cpu * 518callout_lock(struct callout *c) 519{ 520 struct callout_cpu *cc; 521 int cpu; 522 523 for (;;) { 524 cpu = c->c_cpu; 525#ifdef SMP 526 if (cpu == CPUBLOCK) { 527 while (c->c_cpu == CPUBLOCK) 528 cpu_spinwait(); 529 continue; 530 } 531#endif 532 cc = CC_CPU(cpu); 533 CC_LOCK(cc); 534 if (cpu == c->c_cpu) 535 break; 536 CC_UNLOCK(cc); 537 } 538 return (cc); 539} 540 541static void 542callout_cc_add(struct callout *c, struct callout_cpu *cc, 543 sbintime_t sbt, sbintime_t precision, void (*func)(void *), 544 void *arg, int cpu, int flags) 545{ 546 int bucket; 547 548 CC_LOCK_ASSERT(cc); 549 if (sbt < cc->cc_lastscan) 550 sbt = cc->cc_lastscan; 551 c->c_arg = arg; 552 c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); 553 if (flags & C_DIRECT_EXEC) 554 c->c_flags |= CALLOUT_DIRECT; 555 c->c_flags &= ~CALLOUT_PROCESSED; 556 c->c_func = func; 557 c->c_time = sbt; 558 c->c_precision = 
precision; 559 bucket = callout_get_bucket(c->c_time); 560 CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", 561 c, (int)(c->c_precision >> 32), 562 (u_int)(c->c_precision & 0xffffffff)); 563 LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); 564 if (cc->cc_bucket == bucket) 565 cc->cc_exec_next_dir = c; 566#ifndef NO_EVENTTIMERS 567 /* 568 * Inform the eventtimers(4) subsystem there's a new callout 569 * that has been inserted, but only if really required. 570 */ 571 sbt = c->c_time + c->c_precision; 572 if (sbt < cc->cc_firstevent) { 573 cc->cc_firstevent = sbt; 574 cpu_new_callout(cpu, sbt, c->c_time); 575 } 576#endif 577} 578 579static void 580callout_cc_del(struct callout *c, struct callout_cpu *cc) 581{ 582 583 if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) 584 return; 585 c->c_func = NULL; 586 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 587} 588 589static void 590softclock_call_cc(struct callout *c, struct callout_cpu *cc, 591#ifdef CALLOUT_PROFILING 592 int *mpcalls, int *lockcalls, int *gcalls, 593#endif 594 int direct) 595{ 596 void (*c_func)(void *); 597 void *c_arg; 598 struct lock_class *class; 599 struct lock_object *c_lock; 600 int c_flags, sharedlock; 601#ifdef SMP 602 struct callout_cpu *new_cc; 603 void (*new_func)(void *); 604 void *new_arg; 605 int flags, new_cpu; 606 sbintime_t new_time; 607#endif 608#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 609 sbintime_t sbt1, sbt2; 610 struct timespec ts2; 611 static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ 612 static timeout_t *lastfunc; 613#endif 614 615 KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == 616 (CALLOUT_PENDING | CALLOUT_ACTIVE), 617 ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); 618 class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; 619 sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 
0 : 1; 620 c_lock = c->c_lock; 621 c_func = c->c_func; 622 c_arg = c->c_arg; 623 c_flags = c->c_flags; 624 if (c->c_flags & CALLOUT_LOCAL_ALLOC) 625 c->c_flags = CALLOUT_LOCAL_ALLOC; 626 else 627 c->c_flags &= ~CALLOUT_PENDING; 628 cc->cc_exec_entity[direct].cc_curr = c; 629 cc->cc_exec_entity[direct].cc_cancel = false; 630 CC_UNLOCK(cc); 631 if (c_lock != NULL) { 632 class->lc_lock(c_lock, sharedlock); 633 /* 634 * The callout may have been cancelled 635 * while we switched locks. 636 */ 637 if (cc->cc_exec_entity[direct].cc_cancel) { 638 class->lc_unlock(c_lock); 639 goto skip; 640 } 641 /* The callout cannot be stopped now. */ 642 cc->cc_exec_entity[direct].cc_cancel = true; 643 if (c_lock == &Giant.lock_object) { 644#ifdef CALLOUT_PROFILING 645 (*gcalls)++; 646#endif 647 CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", 648 c, c_func, c_arg); 649 } else { 650#ifdef CALLOUT_PROFILING 651 (*lockcalls)++; 652#endif 653 CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", 654 c, c_func, c_arg); 655 } 656 } else { 657#ifdef CALLOUT_PROFILING 658 (*mpcalls)++; 659#endif 660 CTR3(KTR_CALLOUT, "callout %p func %p arg %p", 661 c, c_func, c_arg); 662 } 663#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 664 sbt1 = sbinuptime(); 665#endif 666 THREAD_NO_SLEEPING(); 667 SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0); 668 c_func(c_arg); 669 SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0); 670 THREAD_SLEEPING_OK(); 671#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 672 sbt2 = sbinuptime(); 673 sbt2 -= sbt1; 674 if (sbt2 > maxdt) { 675 if (lastfunc != c_func || sbt2 > maxdt * 2) { 676 ts2 = sbttots(sbt2); 677 printf( 678 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", 679 c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); 680 } 681 maxdt = sbt2; 682 lastfunc = c_func; 683 } 684#endif 685 CTR1(KTR_CALLOUT, "callout %p finished", c); 686 if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) 687 class->lc_unlock(c_lock); 688skip: 
689 CC_LOCK(cc); 690 KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); 691 cc->cc_exec_entity[direct].cc_curr = NULL; 692 if (cc->cc_exec_entity[direct].cc_waiting) { 693 /* 694 * There is someone waiting for the 695 * callout to complete. 696 * If the callout was scheduled for 697 * migration just cancel it. 698 */ 699 if (cc_cce_migrating(cc, direct)) { 700 cc_cce_cleanup(cc, direct); 701 702 /* 703 * It should be assert here that the callout is not 704 * destroyed but that is not easy. 705 */ 706 c->c_flags &= ~CALLOUT_DFRMIGRATION; 707 } 708 cc->cc_exec_entity[direct].cc_waiting = false; 709 CC_UNLOCK(cc); 710 wakeup(&cc->cc_exec_entity[direct].cc_waiting); 711 CC_LOCK(cc); 712 } else if (cc_cce_migrating(cc, direct)) { 713 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, 714 ("Migrating legacy callout %p", c)); 715#ifdef SMP 716 /* 717 * If the callout was scheduled for 718 * migration just perform it now. 719 */ 720 new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; 721 new_time = cc->cc_exec_entity[direct].ce_migration_time; 722 new_func = cc->cc_exec_entity[direct].ce_migration_func; 723 new_arg = cc->cc_exec_entity[direct].ce_migration_arg; 724 cc_cce_cleanup(cc, direct); 725 726 /* 727 * It should be assert here that the callout is not destroyed 728 * but that is not easy. 729 * 730 * As first thing, handle deferred callout stops. 731 */ 732 if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { 733 CTR3(KTR_CALLOUT, 734 "deferred cancelled %p func %p arg %p", 735 c, new_func, new_arg); 736 callout_cc_del(c, cc); 737 return; 738 } 739 c->c_flags &= ~CALLOUT_DFRMIGRATION; 740 741 new_cc = callout_cpu_switch(c, cc, new_cpu); 742 flags = (direct) ? 
C_DIRECT_EXEC : 0; 743 callout_cc_add(c, new_cc, new_time, c->c_precision, new_func, 744 new_arg, new_cpu, flags); 745 CC_UNLOCK(new_cc); 746 CC_LOCK(cc); 747#else 748 panic("migration should not happen"); 749#endif 750 } 751 /* 752 * If the current callout is locally allocated (from 753 * timeout(9)) then put it on the freelist. 754 * 755 * Note: we need to check the cached copy of c_flags because 756 * if it was not local, then it's not safe to deref the 757 * callout pointer. 758 */ 759 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || 760 c->c_flags == CALLOUT_LOCAL_ALLOC, 761 ("corrupted callout")); 762 if (c_flags & CALLOUT_LOCAL_ALLOC) 763 callout_cc_del(c, cc); 764} 765 766/* 767 * The callout mechanism is based on the work of Adam M. Costello and 768 * George Varghese, published in a technical report entitled "Redesigning 769 * the BSD Callout and Timer Facilities" and modified slightly for inclusion 770 * in FreeBSD by Justin T. Gibbs. The original work on the data structures 771 * used in this implementation was published by G. Varghese and T. Lauck in 772 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 773 * the Efficient Implementation of a Timer Facility" in the Proceedings of 774 * the 11th ACM Annual Symposium on Operating Systems Principles, 775 * Austin, Texas Nov 1987. 776 */ 777 778/* 779 * Software (low priority) clock interrupt. 780 * Run periodic events from timeout queue. 
781 */ 782void 783softclock(void *arg) 784{ 785 struct callout_cpu *cc; 786 struct callout *c; 787#ifdef CALLOUT_PROFILING 788 int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; 789#endif 790 791 cc = (struct callout_cpu *)arg; 792 CC_LOCK(cc); 793 while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { 794 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 795 softclock_call_cc(c, cc, 796#ifdef CALLOUT_PROFILING 797 &mpcalls, &lockcalls, &gcalls, 798#endif 799 0); 800#ifdef CALLOUT_PROFILING 801 ++depth; 802#endif 803 } 804#ifdef CALLOUT_PROFILING 805 avg_depth += (depth * 1000 - avg_depth) >> 8; 806 avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; 807 avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; 808 avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; 809#endif 810 CC_UNLOCK(cc); 811} 812 813/* 814 * timeout -- 815 * Execute a function after a specified length of time. 816 * 817 * untimeout -- 818 * Cancel previous timeout function call. 819 * 820 * callout_handle_init -- 821 * Initialize a handle so that using it with untimeout is benign. 822 * 823 * See AT&T BCI Driver Reference Manual for specification. This 824 * implementation differs from that one in that although an 825 * identification value is returned from timeout, the original 826 * arguments to timeout as well as the identifier are used to 827 * identify entries for untimeout. 828 */ 829struct callout_handle 830timeout(ftn, arg, to_ticks) 831 timeout_t *ftn; 832 void *arg; 833 int to_ticks; 834{ 835 struct callout_cpu *cc; 836 struct callout *new; 837 struct callout_handle handle; 838 839 cc = CC_CPU(timeout_cpu); 840 CC_LOCK(cc); 841 /* Fill in the next free callout structure. 
*/ 842 new = SLIST_FIRST(&cc->cc_callfree); 843 if (new == NULL) 844 /* XXX Attempt to malloc first */ 845 panic("timeout table full"); 846 SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); 847 callout_reset(new, to_ticks, ftn, arg); 848 handle.callout = new; 849 CC_UNLOCK(cc); 850 851 return (handle); 852} 853 854void 855untimeout(ftn, arg, handle) 856 timeout_t *ftn; 857 void *arg; 858 struct callout_handle handle; 859{ 860 struct callout_cpu *cc; 861 862 /* 863 * Check for a handle that was initialized 864 * by callout_handle_init, but never used 865 * for a real timeout. 866 */ 867 if (handle.callout == NULL) 868 return; 869 870 cc = callout_lock(handle.callout); 871 if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) 872 callout_stop(handle.callout); 873 CC_UNLOCK(cc); 874} 875 876void 877callout_handle_init(struct callout_handle *handle) 878{ 879 handle->callout = NULL; 880} 881 882/* 883 * New interface; clients allocate their own callout structures. 884 * 885 * callout_reset() - establish or change a timeout 886 * callout_stop() - disestablish a timeout 887 * callout_init() - initialize a callout structure so that it can 888 * safely be passed to callout_reset() and callout_stop() 889 * 890 * <sys/callout.h> defines three convenience macros: 891 * 892 * callout_active() - returns truth if callout has not been stopped, 893 * drained, or deactivated since the last time the callout was 894 * reset. 
* callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */

/*
 * callout_reset_sbt_on() - arm or re-arm callout c to call ftn(arg).
 *
 * sbt:		expiry time.  Used as is when C_ABSOLUTE is set in flags;
 *		otherwise it is an interval added to a base time that is
 *		selected below.
 * precision:	in the relative case it is raised to at least
 *		sbt >> tc_precexp, or sbt >> C_PRELGET(flags) when
 *		C_PRELGET(flags) is not negative.
 * cpu:		CPU requested for the callout; overridden with c->c_cpu
 *		for CALLOUT_LOCAL_ALLOC callouts.
 *
 * Returns non-zero when a pending instance was taken off its queue or an
 * in-progress instance was flagged as cancelled, zero otherwise.  The
 * callout is not queued when a drain is waiting on it, nor when it is
 * currently running and has to change CPU (the migration is recorded and
 * CALLOUT_DFRMIGRATION is set instead).
 */
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
    void (*ftn)(void *), void *arg, int cpu, int flags)
{
	sbintime_t to_sbt, pr;
	struct callout_cpu *cc;
	int cancelled, direct;

	cancelled = 0;
	if (flags & C_ABSOLUTE) {
		to_sbt = sbt;
	} else {
		/* A C_HARDCLOCK interval is never shorter than tick_sbt. */
		if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
			sbt = tick_sbt;
		if ((flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
		    sbt >= sbt_timethreshold) {
			to_sbt = getsbinuptime();

			/* Add safety belt for the case of hz > 1000. */
			to_sbt += tc_tick_sbt - tick_sbt;
#else
		    sbt >= sbt_tickthreshold) {
			/*
			 * Obtain the time of the last hardclock() call on
			 * this CPU directly from the kern_clocksource.c.
			 * This value is per-CPU, but it is equal for all
			 * active ones.
			 *
			 * Without __LP64__ the read is bracketed by
			 * spinlock_enter()/spinlock_exit(), presumably
			 * because the 64-bit load is not atomic there.
			 */
#ifdef __LP64__
			to_sbt = DPCPU_GET(hardclocktime);
#else
			spinlock_enter();
			to_sbt = DPCPU_GET(hardclocktime);
			spinlock_exit();
#endif
#endif
			if ((flags & C_HARDCLOCK) == 0)
				to_sbt += tick_sbt;
		} else
			to_sbt = sbinuptime();
		to_sbt += sbt;
		pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
		    sbt >> C_PRELGET(flags));
		if (pr > precision)
			precision = pr;
	}
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced.
	 */
	if (c->c_flags & CALLOUT_LOCAL_ALLOC)
		cpu = c->c_cpu;
	/* direct selects which cc_exec_entity[] slot tracks this callout. */
	direct = (c->c_flags & CALLOUT_DIRECT) != 0;
	KASSERT(!direct || c->c_lock == NULL,
	    ("%s: direct callout %p has lock", __func__, c));
	cc = callout_lock(c);
	if (cc->cc_exec_entity[direct].cc_curr == c) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress.  If there is a lock then we
		 * can cancel the callout if it has not really started.
		 */
		if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel)
			cancelled = cc->cc_exec_entity[direct].cc_cancel = true;
		if (cc->cc_exec_entity[direct].cc_waiting) {
			/*
			 * Someone has called callout_drain to kill this
			 * callout.  Don't reschedule.
			 */
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled" : "failed to cancel",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (cancelled);
		}
	}
	if (c->c_flags & CALLOUT_PENDING) {
		/*
		 * Still queued: unlink it from its c_links.le list, or
		 * from cc_expireq when CALLOUT_PROCESSED is set.
		 */
		if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
			if (cc->cc_exec_next_dir == c)
				cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		cancelled = 1;
		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
	}

#ifdef SMP
	/*
	 * If the callout must migrate try to perform it immediately.
	 * If the callout is currently running, just defer the migration
	 * to a more appropriate moment.
	 */
	if (c->c_cpu != cpu) {
		if (cc->cc_exec_entity[direct].cc_curr == c) {
			cc->cc_exec_entity[direct].ce_migration_cpu = cpu;
			cc->cc_exec_entity[direct].ce_migration_time
			    = to_sbt;
			cc->cc_exec_entity[direct].ce_migration_func = ftn;
			cc->cc_exec_entity[direct].ce_migration_arg = arg;
			c->c_flags |= CALLOUT_DFRMIGRATION;
			CTR6(KTR_CALLOUT,
		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
			    (u_int)(to_sbt & 0xffffffff), cpu);
			CC_UNLOCK(cc);
			return (cancelled);
		}
		cc = callout_cpu_switch(c, cc, cpu);
	}
#endif

	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
	    (u_int)(to_sbt & 0xffffffff));
	CC_UNLOCK(cc);

	return (cancelled);
}

/*
 * Common idioms that can be optimized in the future.
 *
 * Both helpers hand the callout's current c_func and c_arg back to
 * callout_reset_on(); callout_schedule() additionally keeps c->c_cpu.
 */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

/*
 * _callout_stop_safe() - stop callout c.
 *
 * safe:	when non-zero and the callout is the one currently being
 *		executed, sleep ("codrain") until that execution is over.
 *		When zero the function does not sleep.
 *
 * Returns 1 when the callout was removed from its queue, when its
 * in-progress execution was flagged as cancelled, or when a deferred
 * migration was dropped.  Returns 0 when the callout was neither pending
 * nor stoppable, including the case where a safe caller had to wait for
 * the handler to finish.
 */
int
_callout_stop_safe(c, safe)
	struct callout *c;
	int safe;
{
	struct callout_cpu *cc, *old_cc;
	struct lock_class *class;
	int direct, sq_locked, use_lock;

	/*
	 * Some old subsystems don't hold Giant while running a callout_stop(),
	 * so just discard this check for the moment.
	 */
	if (!safe && c->c_lock != NULL) {
		if (c->c_lock == &Giant.lock_object)
			use_lock = mtx_owned(&Giant);
		else {
			use_lock = 1;
			class = LOCK_CLASS(c->c_lock);
			class->lc_assert(c->c_lock, LA_XLOCKED);
		}
	} else
		use_lock = 0;
	direct = (c->c_flags & CALLOUT_DIRECT) != 0;
	/* sq_locked/old_cc remember a sleepqueue chain lock taken below. */
	sq_locked = 0;
	old_cc = NULL;
again:
	cc = callout_lock(c);

	/*
	 * If the callout was migrating while the callout cpu lock was
	 * dropped, just drop the sleepqueue lock and check the states
	 * again.
	 */
	if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
		CC_UNLOCK(cc);
		sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting);
		sq_locked = 0;
		old_cc = NULL;
		goto again;
#else
		panic("migration should not happen");
#endif
	}

	/*
	 * If the callout isn't pending, it's not on the queue, so
	 * don't attempt to remove it from the queue.  We can try to
	 * stop it by other means however.
	 */
	if (!(c->c_flags & CALLOUT_PENDING)) {
		c->c_flags &= ~CALLOUT_ACTIVE;

		/*
		 * If it wasn't on the queue and it isn't the current
		 * callout, then we can't stop it, so just bail.
		 */
		if (cc->cc_exec_entity[direct].cc_curr != c) {
			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			if (sq_locked)
				sleepq_release(
				    &cc->cc_exec_entity[direct].cc_waiting);
			return (0);
		}

		if (safe) {
			/*
			 * The current callout is running (or just
			 * about to run) and blocking is allowed, so
			 * just wait for the current invocation to
			 * finish.
			 */
			while (cc->cc_exec_entity[direct].cc_curr == c) {
				/*
				 * Use direct calls to sleepqueue interface
				 * instead of cv/msleep in order to avoid
				 * a LOR between cc_lock and sleepqueue
				 * chain spinlocks.  This piece of code
				 * emulates a msleep_spin() call actually.
				 *
				 * If we already have the sleepqueue chain
				 * locked, then we can safely block.  If we
				 * don't already have it locked, however,
				 * we have to drop the cc_lock to lock
				 * it.  This opens several races, so we
				 * restart at the beginning once we have
				 * both locks.  If nothing has changed, then
				 * we will end up back here with sq_locked
				 * set.
				 */
				if (!sq_locked) {
					CC_UNLOCK(cc);
					sleepq_lock(
					&cc->cc_exec_entity[direct].cc_waiting);
					sq_locked = 1;
					old_cc = cc;
					goto again;
				}

				/*
				 * Migration could be cancelled here, but
				 * as long as it is still not sure when it
				 * will be packed up, just let softclock()
				 * take care of it.
				 */
				cc->cc_exec_entity[direct].cc_waiting = true;
				DROP_GIANT();
				CC_UNLOCK(cc);
				sleepq_add(
				    &cc->cc_exec_entity[direct].cc_waiting,
				    &cc->cc_lock.lock_object, "codrain",
				    SLEEPQ_SLEEP, 0);
				sleepq_wait(
				    &cc->cc_exec_entity[direct].cc_waiting,
				    0);
				sq_locked = 0;
				old_cc = NULL;

				/* Reacquire locks previously released. */
				PICKUP_GIANT();
				CC_LOCK(cc);
			}
		} else if (use_lock &&
		    !cc->cc_exec_entity[direct].cc_cancel) {
			/*
			 * The current callout is waiting for its
			 * lock which we hold.  Cancel the callout
			 * and return.  After our caller drops the
			 * lock, the callout will be skipped in
			 * softclock().
			 */
			cc->cc_exec_entity[direct].cc_cancel = true;
			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			KASSERT(!cc_cce_migrating(cc, direct),
			    ("callout wrongly scheduled for migration"));
			CC_UNLOCK(cc);
			KASSERT(!sq_locked, ("sleepqueue chain locked"));
			return (1);
		} else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
			/* Drop the deferred migration request. */
			c->c_flags &= ~CALLOUT_DFRMIGRATION;
			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (1);
		}
		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
		    c, c->c_func, c->c_arg);
		CC_UNLOCK(cc);
		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
		return (0);
	}
	if (sq_locked)
		sleepq_release(&cc->cc_exec_entity[direct].cc_waiting);

	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);

	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
	    c, c->c_func, c->c_arg);
	/* Same unlink step as in callout_reset_sbt_on(). */
	if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
		if (cc->cc_exec_next_dir == c)
			cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
		LIST_REMOVE(c, c_links.le);
	} else
		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
	callout_cc_del(c, cc);

	CC_UNLOCK(cc);
	return (1);
}

/*
 * callout_init() - zero a callout and bind it to timeout_cpu.
 *
 * With mpsafe non-zero no lock is associated and CALLOUT_RETURNUNLOCKED
 * is set; otherwise the callout is associated with Giant and carries no
 * flags.
 */
void
callout_init(c, mpsafe)
	struct callout *c;
	int mpsafe;
{
	bzero(c, sizeof *c);
	if (mpsafe) {
		c->c_lock = NULL;
		c->c_flags = CALLOUT_RETURNUNLOCKED;
	} else {
		c->c_lock = &Giant.lock_object;
		c->c_flags = 0;
	}
	c->c_cpu = timeout_cpu;
}

/*
 * _callout_init_lock() - zero a callout, associate it with lock (which may
 * be NULL) and bind it to timeout_cpu.
 *
 * The assertions require that flags contains nothing but
 * CALLOUT_RETURNUNLOCKED and CALLOUT_SHAREDLOCK, that
 * CALLOUT_RETURNUNLOCKED is only given together with a lock, and that the
 * lock's class has neither LC_SPINLOCK nor LC_SLEEPABLE set.
 */
void
_callout_init_lock(c, lock, flags)
	struct callout *c;
	struct lock_object *lock;
	int flags;
{
	bzero(c, sizeof *c);
	c->c_lock = lock;
	KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
	    ("callout_init_lock: bad flags %d", flags));
	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
	    __func__));
	c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
	c->c_cpu = timeout_cpu;
}

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.
- Ken Key <key@cs.utk.edu>
 *
 * NOTE(review): this function uses `cc' without declaring it and walks
 * `calltodo'/`c_next', none of which are defined in the part of the file
 * reviewed here.  It looks as if it no longer builds when
 * APM_FIXUP_CALLTODO is defined -- confirm before enabling it.
 */
void
adjust_timeout_calltodo(time_change)
	struct timeval *time_change;
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = (time_change->tv_sec * 1000000 +
		    time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
		    (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	/* The tick count is capped at INT_MAX. */
	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	CC_LOCK(cc);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	CC_UNLOCK(cc);

	return;
}
#endif /* APM_FIXUP_CALLTODO */

/*
 * flssbt() - map an sbintime_t onto a find-last-set bucket number.
 *
 * Half of the value is added to it first, then the position of the most
 * significant set bit is returned via flsl().  When long is narrower than
 * sbintime_t, values of at least SBT_1S are handled by applying flsl() to
 * the upper 32 bits and adding 32.
 *
 * NOTE(review): for very large inputs the addition can set the top bit,
 * in which case the result would presumably be 64 -- confirm that callers
 * indexing 64-entry arrays cannot pass such values.
 */
static int
flssbt(sbintime_t sbt)
{

	sbt += (uint64_t)sbt >> 1;
	if (sizeof(long) >= sizeof(sbintime_t))
		return (flsl(sbt));
	if (sbt >= SBT_1S)
		return (flsl(((uint64_t)sbt) >> 32) + 32);
	return (flsl(sbt));
}

/*
 * Dump immediate statistic snapshot of the scheduled callouts.
1324 */ 1325static int 1326sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) 1327{ 1328 struct callout *tmp; 1329 struct callout_cpu *cc; 1330 struct callout_list *sc; 1331 sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; 1332 int ct[64], cpr[64], ccpbk[32]; 1333 int error, val, i, count, tcum, pcum, maxc, c, medc; 1334#ifdef SMP 1335 int cpu; 1336#endif 1337 1338 val = 0; 1339 error = sysctl_handle_int(oidp, &val, 0, req); 1340 if (error != 0 || req->newptr == NULL) 1341 return (error); 1342 count = maxc = 0; 1343 st = spr = maxt = maxpr = 0; 1344 bzero(ccpbk, sizeof(ccpbk)); 1345 bzero(ct, sizeof(ct)); 1346 bzero(cpr, sizeof(cpr)); 1347 now = sbinuptime(); 1348#ifdef SMP 1349 CPU_FOREACH(cpu) { 1350 cc = CC_CPU(cpu); 1351#else 1352 cc = CC_CPU(timeout_cpu); 1353#endif 1354 CC_LOCK(cc); 1355 for (i = 0; i < callwheelsize; i++) { 1356 sc = &cc->cc_callwheel[i]; 1357 c = 0; 1358 LIST_FOREACH(tmp, sc, c_links.le) { 1359 c++; 1360 t = tmp->c_time - now; 1361 if (t < 0) 1362 t = 0; 1363 st += t / SBT_1US; 1364 spr += tmp->c_precision / SBT_1US; 1365 if (t > maxt) 1366 maxt = t; 1367 if (tmp->c_precision > maxpr) 1368 maxpr = tmp->c_precision; 1369 ct[flssbt(t)]++; 1370 cpr[flssbt(tmp->c_precision)]++; 1371 } 1372 if (c > maxc) 1373 maxc = c; 1374 ccpbk[fls(c + c / 2)]++; 1375 count += c; 1376 } 1377 CC_UNLOCK(cc); 1378#ifdef SMP 1379 } 1380#endif 1381 1382 for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) 1383 tcum += ct[i]; 1384 medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1385 for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) 1386 pcum += cpr[i]; 1387 medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1388 for (i = 0, c = 0; i < 32 && c < count / 2; i++) 1389 c += ccpbk[i]; 1390 medc = (i >= 2) ? 
(1 << (i - 2)) : 0; 1391 1392 printf("Scheduled callouts statistic snapshot:\n"); 1393 printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", 1394 count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); 1395 printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", 1396 medc, 1397 count / callwheelsize / mp_ncpus, 1398 (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, 1399 maxc); 1400 printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1401 medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, 1402 (st / count) / 1000000, (st / count) % 1000000, 1403 maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); 1404 printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1405 medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, 1406 (spr / count) / 1000000, (spr / count) % 1000000, 1407 maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); 1408 printf(" Distribution: \tbuckets\t time\t tcum\t" 1409 " prec\t pcum\n"); 1410 for (i = 0, tcum = pcum = 0; i < 64; i++) { 1411 if (ct[i] == 0 && cpr[i] == 0) 1412 continue; 1413 t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; 1414 tcum += ct[i]; 1415 pcum += cpr[i]; 1416 printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", 1417 t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, 1418 i - 1 - (32 - CC_HASH_SHIFT), 1419 ct[i], tcum, cpr[i], pcum); 1420 } 1421 return (error); 1422} 1423SYSCTL_PROC(_kern, OID_AUTO, callout_stat, 1424 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1425 0, 0, sysctl_kern_callout_stat, "I", 1426 "Dump immediate statistic snapshot of the scheduled callouts"); 1427