11541Srgrimes/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 4. Neither the name of the University nor the names of its contributors 191541Srgrimes * may be used to endorse or promote products derived from this software 201541Srgrimes * without specific prior written permission. 211541Srgrimes * 221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 251541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 321541Srgrimes * SUCH DAMAGE. 331541Srgrimes * 3444510Swollman * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 351541Srgrimes */ 361541Srgrimes 37116182Sobrien#include <sys/cdefs.h> 38116182Sobrien__FBSDID("$FreeBSD: stable/11/sys/kern/kern_timeout.c 360633 2020-05-04 16:30:36Z jhb $"); 39116182Sobrien 40247777Sdavide#include "opt_callout_profiling.h" 41301522Sbz#include "opt_ddb.h" 42247777Sdavide#if defined(__arm__) 43247777Sdavide#include "opt_timer.h" 44247777Sdavide#endif 45268026Sadrian#include "opt_rss.h" 46187664Srwatson 471541Srgrimes#include <sys/param.h> 481541Srgrimes#include <sys/systm.h> 49177859Sjeff#include <sys/bus.h> 5033392Sphk#include <sys/callout.h> 51248031Sandre#include <sys/file.h> 52177859Sjeff#include <sys/interrupt.h> 531541Srgrimes#include <sys/kernel.h> 54133229Srwatson#include <sys/ktr.h> 5574914Sjhb#include <sys/lock.h> 56177859Sjeff#include <sys/malloc.h> 5768840Sjhb#include <sys/mutex.h> 58150188Sjhb#include <sys/proc.h> 59187664Srwatson#include <sys/sdt.h> 60171053Sattilio#include <sys/sleepqueue.h> 61115810Sphk#include <sys/sysctl.h> 62177859Sjeff#include <sys/smp.h> 631541Srgrimes 64301522Sbz#ifdef DDB 65301522Sbz#include <ddb/ddb.h> 66301522Sbz#include <machine/_inttypes.h> 67301522Sbz#endif 68301522Sbz 69220456Sattilio#ifdef SMP 70220456Sattilio#include <machine/cpu.h> 71220456Sattilio#endif 72220456Sattilio 73247777Sdavide#ifndef NO_EVENTTIMERS 74247777SdavideDPCPU_DECLARE(sbintime_t, hardclocktime); 75247777Sdavide#endif 76247777Sdavide 77187664SrwatsonSDT_PROVIDER_DEFINE(callout_execute); 78292384SmarkjSDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); 79292384SmarkjSDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); 80187664Srwatson 81247777Sdavide#ifdef CALLOUT_PROFILING 82115810Sphkstatic int avg_depth; 83115810SphkSYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, 84115810Sphk "Average number of items examined per softclock call. Units = 1/1000"); 85115810Sphkstatic int avg_gcalls; 86115810SphkSYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, 87115810Sphk "Average number of Giant callouts made per softclock call. Units = 1/1000"); 88173760Sattiliostatic int avg_lockcalls; 89173760SattilioSYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, 90173760Sattilio "Average number of lock callouts made per softclock call. Units = 1/1000"); 91115810Sphkstatic int avg_mpcalls; 92115810SphkSYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, 93115810Sphk "Average number of MP callouts made per softclock call. Units = 1/1000"); 94247777Sdavidestatic int avg_depth_dir; 95247777SdavideSYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, 96247777Sdavide "Average number of direct callouts examined per callout_process call. " 97247777Sdavide "Units = 1/1000"); 98247777Sdavidestatic int avg_lockcalls_dir; 99247777SdavideSYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, 100247777Sdavide &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " 101247777Sdavide "callout_process call. Units = 1/1000"); 102247777Sdavidestatic int avg_mpcalls_dir; 103247777SdavideSYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 104247777Sdavide 0, "Average number of MP direct callouts made per callout_process call. " 105247777Sdavide "Units = 1/1000"); 106247777Sdavide#endif 107248031Sandre 108248031Sandrestatic int ncallout; 109267992ShselaskySYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, 110248031Sandre "Number of entries in callwheel and size of timeout() preallocation"); 111248031Sandre 112268026Sadrian#ifdef RSS 113268026Sadrianstatic int pin_default_swi = 1; 114268026Sadrianstatic int pin_pcpu_swi = 1; 115268026Sadrian#else 116265792Sadrianstatic int pin_default_swi = 0; 117265792Sadrianstatic int pin_pcpu_swi = 0; 118268026Sadrian#endif 119265792Sadrian 120267992ShselaskySYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 121265792Sadrian 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); 122267992ShselaskySYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 123265792Sadrian 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); 124265792Sadrian 12533392Sphk/* 12633392Sphk * TODO: 12733392Sphk * allocate more timeout table slots when table overflows. 12833392Sphk */ 129247715Sdavideu_int callwheelsize, callwheelmask; 1302112Swollman 131200510Sluigi/* 132277528Shselasky * The callout cpu exec entities represent informations necessary for 133277528Shselasky * describing the state of callouts currently running on the CPU and the ones 134277528Shselasky * necessary for migrating callouts to the new callout cpu. In particular, 135277528Shselasky * the first entry of the array cc_exec_entity holds informations for callout 136277528Shselasky * running in SWI thread context, while the second one holds informations 137277528Shselasky * for callout running directly from hardware interrupt context. 138277528Shselasky * The cached informations are very important for deferring migration when 139277528Shselasky * the migrating callout is already running. 140220456Sattilio */ 141247777Sdavidestruct cc_exec { 142247777Sdavide struct callout *cc_curr; 143360633Sjhb callout_func_t *cc_drain; 144277528Shselasky#ifdef SMP 145360633Sjhb callout_func_t *ce_migration_func; 146277528Shselasky void *ce_migration_arg; 147277528Shselasky int ce_migration_cpu; 148277528Shselasky sbintime_t ce_migration_time; 149277528Shselasky sbintime_t ce_migration_prec; 150277528Shselasky#endif 151277528Shselasky bool cc_cancel; 152277528Shselasky bool cc_waiting; 153220456Sattilio}; 154247467Sdavide 155220456Sattilio/* 156277528Shselasky * There is one struct callout_cpu per cpu, holding all relevant 157200510Sluigi * state for the callout processing thread on the individual CPU. 158200510Sluigi */ 159177859Sjeffstruct callout_cpu { 160242402Sattilio struct mtx_padalign cc_lock; 161247777Sdavide struct cc_exec cc_exec_entity[2]; 162278623Srrs struct callout *cc_next; 163177859Sjeff struct callout *cc_callout; 164247777Sdavide struct callout_list *cc_callwheel; 165247777Sdavide struct callout_tailq cc_expireq; 166247777Sdavide struct callout_slist cc_callfree; 167247777Sdavide sbintime_t cc_firstevent; 168247777Sdavide sbintime_t cc_lastscan; 169177859Sjeff void *cc_cookie; 170247777Sdavide u_int cc_bucket; 171280785Srrs u_int cc_inited; 172272757Sjhb char cc_ktr_event_name[20]; 173177859Sjeff}; 174128024Scperciva 175280872Srrs#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) 176280872Srrs 177278469Srrs#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr 178290664Srrs#define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain 179278623Srrs#define cc_exec_next(cc) cc->cc_next 180278469Srrs#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel 181278469Srrs#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting 182177859Sjeff#ifdef SMP 183278469Srrs#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func 184278469Srrs#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg 185278469Srrs#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu 186278469Srrs#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time 187278469Srrs#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec 188277528Shselasky 189177859Sjeffstruct callout_cpu cc_cpu[MAXCPU]; 190220456Sattilio#define CPUBLOCK MAXCPU 191177859Sjeff#define CC_CPU(cpu) (&cc_cpu[(cpu)]) 192177859Sjeff#define CC_SELF() CC_CPU(PCPU_GET(cpuid)) 193177859Sjeff#else 194177859Sjeffstruct callout_cpu cc_cpu; 195177859Sjeff#define CC_CPU(cpu) &cc_cpu 196177859Sjeff#define CC_SELF() &cc_cpu 197177859Sjeff#endif 198177859Sjeff#define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) 199177859Sjeff#define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) 200220456Sattilio#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) 201177859Sjeff 202177859Sjeffstatic int timeout_cpu; 203177859Sjeff 204272757Sjhbstatic void callout_cpu_init(struct callout_cpu *cc, int cpu); 205247777Sdavidestatic void softclock_call_cc(struct callout *c, struct callout_cpu *cc, 206247777Sdavide#ifdef CALLOUT_PROFILING 207247777Sdavide int *mpcalls, int *lockcalls, int *gcalls, 208247777Sdavide#endif 209247777Sdavide int direct); 210247777Sdavide 211227293Sedstatic MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); 212177859Sjeff 213277528Shselasky/** 214277528Shselasky * Locked by cc_lock: 215277528Shselasky * cc_curr - If a callout is in progress, it is cc_curr. 216277528Shselasky * If cc_curr is non-NULL, threads waiting in 217277528Shselasky * callout_drain() will be woken up as soon as the 218277528Shselasky * relevant callout completes. 219277528Shselasky * cc_cancel - Changing to 1 with both callout_lock and cc_lock held 220277528Shselasky * guarantees that the current callout will not run. 221277528Shselasky * The softclock() function sets this to 0 before it 222277528Shselasky * drops callout_lock to acquire c_lock, and it calls 223277528Shselasky * the handler only if curr_cancelled is still 0 after 224277528Shselasky * cc_lock is successfully acquired. 225277528Shselasky * cc_waiting - If a thread is waiting in callout_drain(), then 226277528Shselasky * callout_wait is nonzero. Set only when 227277528Shselasky * cc_curr is non-NULL. 228277528Shselasky */ 229277528Shselasky 2301541Srgrimes/* 231277528Shselasky * Resets the execution entity tied to a specific callout cpu. 232220456Sattilio */ 233220456Sattiliostatic void 234277528Shselaskycc_cce_cleanup(struct callout_cpu *cc, int direct) 235277528Shselasky{ 236277528Shselasky 237278469Srrs cc_exec_curr(cc, direct) = NULL; 238278469Srrs cc_exec_cancel(cc, direct) = false; 239278469Srrs cc_exec_waiting(cc, direct) = false; 240277528Shselasky#ifdef SMP 241278469Srrs cc_migration_cpu(cc, direct) = CPUBLOCK; 242278469Srrs cc_migration_time(cc, direct) = 0; 243278469Srrs cc_migration_prec(cc, direct) = 0; 244278469Srrs cc_migration_func(cc, direct) = NULL; 245278469Srrs cc_migration_arg(cc, direct) = NULL; 246277528Shselasky#endif 247277528Shselasky} 248277528Shselasky 249277528Shselasky/* 250277528Shselasky * Checks if migration is requested by a specific callout cpu. 251277528Shselasky */ 252277528Shselaskystatic int 253277528Shselaskycc_cce_migrating(struct callout_cpu *cc, int direct) 254277528Shselasky{ 255277528Shselasky 256277528Shselasky#ifdef SMP 257278469Srrs return (cc_migration_cpu(cc, direct) != CPUBLOCK); 258277528Shselasky#else 259277528Shselasky return (0); 260277528Shselasky#endif 261277528Shselasky} 262277528Shselasky 263277528Shselasky/* 264277528Shselasky * Kernel low level callwheel initialization 265277528Shselasky * called on cpu0 during kernel startup. 266277528Shselasky */ 267277528Shselaskystatic void 268248032Sandrecallout_callwheel_init(void *dummy) 26982127Sdillon{ 270177859Sjeff struct callout_cpu *cc; 271177859Sjeff 27282127Sdillon /* 273248031Sandre * Calculate the size of the callout wheel and the preallocated 274248031Sandre * timeout() structures. 275248141Sandre * XXX: Clip callout to result of previous function of maxusers 276248141Sandre * maximum 384. This is still huge, but acceptable. 277248031Sandre */ 278280786Sbz memset(CC_CPU(0), 0, sizeof(cc_cpu)); 279248031Sandre ncallout = imin(16 + maxproc + maxfiles, 18508); 280248031Sandre TUNABLE_INT_FETCH("kern.ncallout", &ncallout); 281248031Sandre 282248031Sandre /* 283243853Salfred * Calculate callout wheel size, should be next power of two higher 284243853Salfred * than 'ncallout'. 28582127Sdillon */ 286243853Salfred callwheelsize = 1 << fls(ncallout); 28782127Sdillon callwheelmask = callwheelsize - 1; 28882127Sdillon 289248032Sandre /* 290265792Sadrian * Fetch whether we're pinning the swi's or not. 291265792Sadrian */ 292265792Sadrian TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); 293265792Sadrian TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); 294265792Sadrian 295265792Sadrian /* 296248032Sandre * Only cpu0 handles timeout(9) and receives a preallocation. 297248032Sandre * 298248032Sandre * XXX: Once all timeout(9) consumers are converted this can 299248032Sandre * be removed. 300248032Sandre */ 301248032Sandre timeout_cpu = PCPU_GET(cpuid); 302248032Sandre cc = CC_CPU(timeout_cpu); 303248032Sandre cc->cc_callout = malloc(ncallout * sizeof(struct callout), 304248032Sandre M_CALLOUT, M_WAITOK); 305272757Sjhb callout_cpu_init(cc, timeout_cpu); 30682127Sdillon} 307248032SandreSYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); 30882127Sdillon 309248032Sandre/* 310248032Sandre * Initialize the per-cpu callout structures. 311248032Sandre */ 312177859Sjeffstatic void 313272757Sjhbcallout_cpu_init(struct callout_cpu *cc, int cpu) 314177859Sjeff{ 315177859Sjeff struct callout *c; 316177859Sjeff int i; 317177859Sjeff 318177859Sjeff mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); 319177859Sjeff SLIST_INIT(&cc->cc_callfree); 320280785Srrs cc->cc_inited = 1; 321248113Sdavide cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, 322248032Sandre M_CALLOUT, M_WAITOK); 323247777Sdavide for (i = 0; i < callwheelsize; i++) 324247777Sdavide LIST_INIT(&cc->cc_callwheel[i]); 325247777Sdavide TAILQ_INIT(&cc->cc_expireq); 326264388Sdavide cc->cc_firstevent = SBT_MAX; 327277528Shselasky for (i = 0; i < 2; i++) 328277528Shselasky cc_cce_cleanup(cc, i); 329272757Sjhb snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), 330272757Sjhb "callwheel cpu %d", cpu); 331248032Sandre if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ 332177859Sjeff return; 333177859Sjeff for (i = 0; i < ncallout; i++) { 334177859Sjeff c = &cc->cc_callout[i]; 335177859Sjeff callout_init(c, 0); 336280785Srrs c->c_iflags = CALLOUT_LOCAL_ALLOC; 337177859Sjeff SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 338177859Sjeff } 339177859Sjeff} 340177859Sjeff 341277528Shselasky#ifdef SMP 34282127Sdillon/* 343277528Shselasky * Switches the cpu tied to a specific callout. 344277528Shselasky * The function expects a locked incoming callout cpu and returns with 345277528Shselasky * locked outcoming callout cpu. 346277528Shselasky */ 347277528Shselaskystatic struct callout_cpu * 348277528Shselaskycallout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) 349277528Shselasky{ 350277528Shselasky struct callout_cpu *new_cc; 351277528Shselasky 352277528Shselasky MPASS(c != NULL && cc != NULL); 353277528Shselasky CC_LOCK_ASSERT(cc); 354277528Shselasky 355277528Shselasky /* 356277528Shselasky * Avoid interrupts and preemption firing after the callout cpu 357277528Shselasky * is blocked in order to avoid deadlocks as the new thread 358277528Shselasky * may be willing to acquire the callout cpu lock. 359277528Shselasky */ 360277528Shselasky c->c_cpu = CPUBLOCK; 361277528Shselasky spinlock_enter(); 362277528Shselasky CC_UNLOCK(cc); 363277528Shselasky new_cc = CC_CPU(new_cpu); 364277528Shselasky CC_LOCK(new_cc); 365277528Shselasky spinlock_exit(); 366277528Shselasky c->c_cpu = new_cpu; 367277528Shselasky return (new_cc); 368277528Shselasky} 369277528Shselasky#endif 370277528Shselasky 371277528Shselasky/* 372177859Sjeff * Start standard softclock thread. 373177859Sjeff */ 374177859Sjeffstatic void 375177859Sjeffstart_softclock(void *dummy) 376177859Sjeff{ 377177859Sjeff struct callout_cpu *cc; 378261906Sadrian char name[MAXCOMLEN]; 379177859Sjeff#ifdef SMP 380177859Sjeff int cpu; 381265792Sadrian struct intr_event *ie; 382177859Sjeff#endif 383177859Sjeff 384177859Sjeff cc = CC_CPU(timeout_cpu); 385261906Sadrian snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); 386261906Sadrian if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, 387214746Sjhb INTR_MPSAFE, &cc->cc_cookie)) 388177859Sjeff panic("died while creating standard software ithreads"); 389265792Sadrian if (pin_default_swi && 390265792Sadrian (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { 391265792Sadrian printf("%s: timeout clock couldn't be pinned to cpu %d\n", 392265792Sadrian __func__, 393265792Sadrian timeout_cpu); 394265792Sadrian } 395265792Sadrian 396177859Sjeff#ifdef SMP 397209059Sjhb CPU_FOREACH(cpu) { 398177859Sjeff if (cpu == timeout_cpu) 399177859Sjeff continue; 400177859Sjeff cc = CC_CPU(cpu); 401248032Sandre cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ 402272757Sjhb callout_cpu_init(cc, cpu); 403261906Sadrian snprintf(name, sizeof(name), "clock (%d)", cpu); 404265792Sadrian ie = NULL; 405265792Sadrian if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, 406177859Sjeff INTR_MPSAFE, &cc->cc_cookie)) 407177859Sjeff panic("died while creating standard software ithreads"); 408265792Sadrian if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { 409265792Sadrian printf("%s: per-cpu clock couldn't be pinned to " 410265792Sadrian "cpu %d\n", 411265792Sadrian __func__, 412265792Sadrian cpu); 413265792Sadrian } 41482127Sdillon } 415177859Sjeff#endif 416177859Sjeff} 417177859SjeffSYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); 418177859Sjeff 419247777Sdavide#define CC_HASH_SHIFT 8 420247777Sdavide 421247777Sdavidestatic inline u_int 422247777Sdavidecallout_hash(sbintime_t sbt) 423247777Sdavide{ 424247777Sdavide 425247777Sdavide return (sbt >> (32 - CC_HASH_SHIFT)); 426247777Sdavide} 427247777Sdavide 428247777Sdavidestatic inline u_int 429247777Sdavidecallout_get_bucket(sbintime_t sbt) 430247777Sdavide{ 431247777Sdavide 432247777Sdavide return (callout_hash(sbt) & callwheelmask); 433247777Sdavide} 434247777Sdavide 435177859Sjeffvoid 436247777Sdavidecallout_process(sbintime_t now) 437177859Sjeff{ 438247777Sdavide struct callout *tmp, *tmpn; 439177859Sjeff struct callout_cpu *cc; 440247777Sdavide struct callout_list *sc; 441247777Sdavide sbintime_t first, last, max, tmp_max; 442247777Sdavide uint32_t lookahead; 443247777Sdavide u_int firstb, lastb, nowb; 444247777Sdavide#ifdef CALLOUT_PROFILING 445247777Sdavide int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; 446247777Sdavide#endif 447277528Shselasky 448247777Sdavide cc = CC_SELF(); 449277528Shselasky mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); 450247777Sdavide 451247777Sdavide /* Compute the buckets of the last scan and present times. */ 452247777Sdavide firstb = callout_hash(cc->cc_lastscan); 453247777Sdavide cc->cc_lastscan = now; 454247777Sdavide nowb = callout_hash(now); 455247777Sdavide 456247777Sdavide /* Compute the last bucket and minimum time of the bucket after it. */ 457247777Sdavide if (nowb == firstb) 458247777Sdavide lookahead = (SBT_1S / 16); 459247777Sdavide else if (nowb - firstb == 1) 460247777Sdavide lookahead = (SBT_1S / 8); 461247777Sdavide else 462247777Sdavide lookahead = (SBT_1S / 2); 463247777Sdavide first = last = now; 464247777Sdavide first += (lookahead / 2); 465247777Sdavide last += lookahead; 466247777Sdavide last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); 467247777Sdavide lastb = callout_hash(last) - 1; 468247777Sdavide max = last; 469247777Sdavide 470177859Sjeff /* 471247777Sdavide * Check if we wrapped around the entire wheel from the last scan. 472247777Sdavide * In case, we need to scan entirely the wheel for pending callouts. 473177859Sjeff */ 474247777Sdavide if (lastb - firstb >= callwheelsize) { 475247777Sdavide lastb = firstb + callwheelsize - 1; 476247777Sdavide if (nowb - firstb >= callwheelsize) 477247777Sdavide nowb = lastb; 478247777Sdavide } 479247777Sdavide 480247777Sdavide /* Iterate callwheel from firstb to nowb and then up to lastb. */ 481247777Sdavide do { 482247777Sdavide sc = &cc->cc_callwheel[firstb & callwheelmask]; 483247777Sdavide tmp = LIST_FIRST(sc); 484247777Sdavide while (tmp != NULL) { 485247777Sdavide /* Run the callout if present time within allowed. */ 486247777Sdavide if (tmp->c_time <= now) { 487247777Sdavide /* 488247777Sdavide * Consumer told us the callout may be run 489247777Sdavide * directly from hardware interrupt context. 490247777Sdavide */ 491280785Srrs if (tmp->c_iflags & CALLOUT_DIRECT) { 492247777Sdavide#ifdef CALLOUT_PROFILING 493247777Sdavide ++depth_dir; 494247777Sdavide#endif 495278623Srrs cc_exec_next(cc) = 496247777Sdavide LIST_NEXT(tmp, c_links.le); 497247777Sdavide cc->cc_bucket = firstb & callwheelmask; 498247777Sdavide LIST_REMOVE(tmp, c_links.le); 499247777Sdavide softclock_call_cc(tmp, cc, 500247777Sdavide#ifdef CALLOUT_PROFILING 501247777Sdavide &mpcalls_dir, &lockcalls_dir, NULL, 502247777Sdavide#endif 503247777Sdavide 1); 504278623Srrs tmp = cc_exec_next(cc); 505278623Srrs cc_exec_next(cc) = NULL; 506247777Sdavide } else { 507247777Sdavide tmpn = LIST_NEXT(tmp, c_links.le); 508247777Sdavide LIST_REMOVE(tmp, c_links.le); 509247777Sdavide TAILQ_INSERT_TAIL(&cc->cc_expireq, 510247777Sdavide tmp, c_links.tqe); 511280785Srrs tmp->c_iflags |= CALLOUT_PROCESSED; 512247777Sdavide tmp = tmpn; 513247777Sdavide } 514247777Sdavide continue; 515247777Sdavide } 516247777Sdavide /* Skip events from distant future. */ 517247777Sdavide if (tmp->c_time >= max) 518247777Sdavide goto next; 519247777Sdavide /* 520247777Sdavide * Event minimal time is bigger than present maximal 521247777Sdavide * time, so it cannot be aggregated. 522247777Sdavide */ 523247777Sdavide if (tmp->c_time > last) { 524247777Sdavide lastb = nowb; 525247777Sdavide goto next; 526247777Sdavide } 527247777Sdavide /* Update first and last time, respecting this event. */ 528247777Sdavide if (tmp->c_time < first) 529247777Sdavide first = tmp->c_time; 530247777Sdavide tmp_max = tmp->c_time + tmp->c_precision; 531247777Sdavide if (tmp_max < last) 532247777Sdavide last = tmp_max; 533247777Sdavidenext: 534247777Sdavide tmp = LIST_NEXT(tmp, c_links.le); 535180608Sjeff } 536247777Sdavide /* Proceed with the next bucket. */ 537247777Sdavide firstb++; 538247777Sdavide /* 539247777Sdavide * Stop if we looked after present time and found 540247777Sdavide * some event we can't execute at now. 541247777Sdavide * Stop if we looked far enough into the future. 542247777Sdavide */ 543247777Sdavide } while (((int)(firstb - lastb)) <= 0); 544247777Sdavide cc->cc_firstevent = last; 545247777Sdavide#ifndef NO_EVENTTIMERS 546247777Sdavide cpu_new_callout(curcpu, last, first); 547247777Sdavide#endif 548247777Sdavide#ifdef CALLOUT_PROFILING 549247777Sdavide avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; 550247777Sdavide avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; 551247777Sdavide avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; 552247777Sdavide#endif 553277528Shselasky mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); 554177859Sjeff /* 555177859Sjeff * swi_sched acquires the thread lock, so we don't want to call it 556177859Sjeff * with cc_lock held; incorrect locking order. 557177859Sjeff */ 558247777Sdavide if (!TAILQ_EMPTY(&cc->cc_expireq)) 559177859Sjeff swi_sched(cc->cc_cookie, 0); 560177859Sjeff} 561177859Sjeff 562177859Sjeffstatic struct callout_cpu * 563177859Sjeffcallout_lock(struct callout *c) 564177859Sjeff{ 565177859Sjeff struct callout_cpu *cc; 566277528Shselasky int cpu; 567277528Shselasky 568277528Shselasky for (;;) { 569277528Shselasky cpu = c->c_cpu; 570277528Shselasky#ifdef SMP 571277528Shselasky if (cpu == CPUBLOCK) { 572277528Shselasky while (c->c_cpu == CPUBLOCK) 573277528Shselasky cpu_spinwait(); 574277528Shselasky continue; 575277528Shselasky } 576277528Shselasky#endif 577277528Shselasky cc = CC_CPU(cpu); 578277528Shselasky CC_LOCK(cc); 579277528Shselasky if (cpu == c->c_cpu) 580277528Shselasky break; 581277528Shselasky CC_UNLOCK(cc); 582277528Shselasky } 583177859Sjeff return (cc); 58482127Sdillon} 58582127Sdillon 586277528Shselaskystatic void 587277528Shselaskycallout_cc_add(struct callout *c, struct callout_cpu *cc, 588277528Shselasky sbintime_t sbt, sbintime_t precision, void (*func)(void *), 589278623Srrs void *arg, int cpu, int flags) 590220456Sattilio{ 591247777Sdavide int bucket; 592220456Sattilio 593220456Sattilio CC_LOCK_ASSERT(cc); 594277528Shselasky if (sbt < cc->cc_lastscan) 595277528Shselasky sbt = cc->cc_lastscan; 596277528Shselasky c->c_arg = arg; 597280785Srrs c->c_iflags |= CALLOUT_PENDING; 598280785Srrs c->c_iflags &= ~CALLOUT_PROCESSED; 599280785Srrs c->c_flags |= CALLOUT_ACTIVE; 600281510Srrs if (flags & C_DIRECT_EXEC) 601281511Srrs c->c_iflags |= CALLOUT_DIRECT; 602277528Shselasky c->c_func = func; 603277528Shselasky c->c_time = sbt; 604277528Shselasky c->c_precision = precision; 605247777Sdavide bucket = callout_get_bucket(c->c_time); 606247777Sdavide CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", 607247777Sdavide c, (int)(c->c_precision >> 32), 608247777Sdavide (u_int)(c->c_precision & 0xffffffff)); 609247777Sdavide LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); 610247777Sdavide if (cc->cc_bucket == bucket) 611278623Srrs cc_exec_next(cc) = c; 612247777Sdavide#ifndef NO_EVENTTIMERS 613247777Sdavide /* 614247777Sdavide * Inform the eventtimers(4) subsystem there's a new callout 615247777Sdavide * that has been inserted, but only if really required. 616247777Sdavide */ 617264388Sdavide if (SBT_MAX - c->c_time < c->c_precision) 618264388Sdavide c->c_precision = SBT_MAX - c->c_time; 619247777Sdavide sbt = c->c_time + c->c_precision; 620247777Sdavide if (sbt < cc->cc_firstevent) { 621247777Sdavide cc->cc_firstevent = sbt; 622277528Shselasky cpu_new_callout(cpu, sbt, c->c_time); 623220456Sattilio } 624247777Sdavide#endif 625220456Sattilio} 626220456Sattilio 627234981Skibstatic void 628234981Skibcallout_cc_del(struct callout *c, struct callout_cpu *cc) 629234981Skib{ 630234981Skib 631280785Srrs if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0) 632277528Shselasky return; 633243901Skib c->c_func = NULL; 634243901Skib SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 635234981Skib} 636234981Skib 637243901Skibstatic void 638247777Sdavidesoftclock_call_cc(struct callout *c, struct callout_cpu *cc, 639247777Sdavide#ifdef CALLOUT_PROFILING 640247777Sdavide int *mpcalls, int *lockcalls, int *gcalls, 641247777Sdavide#endif 642247777Sdavide int direct) 643234981Skib{ 644277528Shselasky struct rm_priotracker tracker; 645360633Sjhb callout_func_t *c_func, *drain; 646234981Skib void *c_arg; 647277528Shselasky struct lock_class *class; 648234981Skib struct lock_object *c_lock; 649277528Shselasky uintptr_t lock_status; 650280785Srrs int c_iflags; 651277528Shselasky#ifdef SMP 652277528Shselasky struct callout_cpu *new_cc; 653360633Sjhb callout_func_t *new_func; 654277528Shselasky void *new_arg; 655277528Shselasky int flags, new_cpu; 656277528Shselasky sbintime_t new_prec, new_time; 657277528Shselasky#endif 658247777Sdavide#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 659247793Sdavide sbintime_t sbt1, sbt2; 660234981Skib struct timespec ts2; 661247777Sdavide static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ 662360633Sjhb static callout_func_t *lastfunc; 663234981Skib#endif 664234981Skib 665280785Srrs KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, 666280785Srrs ("softclock_call_cc: pend %p %x", c, c->c_iflags)); 667280785Srrs KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, 668280785Srrs ("softclock_call_cc: act %p %x", c, c->c_flags)); 669277528Shselasky class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; 670277528Shselasky lock_status = 0; 671277528Shselasky if (c->c_flags & CALLOUT_SHAREDLOCK) { 672277528Shselasky if (class == &lock_class_rm) 673277528Shselasky lock_status = (uintptr_t)&tracker; 674277528Shselasky else 675277528Shselasky lock_status = 1; 676277528Shselasky } 677234981Skib c_lock = c->c_lock; 678234981Skib c_func = c->c_func; 679234981Skib c_arg = c->c_arg; 680280785Srrs c_iflags = c->c_iflags; 681280785Srrs if (c->c_iflags & CALLOUT_LOCAL_ALLOC) 682280785Srrs c->c_iflags = CALLOUT_LOCAL_ALLOC; 683277528Shselasky else 684280785Srrs c->c_iflags &= ~CALLOUT_PENDING; 685278469Srrs 686278469Srrs cc_exec_curr(cc, direct) = c; 687278469Srrs cc_exec_cancel(cc, direct) = false; 688290664Srrs cc_exec_drain(cc, direct) = NULL; 689277528Shselasky CC_UNLOCK(cc); 690234981Skib if (c_lock != NULL) { 691277528Shselasky class->lc_lock(c_lock, lock_status); 692234981Skib /* 693277528Shselasky * The callout may have been cancelled 694277528Shselasky * while we switched locks. 695234981Skib */ 696278469Srrs if (cc_exec_cancel(cc, direct)) { 697277528Shselasky class->lc_unlock(c_lock); 698277528Shselasky goto skip; 699234981Skib } 700277528Shselasky /* The callout cannot be stopped now. */ 701278469Srrs cc_exec_cancel(cc, direct) = true; 702234981Skib if (c_lock == &Giant.lock_object) { 703247777Sdavide#ifdef CALLOUT_PROFILING 704234981Skib (*gcalls)++; 705247777Sdavide#endif 706247777Sdavide CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", 707234981Skib c, c_func, c_arg); 708234981Skib } else { 709247777Sdavide#ifdef CALLOUT_PROFILING 710234981Skib (*lockcalls)++; 711247777Sdavide#endif 712234981Skib CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", 713234981Skib c, c_func, c_arg); 714234981Skib } 715234981Skib } else { 716247777Sdavide#ifdef CALLOUT_PROFILING 717234981Skib (*mpcalls)++; 718247777Sdavide#endif 719247777Sdavide CTR3(KTR_CALLOUT, "callout %p func %p arg %p", 720234981Skib c, c_func, c_arg); 721234981Skib } 722272757Sjhb KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", 723272757Sjhb "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); 724247793Sdavide#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 725247777Sdavide sbt1 = sbinuptime(); 726234981Skib#endif 727234981Skib THREAD_NO_SLEEPING(); 728292384Smarkj SDT_PROBE1(callout_execute, , , callout__start, c); 729234981Skib c_func(c_arg); 730292384Smarkj SDT_PROBE1(callout_execute, , , callout__end, c); 731234981Skib THREAD_SLEEPING_OK(); 732247793Sdavide#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 733247793Sdavide sbt2 = sbinuptime(); 734247793Sdavide sbt2 -= sbt1; 735247793Sdavide if (sbt2 > maxdt) { 736247793Sdavide if (lastfunc != c_func || sbt2 > maxdt * 2) { 737247793Sdavide ts2 = sbttots(sbt2); 738234981Skib printf( 739234981Skib "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", 740234981Skib c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); 741234981Skib } 742247793Sdavide maxdt = sbt2; 743234981Skib lastfunc = c_func; 744234981Skib } 745234981Skib#endif 746272757Sjhb KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); 747234981Skib CTR1(KTR_CALLOUT, "callout %p finished", c); 748280785Srrs if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) 749277528Shselasky class->lc_unlock(c_lock); 750277528Shselaskyskip: 751234981Skib CC_LOCK(cc); 752278469Srrs KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); 753278469Srrs cc_exec_curr(cc, direct) = NULL; 754290664Srrs if (cc_exec_drain(cc, direct)) { 755290664Srrs drain = cc_exec_drain(cc, direct); 756290664Srrs cc_exec_drain(cc, direct) = NULL; 757290664Srrs CC_UNLOCK(cc); 758290664Srrs drain(c_arg); 759290664Srrs CC_LOCK(cc); 760290664Srrs } 761278469Srrs if (cc_exec_waiting(cc, direct)) { 762234981Skib /* 763277528Shselasky * There is someone waiting for the 764277528Shselasky * callout to complete. 765277528Shselasky * If the callout was scheduled for 766277528Shselasky * migration just cancel it. 767234981Skib */ 768277528Shselasky if (cc_cce_migrating(cc, direct)) { 769277528Shselasky cc_cce_cleanup(cc, direct); 770277528Shselasky 771277528Shselasky /* 772277528Shselasky * It should be assert here that the callout is not 773277528Shselasky * destroyed but that is not easy. 774277528Shselasky */ 775280785Srrs c->c_iflags &= ~CALLOUT_DFRMIGRATION; 776277528Shselasky } 777278469Srrs cc_exec_waiting(cc, direct) = false; 778234981Skib CC_UNLOCK(cc); 779278469Srrs wakeup(&cc_exec_waiting(cc, direct)); 780234981Skib CC_LOCK(cc); 781277528Shselasky } else if (cc_cce_migrating(cc, direct)) { 782280785Srrs KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0, 783277528Shselasky ("Migrating legacy callout %p", c)); 784277528Shselasky#ifdef SMP 785277528Shselasky /* 786277528Shselasky * If the callout was scheduled for 787277528Shselasky * migration just perform it now. 788277528Shselasky */ 789278469Srrs new_cpu = cc_migration_cpu(cc, direct); 790278469Srrs new_time = cc_migration_time(cc, direct); 791278469Srrs new_prec = cc_migration_prec(cc, direct); 792278469Srrs new_func = cc_migration_func(cc, direct); 793278469Srrs new_arg = cc_migration_arg(cc, direct); 794277528Shselasky cc_cce_cleanup(cc, direct); 795277528Shselasky 796277528Shselasky /* 797277528Shselasky * It should be assert here that the callout is not destroyed 798277528Shselasky * but that is not easy. 799277528Shselasky * 800277528Shselasky * As first thing, handle deferred callout stops. 801277528Shselasky */ 802278469Srrs if (!callout_migrating(c)) { 803277528Shselasky CTR3(KTR_CALLOUT, 804277528Shselasky "deferred cancelled %p func %p arg %p", 805277528Shselasky c, new_func, new_arg); 806277528Shselasky callout_cc_del(c, cc); 807277528Shselasky return; 808277528Shselasky } 809280785Srrs c->c_iflags &= ~CALLOUT_DFRMIGRATION; 810277528Shselasky 811277528Shselasky new_cc = callout_cpu_switch(c, cc, new_cpu); 812277528Shselasky flags = (direct) ? C_DIRECT_EXEC : 0; 813277528Shselasky callout_cc_add(c, new_cc, new_time, new_prec, new_func, 814278623Srrs new_arg, new_cpu, flags); 815277528Shselasky CC_UNLOCK(new_cc); 816277528Shselasky CC_LOCK(cc); 817277528Shselasky#else 818277528Shselasky panic("migration should not happen"); 819277528Shselasky#endif 820277528Shselasky } 821277528Shselasky /* 822277528Shselasky * If the current callout is locally allocated (from 823277528Shselasky * timeout(9)) then put it on the freelist. 824277528Shselasky * 825280785Srrs * Note: we need to check the cached copy of c_iflags because 826277528Shselasky * if it was not local, then it's not safe to deref the 827277528Shselasky * callout pointer. 828277528Shselasky */ 829280785Srrs KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 || 830280785Srrs c->c_iflags == CALLOUT_LOCAL_ALLOC, 831277528Shselasky ("corrupted callout")); 832280785Srrs if (c_iflags & CALLOUT_LOCAL_ALLOC) 833277213Shselasky callout_cc_del(c, cc); 834234981Skib} 835234981Skib 83682127Sdillon/* 837247467Sdavide * The callout mechanism is based on the work of Adam M. Costello and 83829680Sgibbs * George Varghese, published in a technical report entitled "Redesigning 83929680Sgibbs * the BSD Callout and Timer Facilities" and modified slightly for inclusion 84029680Sgibbs * in FreeBSD by Justin T. Gibbs. The original work on the data structures 841128630Shmp * used in this implementation was published by G. Varghese and T. Lauck in 84229680Sgibbs * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 84329680Sgibbs * the Efficient Implementation of a Timer Facility" in the Proceedings of 84429680Sgibbs * the 11th ACM Annual Symposium on Operating Systems Principles, 84529680Sgibbs * Austin, Texas Nov 1987. 84629680Sgibbs */ 84732388Sphk 84829680Sgibbs/* 8491541Srgrimes * Software (low priority) clock interrupt. 8501541Srgrimes * Run periodic events from timeout queue. 8511541Srgrimes */ 8521541Srgrimesvoid 853177859Sjeffsoftclock(void *arg) 8541541Srgrimes{ 855177859Sjeff struct callout_cpu *cc; 856102936Sphk struct callout *c; 857247777Sdavide#ifdef CALLOUT_PROFILING 858247777Sdavide int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; 859247777Sdavide#endif 8601541Srgrimes 861177859Sjeff cc = (struct callout_cpu *)arg; 862177859Sjeff CC_LOCK(cc); 863247777Sdavide while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { 864247777Sdavide TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 865247777Sdavide softclock_call_cc(c, cc, 866247777Sdavide#ifdef CALLOUT_PROFILING 867247777Sdavide &mpcalls, &lockcalls, &gcalls, 868247777Sdavide#endif 869247777Sdavide 0); 870247777Sdavide#ifdef CALLOUT_PROFILING 871247777Sdavide ++depth; 872247777Sdavide#endif 8731541Srgrimes } 874247777Sdavide#ifdef CALLOUT_PROFILING 875115810Sphk avg_depth += (depth * 1000 - avg_depth) >> 8; 876115810Sphk avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; 877173760Sattilio avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; 878115810Sphk avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; 879247777Sdavide#endif 880177859Sjeff CC_UNLOCK(cc); 8811541Srgrimes} 8821541Srgrimes 8831541Srgrimes/* 8841541Srgrimes * timeout -- 8851541Srgrimes * Execute a function after a specified length of time. 8861541Srgrimes * 8871541Srgrimes * untimeout -- 8881541Srgrimes * Cancel previous timeout function call. 8891541Srgrimes * 89029680Sgibbs * callout_handle_init -- 89129680Sgibbs * Initialize a handle so that using it with untimeout is benign. 89229680Sgibbs * 8931541Srgrimes * See AT&T BCI Driver Reference Manual for specification. This 894247698Smav * implementation differs from that one in that although an 89529680Sgibbs * identification value is returned from timeout, the original 89629680Sgibbs * arguments to timeout as well as the identifier are used to 89729680Sgibbs * identify entries for untimeout. 8981541Srgrimes */ 89929680Sgibbsstruct callout_handle 900267092Sdavidetimeout(timeout_t *ftn, void *arg, int to_ticks) 9011541Srgrimes{ 902177859Sjeff struct callout_cpu *cc; 90329680Sgibbs struct callout *new; 90429680Sgibbs struct callout_handle handle; 9051541Srgrimes 906177859Sjeff cc = CC_CPU(timeout_cpu); 907177859Sjeff CC_LOCK(cc); 9081541Srgrimes /* Fill in the next free callout structure. */ 909177859Sjeff new = SLIST_FIRST(&cc->cc_callfree); 91029680Sgibbs if (new == NULL) 91129680Sgibbs /* XXX Attempt to malloc first */ 9121541Srgrimes panic("timeout table full"); 913177859Sjeff SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); 914277528Shselasky callout_reset(new, to_ticks, ftn, arg); 915177859Sjeff handle.callout = new; 916177859Sjeff CC_UNLOCK(cc); 9171541Srgrimes 91829680Sgibbs return (handle); 9191541Srgrimes} 9201541Srgrimes 9211541Srgrimesvoid 922267092Sdavideuntimeout(timeout_t *ftn, void *arg, struct callout_handle handle) 9231541Srgrimes{ 924177859Sjeff struct callout_cpu *cc; 9251541Srgrimes 92629680Sgibbs /* 92729680Sgibbs * Check for a handle that was initialized 92829680Sgibbs * by callout_handle_init, but never used 92929680Sgibbs * for a real timeout. 93029680Sgibbs */ 93129680Sgibbs if (handle.callout == NULL) 93229680Sgibbs return; 93329680Sgibbs 934177859Sjeff cc = callout_lock(handle.callout); 935277528Shselasky if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) 936277528Shselasky callout_stop(handle.callout); 937277213Shselasky CC_UNLOCK(cc); 9381541Srgrimes} 9391541Srgrimes 94024101Sbdevoid 94129680Sgibbscallout_handle_init(struct callout_handle *handle) 94229680Sgibbs{ 94329680Sgibbs handle->callout = NULL; 94429680Sgibbs} 94529680Sgibbs 946304882Skibvoid 947304882Skibcallout_when(sbintime_t sbt, sbintime_t precision, int flags, 948304882Skib sbintime_t *res, sbintime_t *prec_res) 949304882Skib{ 950304882Skib sbintime_t to_sbt, to_pr; 951304882Skib 952304882Skib if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) { 953304882Skib *res = sbt; 954304882Skib *prec_res = precision; 955304882Skib return; 956304882Skib } 957304882Skib if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt) 958304882Skib sbt = tick_sbt; 959304882Skib if ((flags & C_HARDCLOCK) != 0 || 960304882Skib#ifdef NO_EVENTTIMERS 961304882Skib sbt >= sbt_timethreshold) { 962304882Skib to_sbt = getsbinuptime(); 963304882Skib 964304882Skib /* Add safety belt for the case of hz > 1000. */ 965304882Skib to_sbt += tc_tick_sbt - tick_sbt; 966304882Skib#else 967304882Skib sbt >= sbt_tickthreshold) { 968304882Skib /* 969304882Skib * Obtain the time of the last hardclock() call on 970304882Skib * this CPU directly from the kern_clocksource.c. 971304882Skib * This value is per-CPU, but it is equal for all 972304882Skib * active ones. 973304882Skib */ 974304882Skib#ifdef __LP64__ 975304882Skib to_sbt = DPCPU_GET(hardclocktime); 976304882Skib#else 977304882Skib spinlock_enter(); 978304882Skib to_sbt = DPCPU_GET(hardclocktime); 979304882Skib spinlock_exit(); 980304882Skib#endif 981304882Skib#endif 982310439Sjhb if (cold && to_sbt == 0) 983310439Sjhb to_sbt = sbinuptime(); 984304882Skib if ((flags & C_HARDCLOCK) == 0) 985304882Skib to_sbt += tick_sbt; 986304882Skib } else 987304882Skib to_sbt = sbinuptime(); 988304882Skib if (SBT_MAX - to_sbt < sbt) 989304882Skib to_sbt = SBT_MAX; 990304882Skib else 991304882Skib to_sbt += sbt; 992304882Skib *res = to_sbt; 993304882Skib to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : 994304882Skib sbt >> C_PRELGET(flags)); 995304882Skib *prec_res = to_pr > precision ? to_pr : precision; 996304882Skib} 997304882Skib 99844510Swollman/* 99944510Swollman * New interface; clients allocate their own callout structures. 100044510Swollman * 100144510Swollman * callout_reset() - establish or change a timeout 100244510Swollman * callout_stop() - disestablish a timeout 100344510Swollman * callout_init() - initialize a callout structure so that it can 100444510Swollman * safely be passed to callout_reset() and callout_stop() 100544510Swollman * 100650673Sjlemon * <sys/callout.h> defines three convenience macros: 100744510Swollman * 1008140487Scperciva * callout_active() - returns truth if callout has not been stopped, 1009140487Scperciva * drained, or deactivated since the last time the callout was 1010140487Scperciva * reset. 101150673Sjlemon * callout_pending() - returns truth if callout is still waiting for timeout 101250673Sjlemon * callout_deactivate() - marks the callout as having been serviced 101344510Swollman */ 1014149879Sglebiusint 1015304882Skibcallout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec, 1016360633Sjhb callout_func_t *ftn, void *arg, int cpu, int flags) 101744510Swollman{ 1018304882Skib sbintime_t to_sbt, precision; 1019277528Shselasky struct callout_cpu *cc; 1020277528Shselasky int cancelled, direct; 1021280785Srrs int ignore_cpu=0; 102244510Swollman 1023277528Shselasky cancelled = 0; 1024280785Srrs if (cpu == -1) { 1025280785Srrs ignore_cpu = 1; 1026280785Srrs } else if ((cpu >= MAXCPU) || 1027280786Sbz ((CC_CPU(cpu))->cc_inited == 0)) { 1028280785Srrs /* Invalid CPU spec */ 1029280785Srrs panic("Invalid CPU in callout %d", cpu); 1030280785Srrs } 1031304882Skib callout_when(sbt, prec, flags, &to_sbt, &precision); 1032247777Sdavide 1033278623Srrs /* 1034278623Srrs * This flag used to be added by callout_cc_add, but the 1035278623Srrs * first time you call this we could end up with the 1036278623Srrs * wrong direct flag if we don't do it before we add. 1037278623Srrs */ 1038278623Srrs if (flags & C_DIRECT_EXEC) { 1039280785Srrs direct = 1; 1040280785Srrs } else { 1041280785Srrs direct = 0; 1042278623Srrs } 1043277528Shselasky KASSERT(!direct || c->c_lock == NULL, 1044277528Shselasky ("%s: direct callout %p has lock", __func__, c)); 1045277528Shselasky cc = callout_lock(c); 1046280785Srrs /* 1047280785Srrs * Don't allow migration of pre-allocated callouts lest they 1048280785Srrs * become unbalanced or handle the case where the user does 1049280785Srrs * not care. 1050280785Srrs */ 1051280785Srrs if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) || 1052280785Srrs ignore_cpu) { 1053280785Srrs cpu = c->c_cpu; 1054280785Srrs } 1055280785Srrs 1056278469Srrs if (cc_exec_curr(cc, direct) == c) { 1057277528Shselasky /* 1058277528Shselasky * We're being asked to reschedule a callout which is 1059277528Shselasky * currently in progress. If there is a lock then we 1060277528Shselasky * can cancel the callout if it has not really started. 1061277528Shselasky */ 1062287354Savg if (c->c_lock != NULL && !cc_exec_cancel(cc, direct)) 1063278469Srrs cancelled = cc_exec_cancel(cc, direct) = true; 1064278469Srrs if (cc_exec_waiting(cc, direct)) { 1065277528Shselasky /* 1066277528Shselasky * Someone has called callout_drain to kill this 1067277528Shselasky * callout. Don't reschedule. 1068277528Shselasky */ 1069277528Shselasky CTR4(KTR_CALLOUT, "%s %p func %p arg %p", 1070277528Shselasky cancelled ? "cancelled" : "failed to cancel", 1071277528Shselasky c, c->c_func, c->c_arg); 1072277528Shselasky CC_UNLOCK(cc); 1073277528Shselasky return (cancelled); 1074277528Shselasky } 1075278469Srrs#ifdef SMP 1076278469Srrs if (callout_migrating(c)) { 1077278469Srrs /* 1078278469Srrs * This only occurs when a second callout_reset_sbt_on 1079278469Srrs * is made after a previous one moved it into 1080278469Srrs * deferred migration (below). Note we do *not* change 1081278469Srrs * the prev_cpu even though the previous target may 1082278469Srrs * be different. 1083278469Srrs */ 1084278469Srrs cc_migration_cpu(cc, direct) = cpu; 1085278469Srrs cc_migration_time(cc, direct) = to_sbt; 1086278469Srrs cc_migration_prec(cc, direct) = precision; 1087278469Srrs cc_migration_func(cc, direct) = ftn; 1088278469Srrs cc_migration_arg(cc, direct) = arg; 1089278469Srrs cancelled = 1; 1090278469Srrs CC_UNLOCK(cc); 1091278469Srrs return (cancelled); 1092278469Srrs } 1093278469Srrs#endif 1094277528Shselasky } 1095280785Srrs if (c->c_iflags & CALLOUT_PENDING) { 1096280785Srrs if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { 1097278623Srrs if (cc_exec_next(cc) == c) 1098278623Srrs cc_exec_next(cc) = LIST_NEXT(c, c_links.le); 1099277528Shselasky LIST_REMOVE(c, c_links.le); 1100280785Srrs } else { 1101277528Shselasky TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1102280785Srrs } 1103277528Shselasky cancelled = 1; 1104280785Srrs c->c_iflags &= ~ CALLOUT_PENDING; 1105280785Srrs c->c_flags &= ~ CALLOUT_ACTIVE; 1106277528Shselasky } 1107220456Sattilio 1108277528Shselasky#ifdef SMP 1109277528Shselasky /* 1110277528Shselasky * If the callout must migrate try to perform it immediately. 1111277528Shselasky * If the callout is currently running, just defer the migration 1112277528Shselasky * to a more appropriate moment. 1113277528Shselasky */ 1114277528Shselasky if (c->c_cpu != cpu) { 1115278469Srrs if (cc_exec_curr(cc, direct) == c) { 1116278469Srrs /* 1117278469Srrs * Pending will have been removed since we are 1118278469Srrs * actually executing the callout on another 1119278469Srrs * CPU. That callout should be waiting on the 1120278469Srrs * lock the caller holds. If we set both 1121278469Srrs * active/and/pending after we return and the 1122278469Srrs * lock on the executing callout proceeds, it 1123278469Srrs * will then see pending is true and return. 1124278469Srrs * At the return from the actual callout execution 1125278469Srrs * the migration will occur in softclock_call_cc 1126278469Srrs * and this new callout will be placed on the 1127278469Srrs * new CPU via a call to callout_cpu_switch() which 1128278469Srrs * will get the lock on the right CPU followed 1129278469Srrs * by a call callout_cc_add() which will add it there. 1130278469Srrs * (see above in softclock_call_cc()). 1131278469Srrs */ 1132278469Srrs cc_migration_cpu(cc, direct) = cpu; 1133278469Srrs cc_migration_time(cc, direct) = to_sbt; 1134278469Srrs cc_migration_prec(cc, direct) = precision; 1135278469Srrs cc_migration_func(cc, direct) = ftn; 1136278469Srrs cc_migration_arg(cc, direct) = arg; 1137280785Srrs c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); 1138280785Srrs c->c_flags |= CALLOUT_ACTIVE; 1139277528Shselasky CTR6(KTR_CALLOUT, 1140277528Shselasky "migration of %p func %p arg %p in %d.%08x to %u deferred", 1141277528Shselasky c, c->c_func, c->c_arg, (int)(to_sbt >> 32), 1142277528Shselasky (u_int)(to_sbt & 0xffffffff), cpu); 1143277528Shselasky CC_UNLOCK(cc); 1144277528Shselasky return (cancelled); 1145277528Shselasky } 1146277528Shselasky cc = callout_cpu_switch(c, cc, cpu); 1147277528Shselasky } 1148277528Shselasky#endif 1149277528Shselasky 1150278623Srrs callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); 1151277528Shselasky CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", 1152277528Shselasky cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), 1153277528Shselasky (u_int)(to_sbt & 0xffffffff)); 1154277528Shselasky CC_UNLOCK(cc); 1155277528Shselasky 1156277528Shselasky return (cancelled); 115744510Swollman} 115844510Swollman 1159181191Ssam/* 1160181191Ssam * Common idioms that can be optimized in the future. 1161181191Ssam */ 116281481Sjhbint 1163181191Ssamcallout_schedule_on(struct callout *c, int to_ticks, int cpu) 1164181191Ssam{ 1165181191Ssam return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); 1166181191Ssam} 1167181191Ssam 1168181191Ssamint 1169181191Ssamcallout_schedule(struct callout *c, int to_ticks) 1170181191Ssam{ 1171181191Ssam return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); 1172181191Ssam} 1173181191Ssam 1174181191Ssamint 1175360633Sjhb_callout_stop_safe(struct callout *c, int flags, callout_func_t *drain) 1176127969Scperciva{ 1177277528Shselasky struct callout_cpu *cc, *old_cc; 1178277528Shselasky struct lock_class *class; 1179277528Shselasky int direct, sq_locked, use_lock; 1180302350Sglebius int cancelled, not_on_a_list; 1181127969Scperciva 1182296320Skib if ((flags & CS_DRAIN) != 0) 1183277723Sadrian WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, 1184277723Sadrian "calling %s", __func__); 1185277723Sadrian 1186277528Shselasky /* 1187277528Shselasky * Some old subsystems don't hold Giant while running a callout_stop(), 1188277528Shselasky * so just discard this check for the moment. 1189277528Shselasky */ 1190296320Skib if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) { 1191277528Shselasky if (c->c_lock == &Giant.lock_object) 1192277528Shselasky use_lock = mtx_owned(&Giant); 1193277528Shselasky else { 1194277528Shselasky use_lock = 1; 1195277528Shselasky class = LOCK_CLASS(c->c_lock); 1196277528Shselasky class->lc_assert(c->c_lock, LA_XLOCKED); 1197277528Shselasky } 1198277528Shselasky } else 1199277528Shselasky use_lock = 0; 1200280785Srrs if (c->c_iflags & CALLOUT_DIRECT) { 1201280785Srrs direct = 1; 1202280785Srrs } else { 1203280785Srrs direct = 0; 1204280785Srrs } 1205277528Shselasky sq_locked = 0; 1206277528Shselasky old_cc = NULL; 1207277528Shselaskyagain: 1208277528Shselasky cc = callout_lock(c); 1209220456Sattilio 1210280785Srrs if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == 1211280785Srrs (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && 1212280785Srrs ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { 1213278469Srrs /* 1214278469Srrs * Special case where this slipped in while we 1215278469Srrs * were migrating *as* the callout is about to 1216278469Srrs * execute. The caller probably holds the lock 1217278469Srrs * the callout wants. 1218278469Srrs * 1219278469Srrs * Get rid of the migration first. Then set 1220278469Srrs * the flag that tells this code *not* to 1221278469Srrs * try to remove it from any lists (its not 1222278469Srrs * on one yet). When the callout wheel runs, 1223278469Srrs * it will ignore this callout. 1224278469Srrs */ 1225280785Srrs c->c_iflags &= ~CALLOUT_PENDING; 1226280785Srrs c->c_flags &= ~CALLOUT_ACTIVE; 1227278469Srrs not_on_a_list = 1; 1228278469Srrs } else { 1229278469Srrs not_on_a_list = 0; 1230278469Srrs } 1231278469Srrs 1232277528Shselasky /* 1233277528Shselasky * If the callout was migrating while the callout cpu lock was 1234277528Shselasky * dropped, just drop the sleepqueue lock and check the states 1235277528Shselasky * again. 1236277528Shselasky */ 1237277528Shselasky if (sq_locked != 0 && cc != old_cc) { 1238277528Shselasky#ifdef SMP 1239277528Shselasky CC_UNLOCK(cc); 1240278469Srrs sleepq_release(&cc_exec_waiting(old_cc, direct)); 1241277528Shselasky sq_locked = 0; 1242277528Shselasky old_cc = NULL; 1243277528Shselasky goto again; 1244277528Shselasky#else 1245277528Shselasky panic("migration should not happen"); 1246277528Shselasky#endif 1247277528Shselasky } 1248220456Sattilio 1249277528Shselasky /* 1250302350Sglebius * If the callout is running, try to stop it or drain it. 1251277528Shselasky */ 1252302350Sglebius if (cc_exec_curr(cc, direct) == c) { 1253155957Sjhb /* 1254302350Sglebius * Succeed we to stop it or not, we must clear the 1255316708Smarkj * active flag - this is what API users expect. If we're 1256316708Smarkj * draining and the callout is currently executing, first wait 1257316708Smarkj * until it finishes. 1258155957Sjhb */ 1259316708Smarkj if ((flags & CS_DRAIN) == 0) 1260316708Smarkj c->c_flags &= ~CALLOUT_ACTIVE; 1261155957Sjhb 1262296320Skib if ((flags & CS_DRAIN) != 0) { 1263277528Shselasky /* 1264277528Shselasky * The current callout is running (or just 1265277528Shselasky * about to run) and blocking is allowed, so 1266277528Shselasky * just wait for the current invocation to 1267277528Shselasky * finish. 1268277528Shselasky */ 1269278469Srrs while (cc_exec_curr(cc, direct) == c) { 1270277528Shselasky /* 1271277528Shselasky * Use direct calls to sleepqueue interface 1272277528Shselasky * instead of cv/msleep in order to avoid 1273277528Shselasky * a LOR between cc_lock and sleepqueue 1274277528Shselasky * chain spinlocks. This piece of code 1275277528Shselasky * emulates a msleep_spin() call actually. 1276277528Shselasky * 1277277528Shselasky * If we already have the sleepqueue chain 1278277528Shselasky * locked, then we can safely block. If we 1279277528Shselasky * don't already have it locked, however, 1280277528Shselasky * we have to drop the cc_lock to lock 1281277528Shselasky * it. This opens several races, so we 1282277528Shselasky * restart at the beginning once we have 1283277528Shselasky * both locks. If nothing has changed, then 1284277528Shselasky * we will end up back here with sq_locked 1285277528Shselasky * set. 1286277528Shselasky */ 1287277528Shselasky if (!sq_locked) { 1288277528Shselasky CC_UNLOCK(cc); 1289277528Shselasky sleepq_lock( 1290278469Srrs &cc_exec_waiting(cc, direct)); 1291277528Shselasky sq_locked = 1; 1292277528Shselasky old_cc = cc; 1293277528Shselasky goto again; 1294277528Shselasky } 1295220456Sattilio 1296277528Shselasky /* 1297277528Shselasky * Migration could be cancelled here, but 1298277528Shselasky * as long as it is still not sure when it 1299277528Shselasky * will be packed up, just let softclock() 1300277528Shselasky * take care of it. 1301277528Shselasky */ 1302278469Srrs cc_exec_waiting(cc, direct) = true; 1303277528Shselasky DROP_GIANT(); 1304277528Shselasky CC_UNLOCK(cc); 1305277528Shselasky sleepq_add( 1306278469Srrs &cc_exec_waiting(cc, direct), 1307277528Shselasky &cc->cc_lock.lock_object, "codrain", 1308277528Shselasky SLEEPQ_SLEEP, 0); 1309277528Shselasky sleepq_wait( 1310278469Srrs &cc_exec_waiting(cc, direct), 1311277528Shselasky 0); 1312277528Shselasky sq_locked = 0; 1313277528Shselasky old_cc = NULL; 1314171053Sattilio 1315277528Shselasky /* Reacquire locks previously released. */ 1316277528Shselasky PICKUP_GIANT(); 1317277528Shselasky CC_LOCK(cc); 1318277528Shselasky } 1319316708Smarkj c->c_flags &= ~CALLOUT_ACTIVE; 1320277528Shselasky } else if (use_lock && 1321290664Srrs !cc_exec_cancel(cc, direct) && (drain == NULL)) { 1322278469Srrs 1323277528Shselasky /* 1324277528Shselasky * The current callout is waiting for its 1325277528Shselasky * lock which we hold. Cancel the callout 1326277528Shselasky * and return. After our caller drops the 1327277528Shselasky * lock, the callout will be skipped in 1328290664Srrs * softclock(). This *only* works with a 1329290664Srrs * callout_stop() *not* callout_drain() or 1330290664Srrs * callout_async_drain(). 1331277528Shselasky */ 1332278469Srrs cc_exec_cancel(cc, direct) = true; 1333277528Shselasky CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1334277528Shselasky c, c->c_func, c->c_arg); 1335277528Shselasky KASSERT(!cc_cce_migrating(cc, direct), 1336277528Shselasky ("callout wrongly scheduled for migration")); 1337280785Srrs if (callout_migrating(c)) { 1338280785Srrs c->c_iflags &= ~CALLOUT_DFRMIGRATION; 1339280785Srrs#ifdef SMP 1340280785Srrs cc_migration_cpu(cc, direct) = CPUBLOCK; 1341280785Srrs cc_migration_time(cc, direct) = 0; 1342280785Srrs cc_migration_prec(cc, direct) = 0; 1343280785Srrs cc_migration_func(cc, direct) = NULL; 1344280785Srrs cc_migration_arg(cc, direct) = NULL; 1345280785Srrs#endif 1346280785Srrs } 1347277528Shselasky CC_UNLOCK(cc); 1348277528Shselasky KASSERT(!sq_locked, ("sleepqueue chain locked")); 1349277528Shselasky return (1); 1350278469Srrs } else if (callout_migrating(c)) { 1351278469Srrs /* 1352278469Srrs * The callout is currently being serviced 1353278469Srrs * and the "next" callout is scheduled at 1354278469Srrs * its completion with a migration. We remove 1355278469Srrs * the migration flag so it *won't* get rescheduled, 1356278469Srrs * but we can't stop the one thats running so 1357278469Srrs * we return 0. 1358278469Srrs */ 1359280785Srrs c->c_iflags &= ~CALLOUT_DFRMIGRATION; 1360278469Srrs#ifdef SMP 1361278469Srrs /* 1362278469Srrs * We can't call cc_cce_cleanup here since 1363278469Srrs * if we do it will remove .ce_curr and 1364278469Srrs * its still running. This will prevent a 1365278469Srrs * reschedule of the callout when the 1366278469Srrs * execution completes. 1367278469Srrs */ 1368278469Srrs cc_migration_cpu(cc, direct) = CPUBLOCK; 1369278469Srrs cc_migration_time(cc, direct) = 0; 1370278469Srrs cc_migration_prec(cc, direct) = 0; 1371278469Srrs cc_migration_func(cc, direct) = NULL; 1372278469Srrs cc_migration_arg(cc, direct) = NULL; 1373278469Srrs#endif 1374277528Shselasky CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", 1375277528Shselasky c, c->c_func, c->c_arg); 1376290664Srrs if (drain) { 1377290664Srrs cc_exec_drain(cc, direct) = drain; 1378290664Srrs } 1379277528Shselasky CC_UNLOCK(cc); 1380302350Sglebius return ((flags & CS_EXECUTING) != 0); 1381277528Shselasky } 1382277528Shselasky CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1383277528Shselasky c, c->c_func, c->c_arg); 1384290664Srrs if (drain) { 1385290664Srrs cc_exec_drain(cc, direct) = drain; 1386290664Srrs } 1387277528Shselasky KASSERT(!sq_locked, ("sleepqueue chain still locked")); 1388302350Sglebius cancelled = ((flags & CS_EXECUTING) != 0); 1389302350Sglebius } else 1390302350Sglebius cancelled = 1; 1391302350Sglebius 1392277528Shselasky if (sq_locked) 1393278469Srrs sleepq_release(&cc_exec_waiting(cc, direct)); 1394172025Sjhb 1395302350Sglebius if ((c->c_iflags & CALLOUT_PENDING) == 0) { 1396302350Sglebius CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1397302350Sglebius c, c->c_func, c->c_arg); 1398303159Sglebius /* 1399303159Sglebius * For not scheduled and not executing callout return 1400303159Sglebius * negative value. 1401303159Sglebius */ 1402303159Sglebius if (cc_exec_curr(cc, direct) != c) 1403303159Sglebius cancelled = -1; 1404302350Sglebius CC_UNLOCK(cc); 1405302350Sglebius return (cancelled); 1406302350Sglebius } 1407302350Sglebius 1408280785Srrs c->c_iflags &= ~CALLOUT_PENDING; 1409280785Srrs c->c_flags &= ~CALLOUT_ACTIVE; 1410277528Shselasky 1411234981Skib CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1412234981Skib c, c->c_func, c->c_arg); 1413278469Srrs if (not_on_a_list == 0) { 1414280785Srrs if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { 1415278623Srrs if (cc_exec_next(cc) == c) 1416278623Srrs cc_exec_next(cc) = LIST_NEXT(c, c_links.le); 1417278469Srrs LIST_REMOVE(c, c_links.le); 1418280785Srrs } else { 1419278469Srrs TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1420280785Srrs } 1421278469Srrs } 1422277528Shselasky callout_cc_del(c, cc); 1423277528Shselasky CC_UNLOCK(cc); 1424302350Sglebius return (cancelled); 142544510Swollman} 142644510Swollman 142744510Swollmanvoid 1428267092Sdavidecallout_init(struct callout *c, int mpsafe) 142944510Swollman{ 1430277528Shselasky bzero(c, sizeof *c); 1431141428Siedowse if (mpsafe) { 1432277528Shselasky c->c_lock = NULL; 1433280785Srrs c->c_iflags = CALLOUT_RETURNUNLOCKED; 1434141428Siedowse } else { 1435277528Shselasky c->c_lock = &Giant.lock_object; 1436280785Srrs c->c_iflags = 0; 1437141428Siedowse } 1438277528Shselasky c->c_cpu = timeout_cpu; 143944510Swollman} 144044510Swollman 1441141428Siedowsevoid 1442267092Sdavide_callout_init_lock(struct callout *c, struct lock_object *lock, int flags) 1443141428Siedowse{ 1444141428Siedowse bzero(c, sizeof *c); 1445173760Sattilio c->c_lock = lock; 1446277528Shselasky KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, 1447277528Shselasky ("callout_init_lock: bad flags %d", flags)); 1448277528Shselasky KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, 1449277528Shselasky ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); 1450277528Shselasky KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & 1451277528Shselasky (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", 1452277528Shselasky __func__)); 1453280785Srrs c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); 1454177859Sjeff c->c_cpu = timeout_cpu; 1455141428Siedowse} 1456141428Siedowse 145731950Snate#ifdef APM_FIXUP_CALLTODO 145831950Snate/* 145931950Snate * Adjust the kernel calltodo timeout list. This routine is used after 146031950Snate * an APM resume to recalculate the calltodo timer list values with the 146131950Snate * number of hz's we have been sleeping. The next hardclock() will detect 146231950Snate * that there are fired timers and run softclock() to execute them. 146331950Snate * 146431950Snate * Please note, I have not done an exhaustive analysis of what code this 146531950Snate * might break. I am motivated to have my select()'s and alarm()'s that 146631950Snate * have expired during suspend firing upon resume so that the applications 146731950Snate * which set the timer can do the maintanence the timer was for as close 146831950Snate * as possible to the originally intended time. Testing this code for a 146931950Snate * week showed that resuming from a suspend resulted in 22 to 25 timers 1470298819Spfg * firing, which seemed independent on whether the suspend was 2 hours or 147131950Snate * 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu> 147231950Snate */ 147331950Snatevoid 1474267092Sdavideadjust_timeout_calltodo(struct timeval *time_change) 147531950Snate{ 1476331643Sdim struct callout *p; 147731950Snate unsigned long delta_ticks; 147831950Snate 147931950Snate /* 148031950Snate * How many ticks were we asleep? 148136127Sbde * (stolen from tvtohz()). 148231950Snate */ 148331950Snate 148431950Snate /* Don't do anything */ 148531950Snate if (time_change->tv_sec < 0) 148631950Snate return; 148731950Snate else if (time_change->tv_sec <= LONG_MAX / 1000000) 1488298649Spfg delta_ticks = howmany(time_change->tv_sec * 1000000 + 1489298649Spfg time_change->tv_usec, tick) + 1; 149031950Snate else if (time_change->tv_sec <= LONG_MAX / hz) 149131950Snate delta_ticks = time_change->tv_sec * hz + 1492298649Spfg howmany(time_change->tv_usec, tick) + 1; 149331950Snate else 149431950Snate delta_ticks = LONG_MAX; 149531950Snate 149631950Snate if (delta_ticks > INT_MAX) 149731950Snate delta_ticks = INT_MAX; 149831950Snate 149931950Snate /* 150031950Snate * Now rip through the timer calltodo list looking for timers 150131950Snate * to expire. 150231950Snate */ 150331950Snate 150431950Snate /* don't collide with softclock() */ 1505177859Sjeff CC_LOCK(cc); 150631950Snate for (p = calltodo.c_next; p != NULL; p = p->c_next) { 150731950Snate p->c_time -= delta_ticks; 150831950Snate 150931950Snate /* Break if the timer had more time on it than delta_ticks */ 151031950Snate if (p->c_time > 0) 151131950Snate break; 151231950Snate 151331950Snate /* take back the ticks the timer didn't use (p->c_time <= 0) */ 151431950Snate delta_ticks = -p->c_time; 151531950Snate } 1516177859Sjeff CC_UNLOCK(cc); 151731950Snate 151831950Snate return; 151931950Snate} 152031950Snate#endif /* APM_FIXUP_CALLTODO */ 1521247777Sdavide 1522247777Sdavidestatic int 1523247777Sdavideflssbt(sbintime_t sbt) 1524247777Sdavide{ 1525247777Sdavide 1526247777Sdavide sbt += (uint64_t)sbt >> 1; 1527247777Sdavide if (sizeof(long) >= sizeof(sbintime_t)) 1528247777Sdavide return (flsl(sbt)); 1529247777Sdavide if (sbt >= SBT_1S) 1530247777Sdavide return (flsl(((uint64_t)sbt) >> 32) + 32); 1531247777Sdavide return (flsl(sbt)); 1532247777Sdavide} 1533247777Sdavide 1534247777Sdavide/* 1535247777Sdavide * Dump immediate statistic snapshot of the scheduled callouts. 1536247777Sdavide */ 1537247777Sdavidestatic int 1538247777Sdavidesysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) 1539247777Sdavide{ 1540247777Sdavide struct callout *tmp; 1541247777Sdavide struct callout_cpu *cc; 1542247777Sdavide struct callout_list *sc; 1543247777Sdavide sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; 1544247777Sdavide int ct[64], cpr[64], ccpbk[32]; 1545247777Sdavide int error, val, i, count, tcum, pcum, maxc, c, medc; 1546247777Sdavide#ifdef SMP 1547247777Sdavide int cpu; 1548247777Sdavide#endif 1549247777Sdavide 1550247777Sdavide val = 0; 1551247777Sdavide error = sysctl_handle_int(oidp, &val, 0, req); 1552247777Sdavide if (error != 0 || req->newptr == NULL) 1553247777Sdavide return (error); 1554247777Sdavide count = maxc = 0; 1555247777Sdavide st = spr = maxt = maxpr = 0; 1556247777Sdavide bzero(ccpbk, sizeof(ccpbk)); 1557247777Sdavide bzero(ct, sizeof(ct)); 1558247777Sdavide bzero(cpr, sizeof(cpr)); 1559247777Sdavide now = sbinuptime(); 1560247777Sdavide#ifdef SMP 1561247777Sdavide CPU_FOREACH(cpu) { 1562247777Sdavide cc = CC_CPU(cpu); 1563247777Sdavide#else 1564247777Sdavide cc = CC_CPU(timeout_cpu); 1565247777Sdavide#endif 1566247777Sdavide CC_LOCK(cc); 1567247777Sdavide for (i = 0; i < callwheelsize; i++) { 1568247777Sdavide sc = &cc->cc_callwheel[i]; 1569247777Sdavide c = 0; 1570247777Sdavide LIST_FOREACH(tmp, sc, c_links.le) { 1571247777Sdavide c++; 1572247777Sdavide t = tmp->c_time - now; 1573247777Sdavide if (t < 0) 1574247777Sdavide t = 0; 1575247777Sdavide st += t / SBT_1US; 1576247777Sdavide spr += tmp->c_precision / SBT_1US; 1577247777Sdavide if (t > maxt) 1578247777Sdavide maxt = t; 1579247777Sdavide if (tmp->c_precision > maxpr) 1580247777Sdavide maxpr = tmp->c_precision; 1581247777Sdavide ct[flssbt(t)]++; 1582247777Sdavide cpr[flssbt(tmp->c_precision)]++; 1583247777Sdavide } 1584247777Sdavide if (c > maxc) 1585247777Sdavide maxc = c; 1586247777Sdavide ccpbk[fls(c + c / 2)]++; 1587247777Sdavide count += c; 1588247777Sdavide } 1589247777Sdavide CC_UNLOCK(cc); 1590247777Sdavide#ifdef SMP 1591247777Sdavide } 1592247777Sdavide#endif 1593247777Sdavide 1594247777Sdavide for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) 1595247777Sdavide tcum += ct[i]; 1596247777Sdavide medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1597247777Sdavide for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) 1598247777Sdavide pcum += cpr[i]; 1599247777Sdavide medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1600247777Sdavide for (i = 0, c = 0; i < 32 && c < count / 2; i++) 1601247777Sdavide c += ccpbk[i]; 1602247777Sdavide medc = (i >= 2) ? (1 << (i - 2)) : 0; 1603247777Sdavide 1604247777Sdavide printf("Scheduled callouts statistic snapshot:\n"); 1605247777Sdavide printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", 1606247777Sdavide count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); 1607247777Sdavide printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", 1608247777Sdavide medc, 1609247777Sdavide count / callwheelsize / mp_ncpus, 1610247777Sdavide (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, 1611247777Sdavide maxc); 1612247777Sdavide printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1613247777Sdavide medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, 1614247777Sdavide (st / count) / 1000000, (st / count) % 1000000, 1615247777Sdavide maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); 1616247777Sdavide printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1617247777Sdavide medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, 1618247777Sdavide (spr / count) / 1000000, (spr / count) % 1000000, 1619247777Sdavide maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); 1620247777Sdavide printf(" Distribution: \tbuckets\t time\t tcum\t" 1621247777Sdavide " prec\t pcum\n"); 1622247777Sdavide for (i = 0, tcum = pcum = 0; i < 64; i++) { 1623247777Sdavide if (ct[i] == 0 && cpr[i] == 0) 1624247777Sdavide continue; 1625247777Sdavide t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; 1626247777Sdavide tcum += ct[i]; 1627247777Sdavide pcum += cpr[i]; 1628247777Sdavide printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", 1629247777Sdavide t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, 1630247777Sdavide i - 1 - (32 - CC_HASH_SHIFT), 1631247777Sdavide ct[i], tcum, cpr[i], pcum); 1632247777Sdavide } 1633247777Sdavide return (error); 1634247777Sdavide} 1635247777SdavideSYSCTL_PROC(_kern, OID_AUTO, callout_stat, 1636247777Sdavide CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1637247777Sdavide 0, 0, sysctl_kern_callout_stat, "I", 1638247777Sdavide "Dump immediate statistic snapshot of the scheduled callouts"); 1639301522Sbz 1640301522Sbz#ifdef DDB 1641301522Sbzstatic void 1642301522Sbz_show_callout(struct callout *c) 1643301522Sbz{ 1644301522Sbz 1645301522Sbz db_printf("callout %p\n", c); 1646301522Sbz#define C_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, c->e); 1647301522Sbz db_printf(" &c_links = %p\n", &(c->c_links)); 1648301522Sbz C_DB_PRINTF("%" PRId64, c_time); 1649301522Sbz C_DB_PRINTF("%" PRId64, c_precision); 1650301522Sbz C_DB_PRINTF("%p", c_arg); 1651301522Sbz C_DB_PRINTF("%p", c_func); 1652301522Sbz C_DB_PRINTF("%p", c_lock); 1653301522Sbz C_DB_PRINTF("%#x", c_flags); 1654301522Sbz C_DB_PRINTF("%#x", c_iflags); 1655301522Sbz C_DB_PRINTF("%d", c_cpu); 1656301522Sbz#undef C_DB_PRINTF 1657301522Sbz} 1658301522Sbz 1659301522SbzDB_SHOW_COMMAND(callout, db_show_callout) 1660301522Sbz{ 1661301522Sbz 1662301522Sbz if (!have_addr) { 1663301522Sbz db_printf("usage: show callout <struct callout *>\n"); 1664301522Sbz return; 1665301522Sbz } 1666301522Sbz 1667301522Sbz _show_callout((struct callout *)addr); 1668301522Sbz} 1669301522Sbz#endif /* DDB */ 1670