callo.h revision 8566:65762b7ee3ce
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22/* All Rights Reserved */ 23 24 25/* 26 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30#ifndef _SYS_CALLO_H 31#define _SYS_CALLO_H 32 33#include <sys/t_lock.h> 34#include <sys/taskq.h> 35#include <sys/lgrp.h> 36#include <sys/processor.h> 37#include <sys/cyclic.h> 38#include <sys/kstat.h> 39#include <sys/systm.h> 40 41#ifdef __cplusplus 42extern "C" { 43#endif 44 45#ifdef _KERNEL 46 47typedef struct callout_list callout_list_t; 48 49/* 50 * The callout mechanism provides general-purpose event scheduling: 51 * an arbitrary function is called in a specified amount of time. 52 * The expiration time for a callout is kept in its callout list 53 * structure. 54 */ 55typedef struct callout { 56 struct callout *c_idnext; /* next in ID hash, or on freelist */ 57 struct callout *c_idprev; /* prev in ID hash */ 58 struct callout *c_clnext; /* next in callout list */ 59 struct callout *c_clprev; /* prev in callout list */ 60 callout_id_t c_xid; /* extended callout ID; see below */ 61 callout_list_t *c_list; /* callout list */ 62 void (*c_func)(void *); /* function to call */ 63 void *c_arg; /* argument to function */ 64} callout_t; 65 66/* 67 * The callout ID (callout_id_t) uniquely identifies a callout. The callout 68 * ID is always 64 bits internally. The lower 32 bits contain an ID value. 69 * The upper 32 bits contain a generation number and flags. When the ID value 70 * wraps the generation number is incremented during ID generation. This 71 * protects callers from ID collisions that can happen as a result of the wrap. 72 * 73 * The kernel internal interface, timeout_generic(), always returns a 74 * callout_id_t. But the legacy interfaces, timeout() and realtime_timeout() 75 * return a timeout_id_t. On a 64-bit system, timeout_id_t is also 64 bits. 76 * So, the full 64-bit ID (sans the flags) can be returned. However, on 32-bit 77 * systems, timeout_id_t is 32 bits. So, only the lower 32 bits can be 78 * returned. In such cases, a default generation number of 0 is assigned to 79 * the legacy IDs. 80 * 81 * The lower 32-bit ID space is partitioned into two spaces - one for 32-bit 82 * IDs and the other for 64-bit IDs. The 32-bit ID space is further divided 83 * into two spaces - one for short-term callouts and one for long-term. 84 * 85 * Here is the bit layout for the callout ID: 86 * 87 * 63 62 61 ... 32 31 30 29 .. X+1 X ... 1 0 88 * ----------------------------------------------------------------------- 89 * | Exec | Hres | Generation | Long | Counter | ID bits | Table | Type | 90 * | | time | number | term | High | | number | | 91 * ----------------------------------------------------------------------- 92 * 93 * Exec(uting): 94 * This is the executing bit which is only set in the extended callout 95 * ID. This bit indicates that the callout handler is currently being 96 * executed. 97 * 98 * Hrestime: 99 * Kernel features like condition variables use hrestime (system date) in 100 * conjunction with callouts. Under normal circumstances, these callouts 101 * are handled in the usual manner. They go off at specified times. But 102 * when the system time is changed abruptly (e.g., via stime()), these 103 * callouts are required to be processed immediately so that they can 104 * wakeup their threads immediately. The Hrestime bit is used to mark 105 * such callouts. When the system time is changed, the callout subsystem 106 * is called to process all callouts with this bit set. 107 * 108 * Generation number: 109 * This is the generation part of the ID. 110 * 111 * Long term: 112 * This bit indicates whether this is a short-term or a long-term callout. 113 * The long-term bit exists to address the problem of callout ID collision 114 * on 32-bit systems. This is an issue because the system typically 115 * generates a large number of timeout() requests, which means that callout 116 * IDs eventually get recycled. Most timeouts are very short-lived, so that 117 * ID recycling isn't a problem; but there are a handful of timeouts which 118 * are sufficiently long-lived to see their own IDs reused. We use the 119 * long-term bit to partition the ID namespace into pieces; the short-term 120 * space gets all the heavy traffic and can wrap frequently (i.e., on the 121 * order of a day) with no ill effects; the long-term space gets very little 122 * traffic and thus never wraps. That said, we need to future proof callouts 123 * in case 32-bit systems grow in size and are able to consume callout IDs 124 * at faster rates. So, we should make all the kernel clients that use 125 * callouts to use the internal interface so that they can use IDs outside 126 * of the legacy space with a proper generation number. 127 * 128 * Counter High + ID counter bits: 129 * These bits represent the actual ID bits in the callout ID. 130 * The highest bit of the running counter is always set; this ensures that 131 * the callout ID is always non-zero, thus eliminating the need for an 132 * explicit wrap-around test during ID generation. 133 * 134 * Table number: 135 * These bits carry the table number for the callout table where the callout 136 * is queued. Each CPU has its own callout table. So, the callout tables are 137 * numbered from 0 - (max_ncpus - 1). Because max_ncpus is different on 138 * different systems, the actual number of table number bits will vary 139 * accordingly. And so will the ID counter bits. 140 * 141 * Type: 142 * This bit represents the callout (table) type. Each CPU has one realtime 143 * and one normal callout table. 144 */ 145#define CALLOUT_EXECUTING 0x8000000000000000ULL 146#define CALLOUT_HRESTIME 0x4000000000000000ULL 147#define CALLOUT_ID_MASK ~(CALLOUT_EXECUTING | CALLOUT_HRESTIME) 148#define CALLOUT_GENERATION_LOW 0x100000000ULL 149#define CALLOUT_LONGTERM 0x80000000 150#define CALLOUT_COUNTER_HIGH 0x40000000 151#define CALLOUT_TYPE_BITS 1 152#define CALLOUT_NTYPES (1 << CALLOUT_TYPE_BITS) 153#define CALLOUT_TYPE_MASK (CALLOUT_NTYPES - 1) 154#define CALLOUT_COUNTER_SHIFT callout_table_bits 155#define CALLOUT_TABLE(t, f) (((f) << CALLOUT_TYPE_BITS) | (t)) 156#define CALLOUT_TABLE_NUM(ct) ((ct) - callout_table) 157#define CALLOUT_TABLE_TYPE(ct) (CALLOUT_TABLE_NUM(ct) & CALLOUT_TYPE_MASK) 158#define CALLOUT_TABLE_SEQID(ct) (CALLOUT_TABLE_NUM(ct) >> CALLOUT_TYPE_BITS) 159 160/* 161 * We assume that during any period of CALLOUT_LONGTERM_TICKS ticks, at most 162 * (CALLOUT_COUNTER_HIGH / callout_counter_low) callouts will be generated. 163 */ 164#define CALLOUT_LONGTERM_TICKS 0x4000UL 165#define CALLOUT_BUCKET_SHIFT 9 166#define CALLOUT_BUCKETS (1 << CALLOUT_BUCKET_SHIFT) 167#define CALLOUT_BUCKET_MASK (CALLOUT_BUCKETS - 1) 168#define CALLOUT_HASH(x) ((x) & CALLOUT_BUCKET_MASK) 169#define CALLOUT_IDHASH(x) CALLOUT_HASH((x) >> CALLOUT_COUNTER_SHIFT) 170/* 171 * The multiply by 0 and 1 below are cosmetic. Just to align things better 172 * and make it more readable. The multiplications will be done at compile 173 * time. 174 */ 175#define CALLOUT_CLHASH(x) \ 176 CALLOUT_HASH( \ 177 ((x)>>(CALLOUT_BUCKET_SHIFT*0)) ^ \ 178 ((x)>>(CALLOUT_BUCKET_SHIFT*1)) ^ \ 179 ((x)>>(CALLOUT_BUCKET_SHIFT*2)) ^ \ 180 ((x)>>(CALLOUT_BUCKET_SHIFT*3))) 181 182#define CALLOUT_ID_TO_TABLE(id) ((id) & callout_table_mask) 183 184#define CALLOUT_SHORT_ID(table) \ 185 ((callout_id_t)(table) | CALLOUT_COUNTER_HIGH) 186#define CALLOUT_LONG_ID(table) \ 187 (CALLOUT_SHORT_ID(table) | CALLOUT_LONGTERM) 188 189#define CALLOUT_THREADS 2 /* keep it simple for now */ 190 191#define CALLOUT_REALTIME 0 /* realtime callout type */ 192#define CALLOUT_NORMAL 1 /* normal callout type */ 193 194/* 195 * callout_t's are cache-aligned structures allocated from kmem caches. One kmem 196 * cache is created per lgrp and is shared by all CPUs in that lgrp. Benefits: 197 * - cache pages are mapped only in the TLBs of the CPUs of the lgrp 198 * - data in cache pages is present only in those CPU caches 199 * - memory access performance improves with locality-awareness in kmem 200 * 201 * The following structure is used to manage per-lgroup kmem caches. 202 * 203 * NOTE: Free callout_t's go to a callout table's freelist. CPUs map to callout 204 * tables via their sequence IDs, not CPU IDs. DR operations can cause a 205 * free list to have callouts from multiple lgrp caches. This takes away some 206 * performance, but is no worse than if we did not use lgrp caches at all. 207 */ 208typedef struct callout_cache { 209 struct callout_cache *cc_next; /* link in the global list */ 210 lgrp_handle_t cc_hand; /* lgroup handle */ 211 kmem_cache_t *cc_cache; /* kmem cache pointer */ 212 kmem_cache_t *cc_lcache; /* kmem cache pointer */ 213} callout_cache_t; 214 215/* 216 * The callout hash structure is used for queueing both callouts and 217 * callout lists. That is why the fields are declared as void *. 218 */ 219typedef struct callout_hash { 220 void *ch_head; 221 void *ch_tail; 222} callout_hash_t; 223 224struct callout_list { 225 callout_list_t *cl_next; /* next in clhash */ 226 callout_list_t *cl_prev; /* prev in clhash */ 227 hrtime_t cl_expiration; /* expiration for callouts in list */ 228 callout_hash_t cl_callouts; /* list of callouts */ 229 kcondvar_t cl_done; /* signal callout completion */ 230 ushort_t cl_waiting; /* count of waiting untimeouts */ 231 kthread_id_t cl_executor; /* thread executing callout */ 232 ulong_t cl_pad; /* cache alignment */ 233}; 234 235/* 236 * Per-callout table kstats. 237 * 238 * CALLOUT_TIMEOUTS 239 * Callouts created since boot. 240 * CALLOUT_TIMEOUTS_PENDING 241 * Number of outstanding callouts. 242 * CALLOUT_UNTIMEOUTS_UNEXPIRED 243 * Number of cancelled callouts that have not expired. 244 * CALLOUT_UNTIMEOUTS_EXECUTING 245 * Number of cancelled callouts that were executing at the time of 246 * cancellation. 247 * CALLOUT_UNTIMEOUTS_EXPIRED 248 * Number of cancelled callouts that had already expired at the time 249 * of cancellations. 250 * CALLOUT_EXPIRATIONS 251 * Number of callouts that expired. 252 * CALLOUT_ALLOCATIONS 253 * Number of callout structures allocated. 254 */ 255typedef enum callout_stat_type { 256 CALLOUT_TIMEOUTS, 257 CALLOUT_TIMEOUTS_PENDING, 258 CALLOUT_UNTIMEOUTS_UNEXPIRED, 259 CALLOUT_UNTIMEOUTS_EXECUTING, 260 CALLOUT_UNTIMEOUTS_EXPIRED, 261 CALLOUT_EXPIRATIONS, 262 CALLOUT_ALLOCATIONS, 263 CALLOUT_NUM_STATS 264} callout_stat_type_t; 265 266/* 267 * Callout flags: 268 * 269 * CALLOUT_FLAG_ROUNDUP 270 * Roundup the expiration time to the nearest resolution boundary. 271 * If this flag is not specified, the expiration time is rounded down. 272 * CALLOUT_FLAG_ABSOLUTE 273 * Normally, the expiration passed to the timeout API functions is an 274 * expiration interval. If this flag is specified, then it is 275 * interpreted as the expiration time itself. 276 * CALLOUT_FLAG_HRESTIME 277 * Normally, callouts are not affected by changes to system time 278 * (hrestime). This flag is used to create a callout that is affected 279 * by system time. If system time changes, these timers must expire 280 * at once. These are used by condition variables and LWP timers that 281 * need this behavior. 282 * CALLOUT_FLAG_32BIT 283 * Legacy interfaces timeout() and realtime_timeout() pass this flag 284 * to timeout_generic() to indicate that a 32-bit ID should be allocated. 285 */ 286#define CALLOUT_FLAG_ROUNDUP 0x1 287#define CALLOUT_FLAG_ABSOLUTE 0x2 288#define CALLOUT_FLAG_HRESTIME 0x4 289#define CALLOUT_FLAG_32BIT 0x8 290 291/* 292 * On 32-bit systems, the legacy interfaces, timeout() and realtime_timeout(), 293 * must pass CALLOUT_FLAG_32BIT to timeout_generic() so that a 32-bit ID 294 * can be generated. 295 */ 296#ifdef _LP64 297#define CALLOUT_LEGACY 0 298#else 299#define CALLOUT_LEGACY CALLOUT_FLAG_32BIT 300#endif 301 302/* 303 * All of the state information associated with a callout table. 304 * The fields are ordered with cache performance in mind. 305 */ 306typedef struct callout_table { 307 kmutex_t ct_mutex; /* protects all callout state */ 308 callout_t *ct_free; /* free callout structures */ 309 callout_list_t *ct_lfree; /* free callout list structures */ 310 callout_id_t ct_short_id; /* most recently issued short-term ID */ 311 callout_id_t ct_long_id; /* most recently issued long-term ID */ 312 callout_hash_t *ct_idhash; /* ID hash chains */ 313 callout_hash_t *ct_clhash; /* callout list hash */ 314 kstat_named_t *ct_kstat_data; /* callout kstat data */ 315 316 uint_t ct_type; /* callout table type */ 317 uint_t ct_suspend; /* suspend count */ 318 cyclic_id_t ct_cyclic; /* cyclic for this table */ 319 hrtime_t *ct_heap; /* callout expiration heap */ 320 ulong_t ct_heap_num; /* occupied slots in the heap */ 321 ulong_t ct_heap_max; /* end of the heap */ 322 kmem_cache_t *ct_cache; /* callout kmem cache */ 323 kmem_cache_t *ct_lcache; /* callout list kmem cache */ 324 callout_id_t ct_gen_id; /* generation based ID */ 325 326 callout_hash_t ct_expired; /* list of expired callout lists */ 327 taskq_t *ct_taskq; /* taskq to execute normal callouts */ 328 kstat_t *ct_kstats; /* callout kstats */ 329#ifdef _LP64 330 ulong_t ct_pad[4]; /* cache alignment */ 331#else 332 ulong_t ct_pad[7]; /* cache alignment */ 333#endif 334} callout_table_t; 335 336/* 337 * Short hand definitions for the callout kstats. 338 */ 339#define ct_timeouts \ 340 ct_kstat_data[CALLOUT_TIMEOUTS].value.ui64 341#define ct_timeouts_pending \ 342 ct_kstat_data[CALLOUT_TIMEOUTS_PENDING].value.ui64 343#define ct_untimeouts_unexpired \ 344 ct_kstat_data[CALLOUT_UNTIMEOUTS_UNEXPIRED].value.ui64 345#define ct_untimeouts_executing \ 346 ct_kstat_data[CALLOUT_UNTIMEOUTS_EXECUTING].value.ui64 347#define ct_untimeouts_expired \ 348 ct_kstat_data[CALLOUT_UNTIMEOUTS_EXPIRED].value.ui64 349#define ct_expirations \ 350 ct_kstat_data[CALLOUT_EXPIRATIONS].value.ui64 351#define ct_allocations \ 352 ct_kstat_data[CALLOUT_ALLOCATIONS].value.ui64 353 354#define CALLOUT_CHUNK 128 355 356#define CALLOUT_HEAP_PARENT(index) (((index) - 1) >> 1) 357#define CALLOUT_HEAP_RIGHT(index) (((index) + 1) << 1) 358#define CALLOUT_HEAP_LEFT(index) ((((index) + 1) << 1) - 1) 359 360#define CALLOUT_CYCLIC_HANDLER(t) \ 361 ((t == CALLOUT_REALTIME) ? callout_realtime : callout_normal) 362 363/* 364 * We define a blanket minimum resolution for callouts of 1 millisecond. 365 * 1 millisecond is a safe value as it is already supported when the clock 366 * resolution is set to high. 367 */ 368#define CALLOUT_MIN_RESOLUTION 1000000ULL 369#define CALLOUT_TCP_RESOLUTION 10000000ULL 370 371#define CALLOUT_ALIGN 64 /* cache line size */ 372 373#ifdef _LP64 374#define CALLOUT_MAX_TICKS NSEC_TO_TICK(CY_INFINITY); 375#else 376#define CALLOUT_MAX_TICKS LONG_MAX 377#endif 378 379extern void callout_init(void); 380extern void membar_sync(void); 381extern void callout_cpu_online(cpu_t *); 382extern void callout_cpu_offline(cpu_t *); 383extern void callout_hrestime(void); 384 385#endif 386 387#ifdef __cplusplus 388} 389#endif 390 391#endif /* _SYS_CALLO_H */ 392