/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 * $FreeBSD$
 *
 * This is a simplified version of the cyclic timer subsystem from
 * OpenSolaris.  In the FreeBSD version, we don't use interrupt levels.
 */

/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The Cyclic Subsystem
 * --------------------
 *
 *  Prehistory
 *
 * Historically, most computer architectures have specified interval-based
 * timer parts (e.g. SPARCstation's counter/timer; Intel's i8254).  While
 * these parts deal in relative (i.e. not absolute) time values, they are
 * typically used by the operating system to implement the abstraction of
 * absolute time.  As a result, these parts cannot typically be reprogrammed
 * without introducing error in the system's notion of time.
 *
 * Starting in about 1994, chip architectures began specifying high
 * resolution timestamp registers.  As of this writing (1999), all major
 * chip families (UltraSPARC, PentiumPro, MIPS, PowerPC, Alpha) have high
 * resolution timestamp registers, and two (UltraSPARC and MIPS) have added
 * the capacity to interrupt based on timestamp values.  These
 * timestamp-compare registers present a time-based interrupt source which
 * can be reprogrammed arbitrarily often without introducing error.  Given
 * the low cost of implementing such a timestamp-compare register (and the
 * tangible benefit of eliminating discrete timer parts), it is reasonable
 * to expect that future chip architectures will adopt this feature.
 *
 * The cyclic subsystem has been designed to take advantage of chip
 * architectures with the capacity to interrupt based on absolute, high
 * resolution values of time.
 *
 *  Subsystem Overview
 *
 * The cyclic subsystem is a low-level kernel subsystem designed to provide
 * arbitrarily high resolution, per-CPU interval timers (to avoid colliding
 * with existing terms, we dub such an interval timer a "cyclic").
 * Alternatively, a cyclic may be specified to be "omnipresent", denoting
 * firing on all online CPUs.
 *
 * Cyclic Subsystem Interface Overview
 * -----------------------------------
 *
 * The cyclic subsystem has interfaces with the kernel at-large, with other
 * kernel subsystems (e.g. the processor management subsystem, the
 * checkpoint resume subsystem) and with the platform (the cyclic backend).
 * Each of these interfaces is given a brief synopsis here, and is described
 * in full above the interface's implementation.
 *
 * The following diagram displays the cyclic subsystem's interfaces to
 * other kernel components.  The arrows denote a "calls" relationship, with
 * the large arrow indicating the cyclic subsystem's consumer interface.
 * Each arrow is labeled with the section in which the corresponding
 * interface is described.
 *
 *           Kernel at-large consumers
 *           -----------++------------
 *                      ||
 *                      ||
 *                     _||_
 *                     \  /
 *                      \/
 *            +---------------------+
 *            |                     |
 *            |  Cyclic subsystem   |<-----------  Other kernel subsystems
 *            |                     |
 *            +---------------------+
 *                   ^       |
 *                   |       |
 *                   |       |
 *                   |       v
 *            +---------------------+
 *            |                     |
 *            |   Cyclic backend    |
 *            | (platform specific) |
 *            |                     |
 *            +---------------------+
 *
 *
 *  Kernel At-Large Interfaces
 *
 *    cyclic_add()       <-- Creates a cyclic
 *    cyclic_add_omni()  <-- Creates an omnipresent cyclic
 *    cyclic_remove()    <-- Removes a cyclic
 *
 *  Backend Interfaces
 *
 *    cyclic_init()      <-- Initializes the cyclic subsystem
 *    cyclic_fire()      <-- Interrupt entry point
 *
 * The backend-supplied interfaces (through the cyc_backend structure) are
 * documented in detail in <sys/cyclic_impl.h>.
 *
 *
 * Cyclic Subsystem Implementation Overview
 * ----------------------------------------
 *
 * The cyclic subsystem is designed to minimize interference between
 * cyclics on different CPUs.  Thus, all of the cyclic subsystem's data
 * structures hang off of a per-CPU structure, cyc_cpu.
 *
 * Each cyc_cpu has a power-of-two sized array of cyclic structures (the
 * cyp_cyclics member of the cyc_cpu structure).  If cyclic_add() is called
 * and there does not exist a free slot in the cyp_cyclics array, the size
 * of the array will be doubled.  The array will never shrink.  Cyclics are
 * referred to by their index in the cyp_cyclics array, which is of type
 * cyc_index_t.
 *
 * The cyclics are kept sorted by expiration time in the cyc_cpu's heap.
 * The heap is keyed by cyclic expiration time, with parents expiring
 * earlier than their children.
 *
 *  Heap Management
 *
 * The heap is managed primarily by cyclic_fire().
 * Upon entry, cyclic_fire() compares the root cyclic's expiration time to
 * the current time.  If the expiration time is in the past,
 * cyclic_expire() is called on the root cyclic.  Upon return from
 * cyclic_expire(), the cyclic's new expiration time is derived by adding
 * its interval to its old expiration time, and a downheap operation is
 * performed.  After the downheap, cyclic_fire() examines the (potentially
 * changed) root cyclic, repeating the cyclic_expire()/add
 * interval/cyclic_downheap() sequence until the root cyclic has an
 * expiration time in the future.  This expiration time (guaranteed to be
 * the earliest in the heap) is then communicated to the backend via
 * cyb_reprogram.  Optimal backends will next call cyclic_fire() shortly
 * after the root cyclic's expiration time.
 *
 * To allow efficient, deterministic downheap operations, we implement the
 * heap as an array (the cyp_heap member of the cyc_cpu structure), with
 * each element containing an index into the CPU's cyp_cyclics array.
 *
 * The heap is laid out in the array according to the following:
 *
 *   1.  The root of the heap is always in the 0th element of the heap array
 *   2.  The left and right children of the nth element are element
 *       (((n + 1) << 1) - 1) and element ((n + 1) << 1), respectively.
 *
 * This layout is standard (see, e.g., Cormen's "Algorithms"); the proof
 * that these constraints correctly lay out a heap (or indeed, any binary
 * tree) is trivial and left to the reader.
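 *
 * As a concrete illustration, these rules correspond to index arithmetic
 * of the following form (a sketch; the authoritative macros are defined
 * in <sys/cyclic_impl.h>):
 *
 *      #define CYC_HEAP_PARENT(ndx)    (((ndx) - 1) >> 1)
 *      #define CYC_HEAP_LEFT(ndx)      ((((ndx) + 1) << 1) - 1)
 *      #define CYC_HEAP_RIGHT(ndx)     (((ndx) + 1) << 1)
 *
 * CYC_HEAP_PARENT() is simply the inverse of the two child formulas above.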
 *
 * To see the heap by example, assume our cyclics array has the following
 * members (at time t):
 *
 *            cy_handler                cy_expire
 *            ---------------------------------------------
 *     [ 0]   clock()                   t+10000000
 *     [ 1]   deadman()                 t+1000000000
 *     [ 2]   clock_highres_fire()      t+100
 *     [ 3]   clock_highres_fire()      t+1000
 *     [ 4]   clock_highres_fire()      t+500
 *     [ 5]   (free)                    --
 *     [ 6]   (free)                    --
 *     [ 7]   (free)                    --
 *
 * The heap array could be:
 *
 *                [0]   [1]   [2]   [3]   [4]   [5]   [6]   [7]
 *              +-----+-----+-----+-----+-----+-----+-----+-----+
 *              |     |     |     |     |     |     |     |     |
 *              |  2  |  3  |  4  |  0  |  1  |  x  |  x  |  x  |
 *              |     |     |     |     |     |     |     |     |
 *              +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * Graphically, this array corresponds to the following (excuse the ASCII
 * art):
 *
 *                                       2
 *                                       |
 *                    +------------------+------------------+
 *                    3                                     4
 *                    |
 *          +---------+--------+
 *          0                  1
 *
 * Note that the heap is laid out by layer: all nodes at a given depth are
 * stored in consecutive elements of the array.  Moreover, layers of
 * consecutive depths are in adjacent element ranges.  This property
 * guarantees high locality of reference during downheap operations.
 * Specifically, we are guaranteed that we can downheap to a depth of
 *
 *      lg (cache_line_size / sizeof (cyc_index_t))
 *
 * nodes with at most one cache miss.  On UltraSPARC (64 byte e-cache line
 * size), this corresponds to a depth of four nodes.  Thus, if there are
 * fewer than sixteen cyclics in the heap, downheaps on UltraSPARC miss at
 * most once in the e-cache.
 *
 * Downheaps are required to compare siblings as they proceed down the
 * heap.  For downheaps proceeding beyond the one-cache-miss depth, every
 * access to a left child could potentially miss in the cache.  However,
 * if we assume
 *
 *      (cache_line_size / sizeof (cyc_index_t)) > 2,
 *
 * then all siblings are guaranteed to be on the same cache line.  Thus, the
 * miss on the left child will guarantee a hit on the right child; downheaps
 * will incur at most one cache miss per layer beyond the one-cache-miss
 * depth.  The total number of cache misses for heap management during a
 * downheap operation is thus bounded by
 *
 *      lg (n) - lg (cache_line_size / sizeof (cyc_index_t))
 *
 * Traditional pointer-based heaps are implemented without regard to
 * locality.  Downheaps can thus incur two cache misses per layer (one for
 * each child), but at most one cache miss at the root.  This yields a bound
 * of
 *
 *      2 * lg (n) - 1
 *
 * on the total cache misses.
 *
 * This difference may seem theoretically trivial (the difference is, after
 * all, constant), but can become substantial in practice -- especially for
 * caches with very large cache lines and high miss penalties (e.g. TLBs).
 *
 * Heaps must always be full, balanced trees.  Heap management must
 * therefore track the next point-of-insertion into the heap.  In
 * pointer-based heaps, recomputing this point takes O(lg (n)).  Given the
 * layout of the array-based implementation, however, the next
 * point-of-insertion is always:
 *
 *      heap[number_of_elements]
 *
 * We exploit this property by implementing the free-list in the unused
 * heap elements.  Heap insertion, therefore, consists only of filling in
 * the cyclic at cyp_cyclics[cyp_heap[number_of_elements]], incrementing
 * the number of elements, and performing an upheap.  Heap deletion consists
 * of decrementing the number of elements, swapping the to-be-deleted
 * element with the element at cyp_heap[number_of_elements], and
 * downheaping.
 *
 * Filling in more details in our earlier example:
 *
 *                                              +--- free list head
 *                                              |
 *                                              V
 *
 *                [0]   [1]   [2]   [3]   [4]   [5]   [6]   [7]
 *              +-----+-----+-----+-----+-----+-----+-----+-----+
 *              |     |     |     |     |     |     |     |     |
 *              |  2  |  3  |  4  |  0  |  1  |  5  |  6  |  7  |
 *              |     |     |     |     |     |     |     |     |
 *              +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * To insert into this heap, we would just need to fill in the cyclic at
 * cyp_cyclics[5], bump the number of elements (from 5 to 6) and perform
 * an upheap.
 *
 * If we wanted to remove, say, cyp_cyclics[3], we would first scan for it
 * in the cyp_heap, and discover it at cyp_heap[1].  We would then decrement
 * the number of elements (from 5 to 4), swap cyp_heap[1] with cyp_heap[4],
 * and perform a downheap from cyp_heap[1].  The linear scan is required
 * because the cyclic does not keep a backpointer into the heap.  This makes
 * heap manipulation (e.g. downheaps) faster at the expense of removal
 * operations.
 *
 *  Expiry processing
 *
 * As alluded to above, cyclic_expire() is called by cyclic_fire() to expire
 * a cyclic.  Cyclic subsystem consumers are guaranteed that for an
 * arbitrary time t in the future, their cyclic handler will have been
 * called (t - cyt_when) / cyt_interval times.  cyclic_expire() simply needs
 * to call the handler.
 *
 *  Resizing
 *
 * All of the discussion thus far has assumed a static number of cyclics.
 * Obviously, static limitations are not practical; we need the capacity
 * to resize our data structures dynamically.
 *
 * We resize our data structures lazily, and only on a per-CPU basis.
 * The size of the data structures always doubles and never shrinks.  We
 * serialize adds (and thus resizes) on cpu_lock; we never need to deal
 * with concurrent resizes.  Resizes should be rare; they may induce jitter
 * on the CPU being resized, but should not affect cyclic operation on other
 * CPUs.
 *
 * Two key cyc_cpu data structures need to be resized: the cyclics array
 * and the heap array.  Resizing is relatively straightforward:
 *
 *   1.  The new, larger arrays are allocated in cyclic_expand() (called
 *       from cyclic_add()).
 *   2.  The contents of the old arrays are copied into the new arrays.
 *   3.  The pointers are updated.
 *   4.  The old arrays are freed.
 *
 *  Removals
 *
 * Cyclic removals should be rare.  To simplify the implementation (and to
 * allow optimization for the cyclic_fire()/cyclic_expire() path), we force
 * removals and adds to serialize on cpu_lock.
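 *
 *  Example
 *
 * By way of illustration, a kernel consumer might create and later remove
 * a 10ms cyclic as follows (a sketch; my_tick() and its argument are
 * hypothetical):
 *
 *      extern void my_tick(void *);
 *
 *      cyc_handler_t hdlr;
 *      cyc_time_t when;
 *      cyclic_id_t id;
 *
 *      hdlr.cyh_func = my_tick;
 *      hdlr.cyh_arg = NULL;
 *      when.cyt_when = 0;              (start on next interval boundary)
 *      when.cyt_interval = 10000000;   (10ms, in nanoseconds)
 *
 *      mutex_enter(&cpu_lock);
 *      id = cyclic_add(&hdlr, &when);
 *      mutex_exit(&cpu_lock);
 *      ...
 *      mutex_enter(&cpu_lock);
 *      cyclic_remove(id);
 *      mutex_exit(&cpu_lock);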
 *
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/cyclic_impl.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/dtrace_bsd.h>
#include <machine/cpu.h>

static kmem_cache_t *cyclic_id_cache;
static cyc_id_t *cyclic_id_head;
static cyc_backend_t cyclic_backend;

static MALLOC_DEFINE(M_CYCLIC, "cyclic", "Cyclic timer subsystem");

static __inline hrtime_t
cyc_gethrtime(void)
{
	struct bintime bt;

	binuptime(&bt);
	return ((hrtime_t)bt.sec * NANOSEC +
	    (((uint64_t)NANOSEC * (uint32_t)(bt.frac >> 32)) >> 32));
}

/*
 * Returns 1 if the upheap propagated to the root, 0 if it did not.  This
 * allows the caller to reprogram the backend only when the root has been
 * modified.
 */
static int
cyclic_upheap(cyc_cpu_t *cpu, cyc_index_t ndx)
{
	cyclic_t *cyclics;
	cyc_index_t *heap;
	cyc_index_t heap_parent, heap_current = ndx;
	cyc_index_t parent, current;

	if (heap_current == 0)
		return (1);

	heap = cpu->cyp_heap;
	cyclics = cpu->cyp_cyclics;
	heap_parent = CYC_HEAP_PARENT(heap_current);

	for (;;) {
		current = heap[heap_current];
		parent = heap[heap_parent];

		/*
		 * We have an expiration time later than our parent; we're
		 * done.
		 */
		if (cyclics[current].cy_expire >= cyclics[parent].cy_expire)
			return (0);

		/*
		 * We need to swap with our parent, and continue up the heap.
		 */
		heap[heap_parent] = current;
		heap[heap_current] = parent;

		/*
		 * If we just reached the root, we're done.
		 */
		if (heap_parent == 0)
			return (1);

		heap_current = heap_parent;
		heap_parent = CYC_HEAP_PARENT(heap_current);
	}
}

static void
cyclic_downheap(cyc_cpu_t *cpu, cyc_index_t ndx)
{
	cyclic_t *cyclics = cpu->cyp_cyclics;
	cyc_index_t *heap = cpu->cyp_heap;

	cyc_index_t heap_left, heap_right, heap_me = ndx;
	cyc_index_t left, right, me;
	cyc_index_t nelems = cpu->cyp_nelems;

	for (;;) {
		/*
		 * If we don't have a left child (i.e., we're a leaf), we're
		 * done.
		 */
		if ((heap_left = CYC_HEAP_LEFT(heap_me)) >= nelems)
			return;

		left = heap[heap_left];
		me = heap[heap_me];

		heap_right = CYC_HEAP_RIGHT(heap_me);

		/*
		 * Even if we don't have a right child, we still need to
		 * compare our expiration time against that of our left child.
		 */
		if (heap_right >= nelems)
			goto comp_left;

		right = heap[heap_right];

		/*
		 * We have both a left and a right child.  We need to compare
		 * the expiration times of the children to determine which
		 * expires earlier.
		 */
		if (cyclics[right].cy_expire < cyclics[left].cy_expire) {
			/*
			 * Our right child is the earlier of our children.
			 * We'll now compare our expiration time to its; if
			 * ours is the earlier, we're done.
			 */
			if (cyclics[me].cy_expire <= cyclics[right].cy_expire)
				return;

			/*
			 * Our right child expires earlier than we do; swap
			 * with our right child, and descend right.
			 */
			heap[heap_right] = me;
			heap[heap_me] = right;
			heap_me = heap_right;
			continue;
		}

comp_left:
		/*
		 * Our left child is the earlier of our children (or we have
		 * no right child).  We'll now compare our expiration time
		 * to its; if ours is the earlier, we're done.
		 */
		if (cyclics[me].cy_expire <= cyclics[left].cy_expire)
			return;

		/*
		 * Our left child expires earlier than we do; swap with our
		 * left child, and descend left.
		 */
		heap[heap_left] = me;
		heap[heap_me] = left;
		heap_me = heap_left;
	}
}

static void
cyclic_expire(cyc_cpu_t *cpu, cyc_index_t ndx, cyclic_t *cyclic)
{
	cyc_func_t handler = cyclic->cy_handler;
	void *arg = cyclic->cy_arg;

	(*handler)(arg);
}

/*
 * cyclic_fire(cpu_t *)
 *
 *  Overview
 *
 *    cyclic_fire() is the cyclic subsystem's interrupt handler.
 *    Called by the cyclic backend.
 *
 *  Arguments and notes
 *
 *    The only argument is the CPU on which the interrupt is executing;
 *    backends must call into cyclic_fire() on the specified CPU.
 *
 *    cyclic_fire() may be called spuriously without ill effect.  Optimal
 *    backends will call into cyclic_fire() at or shortly after the time
 *    requested via cyb_reprogram().  However, calling cyclic_fire()
 *    arbitrarily late will only manifest latency bubbles; the correctness
 *    of the cyclic subsystem does not rely on the timeliness of the backend.
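 *
 *    As a concrete illustration of the catch-up behavior (example values,
 *    not part of the original interface contract): if a cyclic with a 10ms
 *    interval is serviced three seconds late, the loop below advances its
 *    expiration time by ((now - exp) / interval + 1) intervals in a single
 *    step, preserving the expiration time modulo the interval rather than
 *    invoking the handler roughly 300 times.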
 *
 *    cyclic_fire() is wait-free; it will not block or spin.
 *
 *  Return values
 *
 *    None.
 *
 */
static void
cyclic_fire(cpu_t *c)
{
	cyc_cpu_t *cpu = c->cpu_cyclic;
	cyc_backend_t *be = cpu->cyp_backend;
	cyc_index_t *heap = cpu->cyp_heap;
	cyclic_t *cyclic, *cyclics = cpu->cyp_cyclics;
	void *arg = be->cyb_arg;
	hrtime_t now = cyc_gethrtime();
	hrtime_t exp;

	if (cpu->cyp_nelems == 0) {
		/* This is a spurious fire. */
		return;
	}

	for (;;) {
		cyc_index_t ndx = heap[0];

		cyclic = &cyclics[ndx];

		ASSERT(!(cyclic->cy_flags & CYF_FREE));

		if ((exp = cyclic->cy_expire) > now)
			break;

		cyclic_expire(cpu, ndx, cyclic);

		/*
		 * If this cyclic will be set to next expire in the distant
		 * past, we have one of two situations:
		 *
		 *   a)	This is the first firing of a cyclic which had
		 *	cy_expire set to 0.
		 *
		 *   b)	We are tragically late for a cyclic -- most likely
		 *	due to being in the debugger.
		 *
		 * In either case, we set the new expiration time to be the
		 * next interval boundary.  This assures that the expiration
		 * time modulo the interval is invariant.
		 *
		 * We arbitrarily define "distant" to be one second (one
		 * second is chosen because it's shorter than any foray to
		 * the debugger while still being longer than any legitimate
		 * stretch).
		 */
		exp += cyclic->cy_interval;

		if (now - exp > NANOSEC) {
			hrtime_t interval = cyclic->cy_interval;

			exp += ((now - exp) / interval + 1) * interval;
		}

		cyclic->cy_expire = exp;
		cyclic_downheap(cpu, 0);
	}

	/*
	 * Now we have a cyclic in the root slot which isn't in the past;
	 * reprogram the interrupt source.
	 */
	be->cyb_reprogram(arg, exp);
}

static void
cyclic_expand_xcall(cyc_xcallarg_t *arg)
{
	cyc_cpu_t *cpu = arg->cyx_cpu;
	cyc_index_t new_size = arg->cyx_size, size = cpu->cyp_size, i;
	cyc_index_t *new_heap = arg->cyx_heap;
	cyclic_t *cyclics = cpu->cyp_cyclics, *new_cyclics = arg->cyx_cyclics;

	/* Disable preemption and interrupts. */
	mtx_lock_spin(&cpu->cyp_mtx);

	/*
	 * Assert that the new size is a power of 2.
	 */
	ASSERT((new_size & (new_size - 1)) == 0);
	ASSERT(new_size == (size << 1));
	ASSERT(cpu->cyp_heap != NULL && cpu->cyp_cyclics != NULL);

	bcopy(cpu->cyp_heap, new_heap, sizeof (cyc_index_t) * size);
	bcopy(cyclics, new_cyclics, sizeof (cyclic_t) * size);

	/*
	 * Set up the free list, and set all of the new cyclics to be
	 * CYF_FREE.
	 */
	for (i = size; i < new_size; i++) {
		new_heap[i] = i;
		new_cyclics[i].cy_flags = CYF_FREE;
	}

	/*
	 * We can go ahead and plow the value of cyp_heap and cyp_cyclics;
	 * cyclic_expand() has kept a copy.
	 */
	cpu->cyp_heap = new_heap;
	cpu->cyp_cyclics = new_cyclics;
	cpu->cyp_size = new_size;
	mtx_unlock_spin(&cpu->cyp_mtx);
}

/*
 * cyclic_expand() will cross call onto the CPU to perform the actual
 * expand operation.
 */
static void
cyclic_expand(cyc_cpu_t *cpu)
{
	cyc_index_t new_size, old_size;
	cyc_index_t *new_heap, *old_heap;
	cyclic_t *new_cyclics, *old_cyclics;
	cyc_xcallarg_t arg;
	cyc_backend_t *be = cpu->cyp_backend;

	ASSERT(MUTEX_HELD(&cpu_lock));

	old_heap = cpu->cyp_heap;
	old_cyclics = cpu->cyp_cyclics;

	if ((new_size = ((old_size = cpu->cyp_size) << 1)) == 0) {
		new_size = CY_DEFAULT_PERCPU;
		ASSERT(old_heap == NULL && old_cyclics == NULL);
	}

	/*
	 * Check that the new_size is a power of 2.
	 */
	ASSERT(((new_size - 1) & new_size) == 0);

	new_heap = malloc(sizeof(cyc_index_t) * new_size, M_CYCLIC, M_WAITOK);
	new_cyclics = malloc(sizeof(cyclic_t) * new_size, M_CYCLIC,
	    M_ZERO | M_WAITOK);

	arg.cyx_cpu = cpu;
	arg.cyx_heap = new_heap;
	arg.cyx_cyclics = new_cyclics;
	arg.cyx_size = new_size;

	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu,
	    (cyc_func_t)cyclic_expand_xcall, &arg);

	if (old_cyclics != NULL) {
		ASSERT(old_heap != NULL);
		ASSERT(old_size != 0);
		free(old_cyclics, M_CYCLIC);
		free(old_heap, M_CYCLIC);
	}
}

static void
cyclic_add_xcall(cyc_xcallarg_t *arg)
{
	cyc_cpu_t *cpu = arg->cyx_cpu;
	cyc_handler_t *hdlr = arg->cyx_hdlr;
	cyc_time_t *when = arg->cyx_when;
	cyc_backend_t *be = cpu->cyp_backend;
	cyc_index_t ndx, nelems;
	cyb_arg_t bar = be->cyb_arg;
	cyclic_t *cyclic;

	ASSERT(cpu->cyp_nelems < cpu->cyp_size);

	/* Disable preemption and interrupts. */
	mtx_lock_spin(&cpu->cyp_mtx);
	nelems = cpu->cyp_nelems++;

	if (nelems == 0) {
		/*
		 * If this is the first element, we need to enable the
		 * backend on this CPU.
		 */
		be->cyb_enable(bar);
	}

	ndx = cpu->cyp_heap[nelems];
	cyclic = &cpu->cyp_cyclics[ndx];

	ASSERT(cyclic->cy_flags == CYF_FREE);
	cyclic->cy_interval = when->cyt_interval;

	if (when->cyt_when == 0) {
		/*
		 * If a start time hasn't been explicitly specified, we'll
		 * start on the next interval boundary.
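		 *
		 * For example (illustrative values): with cyt_interval of
		 * 1000000ns and cyc_gethrtime() returning 2500000ns, the
		 * computation below yields a first expiration of 3000000ns.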
		 */
		cyclic->cy_expire = (cyc_gethrtime() /
		    cyclic->cy_interval + 1) * cyclic->cy_interval;
	} else {
		cyclic->cy_expire = when->cyt_when;
	}

	cyclic->cy_handler = hdlr->cyh_func;
	cyclic->cy_arg = hdlr->cyh_arg;
	cyclic->cy_flags = arg->cyx_flags;

	if (cyclic_upheap(cpu, nelems)) {
		hrtime_t exp = cyclic->cy_expire;

		/*
		 * If our upheap propagated to the root, we need to
		 * reprogram the interrupt source.
		 */
		be->cyb_reprogram(bar, exp);
	}
	mtx_unlock_spin(&cpu->cyp_mtx);

	arg->cyx_ndx = ndx;
}

static cyc_index_t
cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr,
    cyc_time_t *when, uint16_t flags)
{
	cyc_backend_t *be = cpu->cyp_backend;
	cyb_arg_t bar = be->cyb_arg;
	cyc_xcallarg_t arg;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!(cpu->cyp_cpu->cpu_flags & CPU_OFFLINE));
	ASSERT(when->cyt_when >= 0 && when->cyt_interval > 0);

	if (cpu->cyp_nelems == cpu->cyp_size) {
		/*
		 * This is expensive; it will cross call onto the other
		 * CPU to perform the expansion.
		 */
		cyclic_expand(cpu);
		ASSERT(cpu->cyp_nelems < cpu->cyp_size);
	}

	/*
	 * By now, we know that we're going to be able to successfully
	 * perform the add.  Now cross call over to the CPU of interest to
	 * actually add our cyclic.
	 */
	arg.cyx_cpu = cpu;
	arg.cyx_hdlr = hdlr;
	arg.cyx_when = when;
	arg.cyx_flags = flags;

	be->cyb_xcall(bar, cpu->cyp_cpu, (cyc_func_t)cyclic_add_xcall, &arg);

	return (arg.cyx_ndx);
}

static void
cyclic_remove_xcall(cyc_xcallarg_t *arg)
{
	cyc_cpu_t *cpu = arg->cyx_cpu;
	cyc_backend_t *be = cpu->cyp_backend;
	cyb_arg_t bar = be->cyb_arg;
	cyc_index_t ndx = arg->cyx_ndx, nelems = cpu->cyp_nelems, i;
	cyc_index_t *heap = cpu->cyp_heap, last;
	cyclic_t *cyclic;

	ASSERT(nelems > 0);

	/* Disable preemption and interrupts. */
	mtx_lock_spin(&cpu->cyp_mtx);
	cyclic = &cpu->cyp_cyclics[ndx];

	/*
	 * Grab the current expiration time.  If this cyclic is being
	 * removed as part of a juggling operation, the expiration time
	 * will be used when the cyclic is added to the new CPU.
	 */
	if (arg->cyx_when != NULL) {
		arg->cyx_when->cyt_when = cyclic->cy_expire;
		arg->cyx_when->cyt_interval = cyclic->cy_interval;
	}

	/*
	 * Now set the flags to CYF_FREE.  We hold cyp_mtx (a spin mutex,
	 * so interrupts are disabled) across the update, so the change
	 * appears atomic to cyclic_fire().
	 */
	cyclic->cy_flags = CYF_FREE;

	for (i = 0; i < nelems; i++) {
		if (heap[i] == ndx)
			break;
	}

	if (i == nelems)
		panic("attempt to remove non-existent cyclic");

	cpu->cyp_nelems = --nelems;

	if (nelems == 0) {
		/*
		 * If we just removed the last element, then we need to
		 * disable the backend on this CPU.
		 */
		be->cyb_disable(bar);
	}

	if (i == nelems) {
		/*
		 * If we just removed the last element of the heap, then
		 * we don't have to downheap.
		 */
		goto out;
	}

	/*
	 * Swap the last element of the heap with the one we want to
	 * remove, and downheap (this has the implicit effect of putting
	 * the newly freed element on the free list).
	 */
	heap[i] = (last = heap[nelems]);
	heap[nelems] = ndx;

	if (i == 0) {
		cyclic_downheap(cpu, 0);
	} else {
		if (cyclic_upheap(cpu, i) == 0) {
			/*
			 * The upheap didn't propagate to the root; if it
			 * didn't propagate at all, we need to downheap.
			 */
			if (heap[i] == last) {
				cyclic_downheap(cpu, i);
			}
			goto out;
		}
	}

	/*
	 * We're here because we changed the root; we need to reprogram
	 * the clock source.
	 */
	cyclic = &cpu->cyp_cyclics[heap[0]];

	ASSERT(nelems != 0);
	be->cyb_reprogram(bar, cyclic->cy_expire);
out:
	mtx_unlock_spin(&cpu->cyp_mtx);
}

static int
cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait)
{
	cyc_backend_t *be = cpu->cyp_backend;
	cyc_xcallarg_t arg;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(wait == CY_WAIT || wait == CY_NOWAIT);

	arg.cyx_ndx = ndx;
	arg.cyx_cpu = cpu;
	arg.cyx_when = when;
	arg.cyx_wait = wait;

	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu,
	    (cyc_func_t)cyclic_remove_xcall, &arg);

	return (1);
}

static void
cyclic_configure(cpu_t *c)
{
	cyc_cpu_t *cpu = malloc(sizeof(cyc_cpu_t), M_CYCLIC,
	    M_ZERO | M_WAITOK);
	cyc_backend_t *nbe = malloc(sizeof(cyc_backend_t), M_CYCLIC,
	    M_ZERO | M_WAITOK);

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cyclic_id_cache == NULL)
		cyclic_id_cache = kmem_cache_create("cyclic_id_cache",
		    sizeof (cyc_id_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	cpu->cyp_cpu = c;

	cpu->cyp_size = 1;
	cpu->cyp_heap = malloc(sizeof(cyc_index_t), M_CYCLIC,
	    M_ZERO | M_WAITOK);
	cpu->cyp_cyclics = malloc(sizeof(cyclic_t), M_CYCLIC,
	    M_ZERO | M_WAITOK);
	cpu->cyp_cyclics->cy_flags = CYF_FREE;

	mtx_init(&cpu->cyp_mtx, "cyclic cpu", NULL, MTX_SPIN);
cpu", NULL, MTX_SPIN); 895179260Sjb 896179260Sjb /* 897179260Sjb * Setup the backend for this CPU. 898179260Sjb */ 899179260Sjb bcopy(&cyclic_backend, nbe, sizeof (cyc_backend_t)); 900179260Sjb if (nbe->cyb_configure != NULL) 901179260Sjb nbe->cyb_arg = nbe->cyb_configure(c); 902179260Sjb cpu->cyp_backend = nbe; 903179260Sjb 904179260Sjb /* 905179260Sjb * On platforms where stray interrupts may be taken during startup, 906179260Sjb * the CPU's cpu_cyclic pointer serves as an indicator that the 907179260Sjb * cyclic subsystem for this CPU is prepared to field interrupts. 908179260Sjb */ 909179260Sjb membar_producer(); 910179260Sjb 911179260Sjb c->cpu_cyclic = cpu; 912179260Sjb} 913179260Sjb 914179260Sjbstatic void 915179260Sjbcyclic_unconfigure(cpu_t *c) 916179260Sjb{ 917179260Sjb cyc_cpu_t *cpu = c->cpu_cyclic; 918179260Sjb cyc_backend_t *be = cpu->cyp_backend; 919179260Sjb cyb_arg_t bar = be->cyb_arg; 920179260Sjb 921179260Sjb ASSERT(MUTEX_HELD(&cpu_lock)); 922179260Sjb 923179260Sjb c->cpu_cyclic = NULL; 924179260Sjb 925179260Sjb /* 926179260Sjb * Let the backend know that the CPU is being yanked, and free up 927179260Sjb * the backend structure. 928179260Sjb */ 929179260Sjb if (be->cyb_unconfigure != NULL) 930179260Sjb be->cyb_unconfigure(bar); 931179260Sjb free(be, M_CYCLIC); 932179260Sjb cpu->cyp_backend = NULL; 933179260Sjb 934179260Sjb mtx_destroy(&cpu->cyp_mtx); 935179260Sjb 936179260Sjb /* Finally, clean up our remaining dynamic structures. */ 937179260Sjb free(cpu->cyp_cyclics, M_CYCLIC); 938179260Sjb free(cpu->cyp_heap, M_CYCLIC); 939179260Sjb free(cpu, M_CYCLIC); 940179260Sjb} 941179260Sjb 942179260Sjbstatic void 943179260Sjbcyclic_omni_start(cyc_id_t *idp, cyc_cpu_t *cpu) 944179260Sjb{ 945179260Sjb cyc_omni_handler_t *omni = &idp->cyi_omni_hdlr; 946179260Sjb cyc_omni_cpu_t *ocpu = malloc(sizeof(cyc_omni_cpu_t), M_CYCLIC , M_WAITOK); 947179260Sjb cyc_handler_t hdlr; 948179260Sjb cyc_time_t when; 949179260Sjb 950179260Sjb ASSERT(MUTEX_HELD(&cpu_lock)); 951179260Sjb ASSERT(idp->cyi_cpu == NULL); 952179260Sjb 953179260Sjb hdlr.cyh_func = NULL; 954179260Sjb hdlr.cyh_arg = NULL; 955179260Sjb 956179260Sjb when.cyt_when = 0; 957179260Sjb when.cyt_interval = 0; 958179260Sjb 959179260Sjb omni->cyo_online(omni->cyo_arg, cpu->cyp_cpu, &hdlr, &when); 960179260Sjb 961179260Sjb ASSERT(hdlr.cyh_func != NULL); 962179260Sjb ASSERT(when.cyt_when >= 0 && when.cyt_interval > 0); 963179260Sjb 964179260Sjb ocpu->cyo_cpu = cpu; 965179260Sjb ocpu->cyo_arg = hdlr.cyh_arg; 966179260Sjb ocpu->cyo_ndx = cyclic_add_here(cpu, &hdlr, &when, 0); 967179260Sjb ocpu->cyo_next = idp->cyi_omni_list; 968179260Sjb idp->cyi_omni_list = ocpu; 969179260Sjb} 970179260Sjb 971179260Sjbstatic void 972179260Sjbcyclic_omni_stop(cyc_id_t *idp, cyc_cpu_t *cpu) 973179260Sjb{ 974179260Sjb cyc_omni_handler_t *omni = &idp->cyi_omni_hdlr; 975179260Sjb cyc_omni_cpu_t *ocpu = idp->cyi_omni_list, *prev = NULL; 976179260Sjb 977179260Sjb ASSERT(MUTEX_HELD(&cpu_lock)); 978179260Sjb ASSERT(idp->cyi_cpu == NULL); 979179260Sjb ASSERT(ocpu != NULL); 980179260Sjb 981179260Sjb while (ocpu != NULL && ocpu->cyo_cpu != cpu) { 982179260Sjb prev = ocpu; 983179260Sjb ocpu = ocpu->cyo_next; 984179260Sjb } 985179260Sjb 986179260Sjb /* 987179260Sjb * We _must_ have found an cyc_omni_cpu which corresponds to this 988179260Sjb * CPU -- the definition of an omnipresent cyclic is that it runs 989179260Sjb * on all online CPUs. 
	 */
	ASSERT(ocpu != NULL);

	if (prev == NULL) {
		idp->cyi_omni_list = ocpu->cyo_next;
	} else {
		prev->cyo_next = ocpu->cyo_next;
	}

	(void) cyclic_remove_here(ocpu->cyo_cpu, ocpu->cyo_ndx, NULL, CY_WAIT);

	/*
	 * The cyclic has been removed from this CPU; time to call the
	 * omnipresent offline handler.
	 */
	if (omni->cyo_offline != NULL)
		omni->cyo_offline(omni->cyo_arg, cpu->cyp_cpu, ocpu->cyo_arg);

	free(ocpu, M_CYCLIC);
}

static cyc_id_t *
cyclic_new_id(void)
{
	cyc_id_t *idp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	idp = kmem_cache_alloc(cyclic_id_cache, KM_SLEEP);

	/*
	 * The cyi_cpu field of the cyc_id_t structure tracks the CPU
	 * associated with the cyclic.  If and only if this field is NULL,
	 * the cyc_id_t is an omnipresent cyclic.  Note that cyi_omni_list
	 * may be NULL for an omnipresent cyclic while the cyclic is being
	 * created or destroyed.
	 */
	idp->cyi_cpu = NULL;
	idp->cyi_ndx = 0;

	idp->cyi_next = cyclic_id_head;
	idp->cyi_prev = NULL;
	idp->cyi_omni_list = NULL;

	if (cyclic_id_head != NULL) {
		ASSERT(cyclic_id_head->cyi_prev == NULL);
		cyclic_id_head->cyi_prev = idp;
	}

	cyclic_id_head = idp;

	return (idp);
}

/*
 * cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *)
 *
 *  Overview
 *
 *    cyclic_add() will create an unbound cyclic with the specified handler
 *    and interval.  The cyclic will run on a CPU which both has interrupts
 *    enabled and is in the system CPU partition.
 *
 *  Arguments and notes
 *
 *    As its first argument, cyclic_add() takes a cyc_handler, which has
 *    the following members:
 *
 *      cyc_func_t cyh_func    <-- Cyclic handler
 *      void *cyh_arg          <-- Argument to cyclic handler
 *
 *    In addition to a cyc_handler, cyclic_add() takes a cyc_time, which
 *    has the following members:
 *
 *      hrtime_t cyt_when      <-- Absolute time, in nanoseconds since
 *                                 boot, at which to start firing
 *      hrtime_t cyt_interval  <-- Length of interval, in nanoseconds
 *
 *    gethrtime() is the time source for nanoseconds since boot.  If
 *    cyt_when is set to 0, the cyclic will start to fire when cyt_interval
 *    next divides the number of nanoseconds since boot.
 *
 *    The cyt_interval field _must_ be filled in by the caller; one-shots
 *    are _not_ explicitly supported by the cyclic subsystem (cyclic_add()
 *    will assert that cyt_interval is non-zero).  The maximum value for
 *    either field is INT64_MAX; the caller is responsible for assuring
 *    that cyt_when + cyt_interval <= INT64_MAX.  Neither field may be
 *    negative.
 *
 *    For an arbitrary time t in the future, the cyclic handler is
 *    guaranteed to have been called (t - cyt_when) / cyt_interval times.
 *    This will be true even if interrupts have been disabled for periods
 *    greater than cyt_interval nanoseconds.  In order to compensate for
 *    such periods, the cyclic handler may be called a finite number of
 *    times with an arbitrarily small interval.
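 *
 *    For example (illustrative numbers): with cyt_when = 0 and
 *    cyt_interval = 10000000 (10ms), the handler is guaranteed to have
 *    been called (t - 0) / 10000000 = 100 times by t = one second after
 *    boot.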
 *
 *    The cyclic subsystem will not enforce any lower bound on the
 *    interval; if the interval is less than the time required to process
 *    an interrupt, the CPU will wedge.  It's the responsibility of the
 *    caller to assure that either the value of the interval is sane, or
 *    that its caller has sufficient privilege to deny service (i.e. its
 *    caller is root).
 *
 *  Return value
 *
 *    cyclic_add() returns a cyclic_id_t, which is guaranteed to be a value
 *    other than CYCLIC_NONE.  cyclic_add() cannot fail.
 *
 *  Caller's context
 *
 *    cpu_lock must be held by the caller, and the caller must not be in
 *    interrupt context.  cyclic_add() will perform a KM_SLEEP kernel
 *    memory allocation, so the usual rules (e.g. p_lock cannot be held)
 *    apply.  A cyclic may be added even in the presence of CPUs that have
 *    not been configured with respect to the cyclic subsystem, but only
 *    configured CPUs will be eligible to run the new cyclic.
 *
 *  Cyclic handler's context
 *
 *    Cyclic handlers will be executed in interrupt context (this FreeBSD
 *    port does not use the interrupt levels of the original
 *    implementation).  The usual context rules apply.
 *
 *    A cyclic handler may not grab ANY locks held by the caller of any of
 *    cyclic_add() or cyclic_remove(); the implementation of these
 *    functions may require blocking on cyclic handler completion.
 *    Moreover, cyclic handlers may not make any call back into the cyclic
 *    subsystem.
 */
cyclic_id_t
cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
{
	cyc_id_t *idp = cyclic_new_id();
	solaris_cpu_t *c = &solaris_cpu[curcpu];

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(when->cyt_when >= 0 && when->cyt_interval > 0);

	idp->cyi_cpu = c->cpu_cyclic;
	idp->cyi_ndx = cyclic_add_here(idp->cyi_cpu, hdlr, when, 0);

	return ((uintptr_t)idp);
}

/*
 * cyclic_id_t cyclic_add_omni(cyc_omni_handler_t *)
 *
 *  Overview
 *
 *    cyclic_add_omni() will create an omnipresent cyclic with the
 *    specified online and offline handlers.  Omnipresent cyclics run on
 *    all online CPUs, including CPUs which have unbound interrupts
 *    disabled.
 *
 *  Arguments
 *
 *    As its only argument, cyclic_add_omni() takes a cyc_omni_handler,
 *    which has the following members:
 *
 *      void (*cyo_online)()   <-- Online handler
 *      void (*cyo_offline)()  <-- Offline handler
 *      void *cyo_arg          <-- Argument to be passed to on/offline
 *                                 handlers
 *
 *  Online handler
 *
 *    The cyo_online member is a pointer to a function which has the
 *    following four arguments:
 *
 *      void *                 <-- Argument (cyo_arg)
 *      cpu_t *                <-- Pointer to CPU about to be onlined
 *      cyc_handler_t *        <-- Pointer to cyc_handler_t; must be filled
 *                                 in by omni online handler
 *      cyc_time_t *           <-- Pointer to cyc_time_t; must be filled in
 *                                 by omni online handler
 *
 *    The omni cyclic online handler is always called _before_ the omni
 *    cyclic begins to fire on the specified CPU.  As the above argument
 *    description implies, the online handler must fill in the two
 *    structures passed to it: the cyc_handler_t and the cyc_time_t.  These
 *    are the same two structures passed to cyclic_add(), outlined above.
 *    This allows the omni cyclic to have maximum flexibility; different
 *    CPUs may optionally
 *
 *      (a)  have different intervals
 *      (b)  be explicitly in or out of phase with one another
 *      (c)  have different handlers
 *      (d)  have different handler arguments
 *      (e)  fire at different levels
 *
 *    Of these, (e) seems somewhat dubious, but is nonetheless allowed.
 *
 *    The omni online handler is called in the same context as
 *    cyclic_add(), and has the same liberties: omni online handlers may
 *    perform KM_SLEEP kernel memory allocations, and may grab locks which
 *    are also acquired by cyclic handlers.  However, omni cyclic online
 *    handlers may _not_ call back into the cyclic subsystem, and should be
 *    generally careful about calling into arbitrary kernel subsystems.
 *
 *  Offline handler
 *
 *    The cyo_offline member is a pointer to a function which has the
 *    following three arguments:
 *
 *      void *                 <-- Argument (cyo_arg)
 *      cpu_t *                <-- Pointer to CPU about to be offlined
 *      void *                 <-- CPU's cyclic argument (that is, value
 *                                 to which cyh_arg member of the
 *                                 cyc_handler_t was set in the omni online
 *                                 handler)
 *
 *    The omni cyclic offline handler is always called _after_ the omni
 *    cyclic has ceased firing on the specified CPU.  Its purpose is to
 *    allow cleanup of any resources dynamically allocated in the omni
 *    cyclic online handler.  The context of the offline handler is
 *    identical to that of the online handler; the same constraints and
 *    liberties apply.
 *
 *    The offline handler is optional; it may be NULL.
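 *
 *    For illustration, an online handler satisfying the contract above
 *    might look like the following (a sketch; my_omni_online() and
 *    my_percpu_tick() are hypothetical):
 *
 *      static void
 *      my_omni_online(void *arg, cpu_t *c, cyc_handler_t *hdlr,
 *          cyc_time_t *when)
 *      {
 *              hdlr->cyh_func = my_percpu_tick;
 *              hdlr->cyh_arg = arg;
 *              when->cyt_when = 0;
 *              when->cyt_interval = 10000000;  (10ms on each CPU)
 *      }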
 *
 *  Return value
 *
 *    cyclic_add_omni() returns a cyclic_id_t, which is guaranteed to be a
 *    value other than CYCLIC_NONE.  cyclic_add_omni() cannot fail.
 *
 *  Caller's context
 *
 *    The caller's context is identical to that of cyclic_add(), specified
 *    above.
 */
cyclic_id_t
cyclic_add_omni(cyc_omni_handler_t *omni)
{
	cyc_id_t *idp = cyclic_new_id();
	cyc_cpu_t *cpu;
	cpu_t *c;
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(omni != NULL && omni->cyo_online != NULL);

	idp->cyi_omni_hdlr = *omni;

	CPU_FOREACH(i) {
		c = &solaris_cpu[i];
		if ((cpu = c->cpu_cyclic) == NULL)
			continue;
		cyclic_omni_start(idp, cpu);
	}

	/*
	 * We must have found at least one online CPU on which to run
	 * this cyclic.
	 */
	ASSERT(idp->cyi_omni_list != NULL);
	ASSERT(idp->cyi_cpu == NULL);

	return ((uintptr_t)idp);
}

/*
 * void cyclic_remove(cyclic_id_t)
 *
 *  Overview
 *
 *    cyclic_remove() will remove the specified cyclic from the system.
 *
 *  Arguments and notes
 *
 *    The only argument is a cyclic_id returned from either cyclic_add()
 *    or cyclic_add_omni().
 *
 *    By the time cyclic_remove() returns, the caller is guaranteed that
 *    the removed cyclic handler has completed execution (this is the same
 *    semantic that untimeout() provides).  As a result, cyclic_remove()
 *    may need to block, waiting for the removed cyclic to complete
 *    execution.  This leads to an important constraint on the caller: no
 *    lock may be held across cyclic_remove() that also may be acquired by
 *    a cyclic handler.
 *
 *  Return value
 *
 *    None; cyclic_remove() always succeeds.
 *
 *  Caller's context
 *
 *    cpu_lock must be held by the caller, and the caller must not be in
 *    interrupt context.  The caller may not hold any locks which are also
 *    grabbed by any cyclic handler.  See "Arguments and notes", above.
 */
void
cyclic_remove(cyclic_id_t id)
{
	cyc_id_t *idp = (cyc_id_t *)id;
	cyc_id_t *prev = idp->cyi_prev, *next = idp->cyi_next;
	cyc_cpu_t *cpu = idp->cyi_cpu;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpu != NULL) {
		(void) cyclic_remove_here(cpu, idp->cyi_ndx, NULL, CY_WAIT);
	} else {
		ASSERT(idp->cyi_omni_list != NULL);
		while (idp->cyi_omni_list != NULL)
			cyclic_omni_stop(idp, idp->cyi_omni_list->cyo_cpu);
	}

	if (prev != NULL) {
		ASSERT(cyclic_id_head != idp);
		prev->cyi_next = next;
	} else {
		ASSERT(cyclic_id_head == idp);
		cyclic_id_head = next;
	}

	if (next != NULL)
		next->cyi_prev = prev;

	kmem_cache_free(cyclic_id_cache, idp);
}

static void
cyclic_init(cyc_backend_t *be)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Copy the passed cyc_backend into the backend template.  This must
	 * be done before the CPU can be configured.
	 */
	bcopy(be, &cyclic_backend, sizeof (cyc_backend_t));

	cyclic_configure(&solaris_cpu[curcpu]);
}

/*
 * It is assumed that cyclic_mp_init() is called some time after cyclic
 * init (and therefore, after cpu0 has been initialized).  We grab
 * cpu_lock, find the already initialized CPU, and initialize every other
 * CPU with the same backend.
 */
static void
cyclic_mp_init(void)
{
	cpu_t *c;
	int i;

	mutex_enter(&cpu_lock);

	CPU_FOREACH(i) {
		c = &solaris_cpu[i];
		if (c->cpu_cyclic == NULL)
			cyclic_configure(c);
	}

	mutex_exit(&cpu_lock);
}

static void
cyclic_uninit(void)
{
	cpu_t *c;
	int id;

	CPU_FOREACH(id) {
		c = &solaris_cpu[id];
		if (c->cpu_cyclic == NULL)
			continue;
		cyclic_unconfigure(c);
	}

	if (cyclic_id_cache != NULL)
		kmem_cache_destroy(cyclic_id_cache);
}

#include "cyclic_machdep.c"

/*
 * Cyclic subsystem initialisation.
 */
static void
cyclic_load(void *dummy)
{
	mutex_enter(&cpu_lock);

	/* Initialise the machine-dependent backend. */
	cyclic_machdep_init();

	mutex_exit(&cpu_lock);
}

SYSINIT(cyclic_register, SI_SUB_CYCLIC, SI_ORDER_SECOND, cyclic_load, NULL);

static void
cyclic_unload(void)
{
	mutex_enter(&cpu_lock);

	/* Uninitialise the machine-dependent backend. */
	cyclic_machdep_uninit();

	mutex_exit(&cpu_lock);
}

SYSUNINIT(cyclic_unregister, SI_SUB_CYCLIC, SI_ORDER_SECOND, cyclic_unload,
    NULL);

/* ARGSUSED */
static int
cyclic_modevent(module_t mod __unused, int type, void *data __unused)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		break;

	case MOD_UNLOAD:
		break;

	case MOD_SHUTDOWN:
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

DEV_MODULE(cyclic, cyclic_modevent, NULL);
MODULE_VERSION(cyclic, 1);
MODULE_DEPEND(cyclic, opensolaris, 1, 1, 1);