/*-
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD: head/sys/geom/sched/gs_scheduler.h 206552 2010-04-13 09:56:17Z luigi $
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */

#ifndef	_G_GSCHED_H_
#define	_G_GSCHED_H_

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include "g_sched.h"

/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *	starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *	enqueue the request and return 0 if success, or return non-zero
 *	in case of failure (meaning the request is passed down).
 *	The scheduler can use bio->bio_caller1 to store a non-null
 *	pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *	gs_start(), or on timeouts or 'done' events. It should return
 *	immediately, either a pointer to the bio to be served or NULL
 *	if no bio should be served now. If force is specified, a
 *	work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *	In turn the scheduler may decide to call the dispatch loop
 *	to serve other pending requests (or make sure there is a pending
 *	timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *	the classifier) starts being used.
 *
 * gs_fini_class() is presumably the counterpart of gs_init_class(),
 *	called when a client is released -- NOTE(review): confirm
 *	against g_sched.c, this header does not show the call site.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *	destroyed; after this call, the scheduler is supposed to hold no
 *	more references to the elements in the table.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

typedef void *gs_init_t(struct g_geom *geom);
typedef void gs_fini_t(void *data);
typedef int gs_start_t(void *data, struct bio *bio);
typedef void gs_done_t(void *data, struct bio *bio);
typedef struct bio *gs_next_t(void *data, int force);
typedef int gs_init_class_t(void *data, void *priv);
typedef void gs_fini_class_t(void *data, void *priv);
typedef void gs_hash_unref_t(void *data);

/*
 * Descriptor of a scheduling algorithm: its name, bookkeeping fields
 * and the table of methods described above.
 */
struct g_gsched {
	const char	*gs_name;
	int		gs_refs;
	int		gs_priv_size;

	/* Per-instance methods. */
	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	g_dumpconf_t	*gs_dumpconf;	/* type comes from <geom/geom.h> */

	/* Per-class (classifier) methods. */
	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t	*gs_hash_unref;

	LIST_ENTRY(g_gsched) glist;
};

#define	KTR_GSCHED	KTR_SPARE4

MALLOC_DECLARE(M_GEOM_SCHED);

/*
 * Basic classification mechanism. Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair. The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 */
struct g_sched_class {
	int	gsc_refs;
	int	gsc_expire;
	u_long	gsc_key;
	LIST_ENTRY(g_sched_class) gsc_clist;

	/*
	 * Start of the scheduler's private area; it must stay the last
	 * member because g_sched_priv2class() recovers the class from
	 * the address of this field.
	 */
	void	*gsc_priv[0];
};

/*
 * Manipulate the classifier's data. g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary. g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);

/*
 * Map a pointer to a class's private data back to the enclosing
 * g_sched_class by subtracting the offset of gsc_priv.
 * priv must be a pointer to the gsc_priv area of a g_sched_class
 * (such as the value returned by g_sched_get_class()).
 */
static inline struct g_sched_class *
g_sched_priv2class(void *priv)
{

	/* uintptr_t is the integer type meant for pointer round-trips. */
	return ((struct g_sched_class *)((uintptr_t)priv -
	    offsetof(struct g_sched_class, gsc_priv)));
}

/*
 * Take an additional reference on the class owning priv.
 * The counter is bumped with a plain increment; NOTE(review): the
 * caller presumably holds the per-instance lock -- confirm.
 */
static inline void
g_sched_priv_ref(void *priv)
{
	struct g_sched_class *gsc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_refs++;
}

/*
 * Locking interface. When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it. If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching. Must be called with the per-instance
 * mutex held.
 */
void g_sched_dispatch(struct g_geom *geom);

/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history. Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * Both fields are decayed by 7/8 on every new sample, so gs_avg holds
 * a weighted sum of the samples and gs_smpl the matching sum of the
 * weights; the average is their ratio (see g_savg_read()).
 */
struct g_savg {
	uint64_t	gs_avg;
	unsigned int	gs_smpl;
};

/*
 * Fold a new sample into the running average.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected.
 * gs_smpl grows by one per sample until it reaches 8 and then stays
 * there (1 + 8 - (8 >> 3) == 8).
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, or 0 if no sample has been added yet.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* A zeroed g_savg has gs_smpl == 0: avoid dividing by zero. */
	if (ss->gs_smpl == 0)
		return (0);

	return (ss->gs_avg / ss->gs_smpl);
}

/*
 * Declaration of a scheduler module.
 *
 * DECLARE_GSCHED_MODULE(name, gsched) defines a moduledata_t named
 * name##_mod whose initializers are the module name string,
 * g_gsched_modevent and gsched, declares the module, and records a
 * dependency on geom_sched. The expansion already ends with a
 * semicolon.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

#define	DECLARE_GSCHED_MODULE(name, gsched)			\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_gsched_modevent,				\
		gsched,						\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);

#endif	/* _KERNEL */

#endif	/* _G_GSCHED_H_ */