gs_scheduler.h revision 206497
/*-
 * Copyright (c) 2009-2010 Fabio Checconi, Luigi Rizzo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD: head/sys/geom/sched/gs_scheduler.h 206497 2010-04-12 16:37:45Z luigi $
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */

#ifndef	_G_GSCHED_H_
#define	_G_GSCHED_H_

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include "g_sched.h"

/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *	starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *	enqueue the request and return 0 if success, or return non-zero
 *	in case of failure (meaning the request is passed down).
 *	The scheduler can use bio->bio_caller1 to store a non-null
 *	pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *	gs_start(), or on timeouts or 'done' events. It should return
 *	immediately, either a pointer to the bio to be served or NULL
 *	if no bio should be served now.  If force is specified, a
 *	work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *	In turn the scheduler may decide to call the dispatch loop
 *	to serve other pending requests (or make sure there is a pending
 *	timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *	the classifier) starts being used.
 *
 * gs_fini_class() is the counterpart of gs_init_class().
 *	NOTE(review): presumably called when a client's class is
 *	released; confirm against g_sched.c.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *	destroyed; after this call, the scheduler is supposed to hold no
 *	more references to the elements in the table.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

typedef void	*gs_init_t (struct g_geom *geom);
typedef void	gs_fini_t (void *data);
typedef int	gs_start_t (void *data, struct bio *bio);
typedef void	gs_done_t (void *data, struct bio *bio);
typedef struct bio *gs_next_t (void *data, int force);
typedef int	gs_init_class_t (void *data, void *priv);
typedef void	gs_fini_class_t (void *data, void *priv);
typedef void	gs_hash_unref_t (void *data);

/*
 * Descriptor of one scheduling algorithm: identification and bookkeeping
 * fields followed by the method table documented above.  This is the
 * structure a scheduler module hands to DECLARE_GSCHED_MODULE.
 */
struct g_gsched {
	const char	*gs_name;	/* name of the algorithm */
	int		gs_refs;	/* NOTE(review): presumably a use count; confirm */
	int		gs_priv_size;	/* NOTE(review): presumably the size of the
					 * per-class private area; confirm */

	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	g_dumpconf_t	*gs_dumpconf;

	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t	*gs_hash_unref;

	LIST_ENTRY(g_gsched) glist;	/* linkage in a list of g_gsched */
};

#define	KTR_GSCHED	KTR_SPARE4

MALLOC_DECLARE(M_GEOM_SCHED);

/*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 *
 * NOTE(review): no g_sched_private type is declared in this header;
 * the container declared here is struct g_sched_class, with the private
 * area starting at gsc_priv.  Confirm whether the name above is stale.
 */
struct g_sched_class {
	int	gsc_refs;		/* incremented by g_sched_priv_ref() */
	int	gsc_expire;
	u_long	gsc_key;
	LIST_ENTRY(g_sched_class) gsc_clist;

	void	*gsc_priv[0];		/* start of the private area */
};

/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);

/*
 * Map a pointer to a class' private area back to the enclosing
 * struct g_sched_class, by subtracting the offset of gsc_priv.
 * priv must be the address of the gsc_priv member of a g_sched_class.
 */
static inline struct g_sched_class *
g_sched_priv2class(void *priv)
{

	/* uintptr_t is the integer type meant for pointer round trips. */
	return ((struct g_sched_class *)((uintptr_t)priv -
	    offsetof(struct g_sched_class, gsc_priv)));
}

/*
 * Increment the reference count of the class that owns priv.
 * The increment is a plain (non-atomic) one.
 * NOTE(review): presumably serialized by the per-instance lock
 * described below; confirm against the callers.
 */
static inline void
g_sched_priv_ref(void *priv)
{
	struct g_sched_class *gsc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_refs++;
}

/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held.
 */
void g_sched_dispatch(struct g_geom *geom);

/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 */
struct g_savg {
	uint64_t	gs_avg;		/* scaled running sum, see below */
	unsigned int	gs_smpl;	/* scaled sample count, see below */
};

/*
 * Fold a new sample into the average.
 *
 * Both fields get the update new = input + old - (old >> 3); gs_avg is
 * fed the sample, gs_smpl is fed the constant 1.  Starting from zero,
 * gs_smpl therefore counts 1, 2, ... up to 8 and then stays at 8, while
 * gs_avg settles around 8 times the averaged value.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected for the
 * average to be meaningful.
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, i.e. gs_avg scaled back by gs_smpl.
 * Returns 0 when no sample has been recorded yet.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* gs_smpl == 0 would otherwise be an integer division by zero. */
	if (ss->gs_smpl == 0)
		return (0);

	return (ss->gs_avg / ss->gs_smpl);
}

/*
 * Declaration of a scheduler module.
 *
 * DECLARE_GSCHED_MODULE(name, gsched) defines a static moduledata_t
 * called <name>_mod, initialized positionally with the stringified
 * name, g_gsched_modevent and gsched, then declares the module and
 * records its dependency on geom_sched.
 *
 * The expansion already ends with a semicolon.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

#define	DECLARE_GSCHED_MODULE(name, gsched)			\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_gsched_modevent,				\
		gsched,						\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);

#endif	/* _KERNEL */

#endif	/* _G_GSCHED_H_ */