/* gs_scheduler.h revision 303975 */
/*-
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD: releng/11.0/sys/geom/sched/gs_scheduler.h 218909 2011-02-21 09:01:34Z brucec $
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */

#ifndef	_G_GSCHED_H_
#define	_G_GSCHED_H_

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include "g_sched.h"

/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 if success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now.  If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

typedef void *gs_init_t (struct g_geom *geom);
typedef void gs_fini_t (void *data);
typedef int gs_start_t (void *data, struct bio *bio);
typedef void gs_done_t (void *data, struct bio *bio);
typedef struct bio *gs_next_t (void *data, int force);
typedef int gs_init_class_t (void *data, void *priv);
typedef void gs_fini_class_t (void *data, void *priv);
typedef void gs_hash_unref_t (void *data);

/*
 * Descriptor of a scheduling algorithm: its name, bookkeeping fields
 * and the table of methods documented above.
 *
 * NOTE(review): gs_fini_class and gs_dumpconf are not described in this
 * header; gs_fini_class is presumably the counterpart of gs_init_class
 * -- confirm against g_sched.c.
 */
struct g_gsched {
	const char	*gs_name;
	int		gs_refs;
	int		gs_priv_size;

	/* Per-instance methods. */
	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	g_dumpconf_t	*gs_dumpconf;

	/* Per-class methods. */
	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t	*gs_hash_unref;

	LIST_ENTRY(g_gsched) glist;
};

#define	KTR_GSCHED	KTR_SPARE4

MALLOC_DECLARE(M_GEOM_SCHED);

/*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 */
struct g_sched_class {
	int	gsc_refs;
	int	gsc_expire;
	u_long	gsc_key;
	LIST_ENTRY(g_sched_class) gsc_clist;

	/* Start of the scheduler's private data; must stay the last member. */
	void	*gsc_priv[0];
};

/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);

/*
 * Map a private-data pointer back to the g_sched_class that contains it,
 * by subtracting the offset of the gsc_priv member.  priv must be the
 * address of the gsc_priv area of a class (the kind of pointer that
 * g_sched_get_class() is documented to return).
 */
static inline struct g_sched_class *
g_sched_priv2class(void *priv)
{

	/* uintptr_t is the integer type meant for pointer round-trips. */
	return ((struct g_sched_class *)((uintptr_t)priv -
	    offsetof(struct g_sched_class, gsc_priv)));
}

/*
 * Take an additional reference on the class owning priv.  This is a
 * plain increment of gsc_refs; no locking is done here.
 * NOTE(review): presumably the caller holds the per-instance lock --
 * confirm against the callers.
 */
static inline void
g_sched_priv_ref(void *priv)
{
	struct g_sched_class *gsc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_refs++;
}

/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held.
 */
void g_sched_dispatch(struct g_geom *geom);

/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * gs_avg accumulates the scaled average and gs_smpl the matching scaled
 * sample count; the average itself is gs_avg / gs_smpl (see
 * g_savg_read()).
 */
struct g_savg {
	uint64_t		gs_avg;
	unsigned int		gs_smpl;
};

/*
 * Fold a new sample into the running average.  Both accumulators decay
 * by 1/8 of their value at each step; gs_smpl grows by one per call
 * until it settles at 8.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected for the
 * average to be considered meaningful.
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average.  Callers are expected to check
 * g_savg_valid() first; if no sample has been added yet the result
 * is 0 rather than a division by zero.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* gs_smpl is 0 until the first g_savg_add_sample() call. */
	if (ss->gs_smpl == 0)
		return (0);
	return (ss->gs_avg / ss->gs_smpl);
}

/*
 * Declaration of a scheduler module.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

/*
 * Define the module glue for a scheduling algorithm: a static
 * moduledata_t called <name>_mod carrying the stringified name,
 * g_gsched_modevent() as the event handler and gsched as its argument,
 * followed by DECLARE_MODULE() and a MODULE_DEPEND() on geom_sched.
 * The expansion already ends with a semicolon.
 */
#define	DECLARE_GSCHED_MODULE(name, gsched)			\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_gsched_modevent,				\
		gsched,						\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);

#endif	/* _KERNEL */

#endif	/* _G_GSCHED_H_ */