/*-
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD$
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */

#ifndef	_G_GSCHED_H_
#define	_G_GSCHED_H_

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include "g_sched.h"

/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 if success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now.  If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_fini_class() is presumably the counterpart of gs_init_class(),
 *    invoked when a client's class goes away -- TODO confirm against
 *    g_sched.c.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

typedef void *gs_init_t (struct g_geom *geom);
typedef void gs_fini_t (void *data);
typedef int gs_start_t (void *data, struct bio *bio);
typedef void gs_done_t (void *data, struct bio *bio);
typedef struct bio *gs_next_t (void *data, int force);
typedef int gs_init_class_t (void *data, void *priv);
typedef void gs_fini_class_t (void *data, void *priv);
typedef void gs_hash_unref_t (void *data);

/*
 * Descriptor of a scheduling algorithm: its name, bookkeeping fields
 * and the table of methods documented above.
 */
struct g_gsched {
	const char	*gs_name;
	int		gs_refs;
	int		gs_priv_size;

	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	g_dumpconf_t	*gs_dumpconf;

	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t	*gs_hash_unref;

	LIST_ENTRY(g_gsched) glist;
};

#define	KTR_GSCHED	KTR_SPARE4

MALLOC_DECLARE(M_GEOM_SCHED);

/*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 */
struct g_sched_class {
	int	gsc_refs;
	int	gsc_expire;
	u_long	gsc_key;
	LIST_ENTRY(g_sched_class) gsc_clist;

	/*
	 * Start of the scheduler's private area; must stay the last
	 * member (C99 flexible array member).
	 */
	void	*gsc_priv[];
};

/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);

/*
 * Map a private-data pointer back to the g_sched_class that contains
 * it, by subtracting the offset of gsc_priv within the structure.
 * priv must be the address of the gsc_priv member of a g_sched_class
 * (e.g. a value returned by g_sched_get_class()).
 */
static inline struct g_sched_class *
g_sched_priv2class(void *priv)
{

	/* uintptr_t is the integer type meant for pointer round-trips. */
	return ((struct g_sched_class *)((uintptr_t)priv -
	    offsetof(struct g_sched_class, gsc_priv)));
}

/*
 * Take one more reference on the class owning priv by incrementing
 * its gsc_refs counter.  The increment is a plain (non-atomic) one.
 * NOTE(review): callers presumably hold the per-instance lock -- confirm.
 */
static inline void
g_sched_priv_ref(void *priv)
{
	struct g_sched_class *gsc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_refs++;
}

/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held.
 */
void g_sched_dispatch(struct g_geom *geom);

/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 */
struct g_savg {
	uint64_t	gs_avg;		/* scaled running sum of the samples */
	unsigned int	gs_smpl;	/* scaled sample count, settles at 8 */
};

/*
 * Fold a new sample into the average.  Both fields are decayed by 1/8
 * of their current value before the new contribution is added, so the
 * ratio gs_avg / gs_smpl tracks the exponential average.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected for the
 * average to be meaningful.
 */
static inline int
g_savg_valid(const struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, or 0 if no sample has been added yet.
 * Callers that need a meaningful value should check g_savg_valid()
 * first.
 */
static inline uint64_t
g_savg_read(const struct g_savg *ss)
{

	/* gs_smpl is 0 until the first sample: avoid dividing by zero. */
	if (ss->gs_smpl == 0)
		return (0);

	return (ss->gs_avg / ss->gs_smpl);
}

/*
 * Declaration of a scheduler module.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

#define	DECLARE_GSCHED_MODULE(name, gsched)			\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_gsched_modevent,				\
		gsched,						\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);

#endif	/* _KERNEL */

#endif	/* _G_GSCHED_H_ */