/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD$
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */

#ifndef _G_GSCHED_H_
#define _G_GSCHED_H_

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include "g_sched.h"

/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 if success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now.  If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_fini_class() is not described by the original notes; from its
 *    name and signature it is presumably the release-side counterpart
 *    of gs_init_class().  TODO: confirm against g_sched.c.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 *
 * gs_dumpconf has the GEOM g_dumpconf_t type; presumably it is used as
 *    the scheduler's dumpconf hook.  TODO: confirm against g_sched.c.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

typedef void *gs_init_t (struct g_geom *geom);
typedef void gs_fini_t (void *data);
typedef int gs_start_t (void *data, struct bio *bio);
typedef void gs_done_t (void *data, struct bio *bio);
typedef struct bio *gs_next_t (void *data, int force);
typedef int gs_init_class_t (void *data, void *priv);
typedef void gs_fini_class_t (void *data, void *priv);
typedef void gs_hash_unref_t (void *data);

/*
 * Descriptor of a scheduling algorithm: its name, bookkeeping fields and
 * the table of methods described above.  One instance is handed to
 * DECLARE_GSCHED_MODULE() by each scheduler module.
 */
struct g_gsched {
	const char	*gs_name;
	int		gs_refs;
	int		gs_priv_size;	/* presumably the size of the per-class
					   private area; confirm in g_sched.c */

	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	g_dumpconf_t	*gs_dumpconf;

	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t	*gs_hash_unref;

	LIST_ENTRY(g_gsched) glist;
};

#define KTR_GSCHED	KTR_SPARE4

MALLOC_DECLARE(M_GEOM_SCHED);

/*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 *
 * The private area starts at gsc_priv, which is why it must remain the
 * last member: g_sched_priv2class() recovers the class from a private
 * pointer by subtracting offsetof(struct g_sched_class, gsc_priv).
 */
struct g_sched_class {
	int	gsc_refs;
	int	gsc_expire;
	u_long	gsc_key;
	LIST_ENTRY(g_sched_class) gsc_clist;

	/* C99 flexible array member (was the GNU zero-length form). */
	void	*gsc_priv[];
};

/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);

/*
 * Map a private-data pointer back to the g_sched_class that contains it.
 * priv must be the address of the gsc_priv member of a g_sched_class.
 */
static inline struct g_sched_class *
g_sched_priv2class(void *priv)
{

	/*
	 * Byte-wise pointer arithmetic instead of a round trip through
	 * u_long, so no assumption is made about the width of u_long.
	 * The intermediate (void *) cast avoids alignment-cast warnings.
	 */
	return ((struct g_sched_class *)(void *)((char *)priv -
	    offsetof(struct g_sched_class, gsc_priv)));
}

/*
 * Take one more reference on the class owning priv by incrementing its
 * gsc_refs counter.  The increment is a plain, non-atomic one.
 */
static inline void
g_sched_priv_ref(void *priv)
{
	struct g_sched_class *gsc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_refs++;
}

/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held.
 */
void g_sched_dispatch(struct g_geom *geom);

/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * gs_avg is the scaled running sum and gs_smpl the (equally decayed)
 * sample weight; the average is their ratio, see g_savg_read().
 */
struct g_savg {
	uint64_t	gs_avg;
	unsigned int	gs_smpl;
};

/*
 * Fold a new sample into the running average.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	/* Same decay on the weight: counts 1, 2, ..., 8 and then stays at 8. */
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected for
 * g_savg_read() to be meaningful.
 */
static inline int
g_savg_valid(const struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average.  Callers are expected to check
 * g_savg_valid() first; if no sample has been added yet (gs_smpl == 0)
 * the result is 0 rather than a division by zero.
 */
static inline uint64_t
g_savg_read(const struct g_savg *ss)
{

	if (ss->gs_smpl == 0)
		return (0);
	return (ss->gs_avg / ss->gs_smpl);
}

/*
 * Declaration of a scheduler module.
 *
 * DECLARE_GSCHED_MODULE(name, gsched) defines the moduledata_t for the
 * module `name', using g_gsched_modevent() as event handler and gsched
 * as its private argument, and records a dependency on geom_sched.
 * The expansion already ends with a semicolon.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

#define DECLARE_GSCHED_MODULE(name, gsched)				\
	static moduledata_t name##_mod = {				\
		#name,							\
		g_gsched_modevent,					\
		gsched,							\
	};								\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);

#endif	/* _KERNEL */

#endif	/* _G_GSCHED_H_ */