/* gs_delay.c revision 292425 */
/*-
 * Copyright (c) 2015 Netflix, Inc.
 *
 * Derived from gs_rr.c:
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD: head/sys/geom/sched/gs_delay.c 292425 2015-12-18 05:39:25Z imp $
 *
 * A simple scheduler that just delays certain transactions by a certain
 * amount. We collect all the transactions that are 'done' and put them on
 * a queue. The queue is run through every so often and the transactions that
 * have taken longer than the threshold delay are completed.
39 */ 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/kernel.h> 44#include <sys/bio.h> 45#include <sys/callout.h> 46#include <sys/malloc.h> 47#include <sys/module.h> 48#include <sys/proc.h> 49#include <sys/queue.h> 50#include <sys/sbuf.h> 51#include <sys/sysctl.h> 52#include "gs_scheduler.h" 53 54/* Useful constants */ 55#define BTFRAC_1US 18446744073709ULL /* 2^64 / 1000000 */ 56 57/* list of scheduler instances */ 58LIST_HEAD(g_scheds, g_delay_softc); 59 60/* 61 * Per device descriptor, holding the Round Robin list of queues 62 * accessing the disk, a reference to the geom, and the timer. 63 */ 64struct g_delay_softc { 65 struct g_geom *sc_geom; 66 67 struct bio_queue_head sc_bioq; /* queue of pending requests */ 68 struct callout sc_wait; /* timer for completing with delays */ 69 70 /* Statistics */ 71 int sc_in_flight; /* requests in the driver */ 72}; 73 74/* 75 * parameters, config and stats 76 */ 77struct g_delay_params { 78 uint64_t io; 79 int bypass; /* bypass scheduling */ 80 int units; /* how many instances */ 81 int latency; /* How big a latncy are hoping for */ 82}; 83 84static struct g_delay_params me = { 85 .bypass = 0, 86 .units = 0, 87 .latency = 0, 88 .io = 0, 89}; 90struct g_delay_params *gs_delay_me = &me; 91 92SYSCTL_DECL(_kern_geom_sched); 93static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, delay, CTLFLAG_RW, 0, 94 "GEOM_SCHED DELAY stuff"); 95SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, bypass, CTLFLAG_RD, 96 &me.bypass, 0, "Scheduler bypass"); 97SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, units, CTLFLAG_RD, 98 &me.units, 0, "Scheduler instances"); 99SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, latency, CTLFLAG_RW, 100 &me.latency, 0, "Minimum latency for requests, in microseconds (1/hz resolution)"); 101SYSCTL_QUAD(_kern_geom_sched_delay, OID_AUTO, io, CTLFLAG_RW, 102 &me.io, 0, "I/Os delayed\n"); 103 104static int 105g_delay_init_class(void *data, void *priv) 106{ 107 return (0); 108} 109 110static void 
111g_delay_fini_class(void *data, void *priv) 112{ 113} 114 115/* 116 * Called on a request arrival, timeout or completion. 117 * Try to serve a request among those queued. 118 */ 119static struct bio * 120g_delay_next(void *data, int force) 121{ 122 struct g_delay_softc *sc = data; 123 struct bio *bp; 124 struct bintime bt; 125 126 bp = bioq_first(&sc->sc_bioq); 127 if (bp == NULL) 128 return (NULL); 129 130 /* 131 * If the time isn't yet ripe for this bp to be let loose, 132 * then the time isn't ripe for any of its friends either 133 * since we insert in-order. Terminate if the bio hasn't 134 * aged appropriately. Note that there's pathology here 135 * such that we may be up to one tick early in releasing 136 * this I/O. We could implement this up to a tick late too 137 * but choose not to. 138 */ 139 getbinuptime(&bt); /* BIO's bio_t0 is uptime */ 140 if (bintime_cmp(&bp->bio_t0, &bt, >)) 141 return (NULL); 142 me.io++; 143 144 /* 145 * The bp has mellowed enough, let it through and update stats. 146 * If there's others, we'll catch them next time we get called. 147 */ 148 sc->sc_in_flight++; 149 150 bp = bioq_takefirst(&sc->sc_bioq); 151 return (bp); 152} 153 154/* 155 * Called when a real request for disk I/O arrives. 156 * Locate the queue associated with the client. 157 * If the queue is the one we are anticipating for, reset its timeout; 158 * if the queue is not in the round robin list, insert it in the list. 159 * On any error, do not queue the request and return -1, the caller 160 * will take care of this request. 161 */ 162static int 163g_delay_start(void *data, struct bio *bp) 164{ 165 struct g_delay_softc *sc = data; 166 167 if (me.bypass) 168 return (-1); /* bypass the scheduler */ 169 170 bp->bio_caller1 = sc; 171 getbinuptime(&bp->bio_t0); /* BIO's bio_t0 is uptime */ 172 bintime_addx(&bp->bio_t0, BTFRAC_1US * me.latency); 173 174 /* 175 * Keep the I/Os ordered. Lower layers will reorder as we release them down. 
176 * We rely on this in g_delay_next() so that we delay all things equally. Even 177 * if we move to multiple queues to push stuff down the stack, we'll want to 178 * insert in order and let the lower layers do whatever reordering they want. 179 */ 180 bioq_insert_tail(&sc->sc_bioq, bp); 181 182 return (0); 183} 184 185static void 186g_delay_timeout(void *data) 187{ 188 struct g_delay_softc *sc = data; 189 190 g_sched_lock(sc->sc_geom); 191 g_sched_dispatch(sc->sc_geom); 192 g_sched_unlock(sc->sc_geom); 193 callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc); 194} 195 196/* 197 * Module glue: allocate descriptor, initialize its fields. 198 */ 199static void * 200g_delay_init(struct g_geom *geom) 201{ 202 struct g_delay_softc *sc; 203 204 /* XXX check whether we can sleep */ 205 sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO); 206 sc->sc_geom = geom; 207 bioq_init(&sc->sc_bioq); 208 callout_init(&sc->sc_wait, CALLOUT_MPSAFE); 209 callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc); 210 me.units++; 211 212 return (sc); 213} 214 215/* 216 * Module glue -- drain the callout structure, destroy the 217 * hash table and its element, and free the descriptor. 218 */ 219static void 220g_delay_fini(void *data) 221{ 222 struct g_delay_softc *sc = data; 223 224 /* We're force drained before getting here */ 225 226 /* Kick out timers */ 227 callout_drain(&sc->sc_wait); 228 me.units--; 229 free(sc, M_GEOM_SCHED); 230} 231 232/* 233 * Called when the request under service terminates. 234 * Start the anticipation timer if needed. 
235 */ 236static void 237g_delay_done(void *data, struct bio *bp) 238{ 239 struct g_delay_softc *sc = data; 240 241 sc->sc_in_flight--; 242 243 g_sched_dispatch(sc->sc_geom); 244} 245 246static void 247g_delay_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 248 struct g_consumer *cp, struct g_provider *pp) 249{ 250} 251 252static struct g_gsched g_delay = { 253 .gs_name = "delay", 254 .gs_priv_size = 0, 255 .gs_init = g_delay_init, 256 .gs_fini = g_delay_fini, 257 .gs_start = g_delay_start, 258 .gs_done = g_delay_done, 259 .gs_next = g_delay_next, 260 .gs_dumpconf = g_delay_dumpconf, 261 .gs_init_class = g_delay_init_class, 262 .gs_fini_class = g_delay_fini_class, 263}; 264 265DECLARE_GSCHED_MODULE(delay, &g_delay); 266