/* geom_fox.c revision 219029 */
133965Sjdp/*- 2218822Sdim * Copyright (c) 2003 Poul-Henning Kamp 3218822Sdim * All rights reserved. 433965Sjdp * 533965Sjdp * Redistribution and use in source and binary forms, with or without 633965Sjdp * modification, are permitted provided that the following conditions 733965Sjdp * are met: 833965Sjdp * 1. Redistributions of source code must retain the above copyright 933965Sjdp * notice, this list of conditions and the following disclaimer. 1033965Sjdp * 2. Redistributions in binary form must reproduce the above copyright 1133965Sjdp * notice, this list of conditions and the following disclaimer in the 1233965Sjdp * documentation and/or other materials provided with the distribution. 1333965Sjdp * 3. The names of the authors may not be used to endorse or promote 1433965Sjdp * products derived from this software without specific prior written 1533965Sjdp * permission. 1633965Sjdp * 1733965Sjdp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1833965Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19218822Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2033965Sjdp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21130561Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22130561Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23130561Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24130561Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25130561Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26130561Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27130561Sobrien * SUCH DAMAGE. 
28130561Sobrien * 29130561Sobrien * $FreeBSD: head/sys/geom/geom_fox.c 219029 2011-02-25 10:24:35Z netchild $ 30130561Sobrien */ 31130561Sobrien 32130561Sobrien/* This is a GEOM module for handling path selection for multi-path 33130561Sobrien * storage devices. It is named "fox" because it, like they, prefer 34218822Sdim * to have multiple exits to choose from. 35218822Sdim * 36218822Sdim */ 37218822Sdim 38218822Sdim#include <sys/param.h> 39130561Sobrien#include <sys/systm.h> 40130561Sobrien#include <sys/sysctl.h> 4133965Sjdp#include <sys/kernel.h> 42130561Sobrien#include <sys/conf.h> 43130561Sobrien#include <sys/bio.h> 44130561Sobrien#include <sys/malloc.h> 45130561Sobrien#include <sys/lock.h> 4633965Sjdp#include <sys/mutex.h> 47130561Sobrien#include <sys/libkern.h> 4833965Sjdp#include <sys/endian.h> 49130561Sobrien#include <sys/md5.h> 5033965Sjdp#include <sys/errno.h> 51130561Sobrien#include <geom/geom.h> 52218822Sdim 53218822Sdim#define FOX_CLASS_NAME "FOX" 54218822Sdim#define FOX_MAGIC "GEOM::FOX" 55130561Sobrien 56130561SobrienFEATURE(geom_fox, "GEOM FOX redundant path mitigation support"); 57218822Sdim 58130561Sobrienstruct g_fox_softc { 59218822Sdim off_t mediasize; 60218822Sdim u_int sectorsize; 61218822Sdim TAILQ_HEAD(, bio) queue; 62218822Sdim struct mtx lock; 63218822Sdim u_char magic[16]; 64218822Sdim struct g_consumer *path; 65218822Sdim struct g_consumer *opath; 66130561Sobrien int waiting; 67130561Sobrien int cr, cw, ce; 68218822Sdim}; 69218822Sdim 70130561Sobrien/* 71130561Sobrien * This function is called whenever we need to select a new path. 
72130561Sobrien */ 73130561Sobrienstatic void 7433965Sjdpg_fox_select_path(void *arg, int flag) 7533965Sjdp{ 7633965Sjdp struct g_geom *gp; 7733965Sjdp struct g_fox_softc *sc; 7833965Sjdp struct g_consumer *cp1; 7933965Sjdp struct bio *bp; 8033965Sjdp int error; 8133965Sjdp 8277298Sobrien g_topology_assert(); 8377298Sobrien if (flag == EV_CANCEL) 84130561Sobrien return; 85130561Sobrien gp = arg; 8633965Sjdp sc = gp->softc; 8733965Sjdp 8833965Sjdp if (sc->opath != NULL) { 8977298Sobrien /* 9077298Sobrien * First, close the old path entirely. 91130561Sobrien */ 92130561Sobrien printf("Closing old path (%s) on fox (%s)\n", 9333965Sjdp sc->opath->provider->name, gp->name); 9433965Sjdp 9533965Sjdp cp1 = LIST_NEXT(sc->opath, consumer); 9677298Sobrien 9777298Sobrien g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1)); 98130561Sobrien 99130561Sobrien /* 10033965Sjdp * The attempt to reopen it with a exclusive count 10133965Sjdp */ 10233965Sjdp error = g_access(sc->opath, 0, 0, 1); 10377298Sobrien if (error) { 10477298Sobrien /* 105130561Sobrien * Ok, ditch this consumer, we can't use it. 
106130561Sobrien */ 10733965Sjdp printf("Drop old path (%s) on fox (%s)\n", 10833965Sjdp sc->opath->provider->name, gp->name); 10938889Sjdp g_detach(sc->opath); 11038889Sjdp g_destroy_consumer(sc->opath); 11138889Sjdp if (LIST_EMPTY(&gp->consumer)) { 11238889Sjdp /* No consumers left */ 11338889Sjdp g_wither_geom(gp, ENXIO); 11477298Sobrien for (;;) { 11577298Sobrien bp = TAILQ_FIRST(&sc->queue); 116130561Sobrien if (bp == NULL) 117130561Sobrien break; 11838889Sjdp TAILQ_REMOVE(&sc->queue, bp, bio_queue); 11938889Sjdp bp->bio_error = ENXIO; 120218822Sdim g_std_done(bp); 121130561Sobrien } 122218822Sdim return; 123218822Sdim } 124218822Sdim } else { 125218822Sdim printf("Got e-bit on old path (%s) on fox (%s)\n", 126218822Sdim sc->opath->provider->name, gp->name); 127218822Sdim } 128218822Sdim sc->opath = NULL; 129218822Sdim } else { 130218822Sdim cp1 = LIST_FIRST(&gp->consumer); 131218822Sdim } 132218822Sdim if (cp1 == NULL) 133218822Sdim cp1 = LIST_FIRST(&gp->consumer); 134218822Sdim printf("Open new path (%s) on fox (%s)\n", 135218822Sdim cp1->provider->name, gp->name); 136218822Sdim error = g_access(cp1, sc->cr, sc->cw, sc->ce); 137218822Sdim if (error) { 138218822Sdim /* 139218822Sdim * If we failed, we take another trip through here 140218822Sdim */ 141218822Sdim printf("Open new path (%s) on fox (%s) failed, reselect.\n", 142218822Sdim cp1->provider->name, gp->name); 143218822Sdim sc->opath = cp1; 144218822Sdim g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); 145218822Sdim } else { 146218822Sdim printf("Open new path (%s) on fox (%s) succeeded\n", 147130561Sobrien cp1->provider->name, gp->name); 148218822Sdim mtx_lock(&sc->lock); 149130561Sobrien sc->path = cp1; 150130561Sobrien sc->waiting = 0; 151130561Sobrien for (;;) { 152130561Sobrien bp = TAILQ_FIRST(&sc->queue); 153130561Sobrien if (bp == NULL) 154130561Sobrien break; 155130561Sobrien TAILQ_REMOVE(&sc->queue, bp, bio_queue); 156218822Sdim g_io_request(bp, sc->path); 157130561Sobrien } 
158130561Sobrien mtx_unlock(&sc->lock); 159130561Sobrien } 160130561Sobrien} 161218822Sdim 162130561Sobrienstatic void 163130561Sobrieng_fox_orphan(struct g_consumer *cp) 164130561Sobrien{ 165130561Sobrien struct g_geom *gp; 166130561Sobrien struct g_fox_softc *sc; 167130561Sobrien int error, mark; 16833965Sjdp 16933965Sjdp g_topology_assert(); 17033965Sjdp gp = cp->geom; 17177298Sobrien sc = gp->softc; 172130561Sobrien printf("Removing path (%s) from fox (%s)\n", 173130561Sobrien cp->provider->name, gp->name); 17433965Sjdp mtx_lock(&sc->lock); 17533965Sjdp if (cp == sc->path) { 17633965Sjdp sc->opath = NULL; 17733965Sjdp sc->path = NULL; 17877298Sobrien sc->waiting = 1; 17977298Sobrien mark = 1; 180130561Sobrien } else { 181130561Sobrien mark = 0; 18233965Sjdp } 18333965Sjdp mtx_unlock(&sc->lock); 18433965Sjdp 18533965Sjdp g_access(cp, -cp->acr, -cp->acw, -cp->ace); 18633965Sjdp error = cp->provider->error; 18777298Sobrien g_detach(cp); 188130561Sobrien g_destroy_consumer(cp); 189130561Sobrien if (!LIST_EMPTY(&gp->consumer)) { 19033965Sjdp if (mark) 19133965Sjdp g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); 19233965Sjdp return; 19389857Sobrien } 19489857Sobrien 195130561Sobrien mtx_destroy(&sc->lock); 196130561Sobrien g_free(gp->softc); 19789857Sobrien gp->softc = NULL; 19889857Sobrien g_wither_geom(gp, ENXIO); 19989857Sobrien} 20033965Sjdp 20133965Sjdpstatic void 20233965Sjdpg_fox_done(struct bio *bp) 20377298Sobrien{ 204130561Sobrien struct g_geom *gp; 205130561Sobrien struct g_fox_softc *sc; 20633965Sjdp int error; 20789857Sobrien 20889857Sobrien if (bp->bio_error == 0) { 20989857Sobrien g_std_done(bp); 21089857Sobrien return; 21189857Sobrien } 212130561Sobrien gp = bp->bio_from->geom; 213130561Sobrien sc = gp->softc; 21489857Sobrien if (bp->bio_from != sc->path) { 21589857Sobrien g_io_request(bp, sc->path); 21689857Sobrien return; 21789857Sobrien } 21889857Sobrien mtx_lock(&sc->lock); 21989857Sobrien sc->opath = sc->path; 220130561Sobrien sc->path = 
NULL; 221130561Sobrien error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL); 22289857Sobrien if (error) { 22389857Sobrien bp->bio_error = ENOMEM; 22489857Sobrien g_std_done(bp); 22589857Sobrien } else { 22689857Sobrien sc->waiting = 1; 227130561Sobrien TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue); 228130561Sobrien } 22989857Sobrien mtx_unlock(&sc->lock); 23089857Sobrien} 23189857Sobrien 23289857Sobrienstatic void 23389857Sobrieng_fox_start(struct bio *bp) 234130561Sobrien{ 235130561Sobrien struct g_geom *gp; 23689857Sobrien struct bio *bp2; 23789857Sobrien struct g_fox_softc *sc; 23889857Sobrien int error; 23989857Sobrien 24089857Sobrien gp = bp->bio_to->geom; 241130561Sobrien sc = gp->softc; 242130561Sobrien if (sc == NULL) { 24389857Sobrien g_io_deliver(bp, ENXIO); 24489857Sobrien return; 24589857Sobrien } 24689857Sobrien switch(bp->bio_cmd) { 24789857Sobrien case BIO_READ: 248130561Sobrien case BIO_WRITE: 249130561Sobrien case BIO_DELETE: 25089857Sobrien bp2 = g_clone_bio(bp); 25133965Sjdp if (bp2 == NULL) { 252218822Sdim g_io_deliver(bp, ENOMEM); 253218822Sdim break; 254218822Sdim } 255218822Sdim bp2->bio_offset += sc->sectorsize; 256218822Sdim bp2->bio_done = g_fox_done; 257130561Sobrien mtx_lock(&sc->lock); 25833965Sjdp if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) { 25933965Sjdp if (sc->waiting == 0) { 260218822Sdim error = g_post_event(g_fox_select_path, gp, 261218822Sdim M_NOWAIT, gp, NULL); 26233965Sjdp if (error) { 263218822Sdim g_destroy_bio(bp2); 26433965Sjdp bp2 = NULL; 265218822Sdim g_io_deliver(bp, error); 266218822Sdim } else { 26733965Sjdp sc->waiting = 1; 268218822Sdim } 269218822Sdim } 270218822Sdim if (bp2 != NULL) 27133965Sjdp TAILQ_INSERT_TAIL(&sc->queue, bp2, 27233965Sjdp bio_queue); 273218822Sdim } else { 27433965Sjdp g_io_request(bp2, sc->path); 275218822Sdim } 27633965Sjdp mtx_unlock(&sc->lock); 277218822Sdim break; 278218822Sdim default: 279218822Sdim g_io_deliver(bp, EOPNOTSUPP); 280218822Sdim break; 28133965Sjdp } 
282218822Sdim return; 283218822Sdim} 284218822Sdim 285218822Sdimstatic int 286218822Sdimg_fox_access(struct g_provider *pp, int dr, int dw, int de) 287218822Sdim{ 288218822Sdim struct g_geom *gp; 289218822Sdim struct g_fox_softc *sc; 290218822Sdim struct g_consumer *cp1; 29133965Sjdp int error; 292218822Sdim 293218822Sdim g_topology_assert(); 29433965Sjdp gp = pp->geom; 29533965Sjdp sc = gp->softc; 29633965Sjdp if (sc == NULL) { 29733965Sjdp if (dr <= 0 && dw <= 0 && de <= 0) 29833965Sjdp return (0); 29933965Sjdp else 30033965Sjdp return (ENXIO); 30133965Sjdp } 302218822Sdim 303218822Sdim if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { 30433965Sjdp /* 305218822Sdim * First open, open all consumers with an exclusive bit 30633965Sjdp */ 30733965Sjdp error = 0; 30833965Sjdp LIST_FOREACH(cp1, &gp->consumer, consumer) { 30933965Sjdp error = g_access(cp1, 0, 0, 1); 31033965Sjdp if (error) { 31133965Sjdp printf("FOX: access(%s,0,0,1) = %d\n", 31233965Sjdp cp1->provider->name, error); 31333965Sjdp break; 31433965Sjdp } 31533965Sjdp } 316130561Sobrien if (error) { 317130561Sobrien LIST_FOREACH(cp1, &gp->consumer, consumer) { 318130561Sobrien if (cp1->ace) 319130561Sobrien g_access(cp1, 0, 0, -1); 320130561Sobrien } 321130561Sobrien return (error); 322130561Sobrien } 323130561Sobrien } 324130561Sobrien if (sc->path == NULL) 325130561Sobrien g_fox_select_path(gp, 0); 326130561Sobrien if (sc->path == NULL) 327130561Sobrien error = ENXIO; 328130561Sobrien else 329130561Sobrien error = g_access(sc->path, dr, dw, de); 330130561Sobrien if (error == 0) { 331130561Sobrien sc->cr += dr; 332130561Sobrien sc->cw += dw; 333130561Sobrien sc->ce += de; 334130561Sobrien if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { 335130561Sobrien /* 336130561Sobrien * Last close, remove e-bit on all consumers 337130561Sobrien */ 338130561Sobrien LIST_FOREACH(cp1, &gp->consumer, consumer) 339130561Sobrien g_access(cp1, 0, 0, -1); 340130561Sobrien } 341130561Sobrien } 342218822Sdim return (error); 
343218822Sdim} 344218822Sdim 345218822Sdimstatic struct g_geom * 346218822Sdimg_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 347218822Sdim{ 348218822Sdim struct g_geom *gp, *gp2; 349130561Sobrien struct g_provider *pp2; 350130561Sobrien struct g_consumer *cp, *cp2; 351130561Sobrien struct g_fox_softc *sc, *sc2; 352130561Sobrien int error; 353130561Sobrien u_int sectorsize; 354130561Sobrien u_char *buf; 355130561Sobrien 356130561Sobrien g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name); 357130561Sobrien g_topology_assert(); 358130561Sobrien if (!strcmp(pp->geom->class->name, mp->name)) 359130561Sobrien return (NULL); 360130561Sobrien gp = g_new_geomf(mp, "%s.fox", pp->name); 361130561Sobrien gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO); 36233965Sjdp sc = gp->softc; 36333965Sjdp 36433965Sjdp cp = g_new_consumer(gp); 36533965Sjdp g_attach(cp, pp); 36660484Sobrien error = g_access(cp, 1, 0, 0); 36733965Sjdp if (error) { 36833965Sjdp g_free(sc); 36933965Sjdp g_detach(cp); 37033965Sjdp g_destroy_consumer(cp); 37133965Sjdp g_destroy_geom(gp); 37233965Sjdp return(NULL); 37333965Sjdp } 37433965Sjdp do { 37533965Sjdp sectorsize = cp->provider->sectorsize; 37633965Sjdp g_topology_unlock(); 37733965Sjdp buf = g_read_data(cp, 0, sectorsize, NULL); 37833965Sjdp g_topology_lock(); 37960484Sobrien if (buf == NULL) 38060484Sobrien break; 38160484Sobrien if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC))) 38260484Sobrien break; 38360484Sobrien 38460484Sobrien /* 38533965Sjdp * First we need to see if this a new path for an existing fox. 38633965Sjdp */ 38733965Sjdp LIST_FOREACH(gp2, &mp->geom, geom) { 38833965Sjdp sc2 = gp2->softc; 38933965Sjdp if (sc2 == NULL) 39033965Sjdp continue; 39133965Sjdp if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic)) 39233965Sjdp continue; 393130561Sobrien break; 394130561Sobrien } 395218822Sdim if (gp2 != NULL) { 396130561Sobrien /* 397130561Sobrien * It was. 
Create a new consumer for that fox, 398130561Sobrien * attach it, and if the fox is open, open this 399130561Sobrien * path with an exclusive count of one. 400130561Sobrien */ 401130561Sobrien printf("Adding path (%s) to fox (%s)\n", 402130561Sobrien pp->name, gp2->name); 403130561Sobrien cp2 = g_new_consumer(gp2); 404130561Sobrien g_attach(cp2, pp); 40533965Sjdp pp2 = LIST_FIRST(&gp2->provider); 40633965Sjdp if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) { 40733965Sjdp error = g_access(cp2, 0, 0, 1); 40833965Sjdp if (error) { 40933965Sjdp /* 41033965Sjdp * This is bad, or more likely, 41133965Sjdp * the user is doing something stupid 41233965Sjdp */ 41333965Sjdp printf( 41433965Sjdp "WARNING: New path (%s) to fox(%s) not added: %s\n%s", 415218822Sdim cp2->provider->name, gp2->name, 416218822Sdim "Could not get exclusive bit.", 417218822Sdim "WARNING: This indicates a risk of data inconsistency." 418218822Sdim ); 419218822Sdim g_detach(cp2); 42033965Sjdp g_destroy_consumer(cp2); 421130561Sobrien } 42233965Sjdp } 42333965Sjdp break; 424218822Sdim } 425218822Sdim printf("Creating new fox (%s)\n", pp->name); 426218822Sdim sc->path = cp; 42733965Sjdp memcpy(sc->magic, buf + 16, sizeof sc->magic); 42833965Sjdp pp2 = g_new_providerf(gp, "%s", gp->name); 429218822Sdim pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize; 430218822Sdim pp2->sectorsize = sc->sectorsize = pp->sectorsize; 43133965Sjdpprintf("fox %s lock %p\n", gp->name, &sc->lock); 432218822Sdim 433218822Sdim mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF); 434218822Sdim TAILQ_INIT(&sc->queue); 43533965Sjdp g_error_provider(pp2, 0); 43633965Sjdp } while (0); 437218822Sdim if (buf != NULL) 43833965Sjdp g_free(buf); 439218822Sdim g_access(cp, -1, 0, 0); 44033965Sjdp 441218822Sdim if (!LIST_EMPTY(&gp->provider)) 442218822Sdim return (gp); 443218822Sdim 444218822Sdim g_free(gp->softc); 44533965Sjdp g_detach(cp); 446218822Sdim g_destroy_consumer(cp); 447218822Sdim g_destroy_geom(gp); 448218822Sdim 
return (NULL); 449218822Sdim} 450218822Sdim 451218822Sdimstatic int 452218822Sdimg_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) 453218822Sdim{ 454218822Sdim struct g_fox_softc *sc; 45533965Sjdp 456218822Sdim g_topology_assert(); 45733965Sjdp sc = gp->softc; 45833965Sjdp mtx_destroy(&sc->lock); 45933965Sjdp g_free(gp->softc); 46033965Sjdp gp->softc = NULL; 46133965Sjdp g_wither_geom(gp, ENXIO); 46233965Sjdp return (0); 46333965Sjdp} 464218822Sdim 465218822Sdimstatic struct g_class g_fox_class = { 46633965Sjdp .name = FOX_CLASS_NAME, 46733965Sjdp .version = G_VERSION, 46833965Sjdp .taste = g_fox_taste, 46933965Sjdp .destroy_geom = g_fox_destroy_geom, 47033965Sjdp .start = g_fox_start, 47133965Sjdp .spoiled = g_fox_orphan, 472218822Sdim .orphan = g_fox_orphan, 47333965Sjdp .access= g_fox_access, 474130561Sobrien}; 47533965Sjdp 476130561SobrienDECLARE_GEOM_CLASS(g_fox_class, g_fox); 477130561Sobrien