1116518Sphk/*- 2116518Sphk * Copyright (c) 2003 Poul-Henning Kamp 3116518Sphk * All rights reserved. 4116518Sphk * 5116518Sphk * Redistribution and use in source and binary forms, with or without 6116518Sphk * modification, are permitted provided that the following conditions 7116518Sphk * are met: 8116518Sphk * 1. Redistributions of source code must retain the above copyright 9116518Sphk * notice, this list of conditions and the following disclaimer. 10116518Sphk * 2. Redistributions in binary form must reproduce the above copyright 11116518Sphk * notice, this list of conditions and the following disclaimer in the 12116518Sphk * documentation and/or other materials provided with the distribution. 13116518Sphk * 3. The names of the authors may not be used to endorse or promote 14116518Sphk * products derived from this software without specific prior written 15116518Sphk * permission. 16116518Sphk * 17116518Sphk * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18116518Sphk * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19116518Sphk * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20116518Sphk * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21116518Sphk * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22116518Sphk * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23116518Sphk * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24116518Sphk * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25116518Sphk * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26116518Sphk * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27116518Sphk * SUCH DAMAGE. 28116518Sphk * 29116518Sphk * $FreeBSD: releng/10.3/sys/geom/geom_fox.c 219029 2011-02-25 10:24:35Z netchild $ 30139778Simp */ 31139778Simp 32139778Simp/* This is a GEOM module for handling path selection for multi-path 33116518Sphk * storage devices. It is named "fox" because it, like they, prefer 34116518Sphk * to have multiple exits to choose from. 35116518Sphk * 36116518Sphk */ 37116518Sphk 38116518Sphk#include <sys/param.h> 39116518Sphk#include <sys/systm.h> 40219029Snetchild#include <sys/sysctl.h> 41116518Sphk#include <sys/kernel.h> 42116518Sphk#include <sys/conf.h> 43116518Sphk#include <sys/bio.h> 44116518Sphk#include <sys/malloc.h> 45116518Sphk#include <sys/lock.h> 46116518Sphk#include <sys/mutex.h> 47116518Sphk#include <sys/libkern.h> 48116518Sphk#include <sys/endian.h> 49116518Sphk#include <sys/md5.h> 50116518Sphk#include <sys/errno.h> 51116518Sphk#include <geom/geom.h> 52116518Sphk 53116518Sphk#define FOX_CLASS_NAME "FOX" 54116518Sphk#define FOX_MAGIC "GEOM::FOX" 55116518Sphk 56219029SnetchildFEATURE(geom_fox, "GEOM FOX redundant path mitigation support"); 57219029Snetchild 58116518Sphkstruct g_fox_softc { 59116518Sphk off_t mediasize; 60116518Sphk u_int sectorsize; 61116518Sphk TAILQ_HEAD(, bio) queue; 62116518Sphk struct mtx lock; 63116518Sphk u_char magic[16]; 64116518Sphk struct g_consumer *path; 65116518Sphk struct g_consumer *opath; 66116518Sphk int waiting; 67116518Sphk int cr, cw, ce; 68116518Sphk}; 69116518Sphk 70116518Sphk/* 71116518Sphk * This function is called whenever we need to select a new path. 72116518Sphk */ 73116518Sphkstatic void 74116518Sphkg_fox_select_path(void *arg, int flag) 75116518Sphk{ 76116518Sphk struct g_geom *gp; 77116518Sphk struct g_fox_softc *sc; 78116518Sphk struct g_consumer *cp1; 79116518Sphk struct bio *bp; 80116518Sphk int error; 81116518Sphk 82116518Sphk g_topology_assert(); 83116518Sphk if (flag == EV_CANCEL) 84116518Sphk return; 85116518Sphk gp = arg; 86116518Sphk sc = gp->softc; 87116518Sphk 88116518Sphk if (sc->opath != NULL) { 89116518Sphk /* 90116518Sphk * First, close the old path entirely. 91116518Sphk */ 92116518Sphk printf("Closing old path (%s) on fox (%s)\n", 93116518Sphk sc->opath->provider->name, gp->name); 94116518Sphk 95116518Sphk cp1 = LIST_NEXT(sc->opath, consumer); 96116518Sphk 97125803Sphk g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1)); 98116518Sphk 99116518Sphk /* 100116518Sphk * The attempt to reopen it with a exclusive count 101116518Sphk */ 102125755Sphk error = g_access(sc->opath, 0, 0, 1); 103116518Sphk if (error) { 104116518Sphk /* 105116518Sphk * Ok, ditch this consumer, we can't use it. 106116518Sphk */ 107116518Sphk printf("Drop old path (%s) on fox (%s)\n", 108116518Sphk sc->opath->provider->name, gp->name); 109116518Sphk g_detach(sc->opath); 110116518Sphk g_destroy_consumer(sc->opath); 111116518Sphk if (LIST_EMPTY(&gp->consumer)) { 112116518Sphk /* No consumers left */ 113116518Sphk g_wither_geom(gp, ENXIO); 114116518Sphk for (;;) { 115116518Sphk bp = TAILQ_FIRST(&sc->queue); 116116518Sphk if (bp == NULL) 117116518Sphk break; 118116518Sphk TAILQ_REMOVE(&sc->queue, bp, bio_queue); 119116518Sphk bp->bio_error = ENXIO; 120116518Sphk g_std_done(bp); 121116518Sphk } 122116518Sphk return; 123116518Sphk } 124116518Sphk } else { 125116518Sphk printf("Got e-bit on old path (%s) on fox (%s)\n", 126116518Sphk sc->opath->provider->name, gp->name); 127116518Sphk } 128116518Sphk sc->opath = NULL; 129116518Sphk } else { 130116518Sphk cp1 = LIST_FIRST(&gp->consumer); 131116518Sphk } 132116518Sphk if (cp1 == NULL) 133116518Sphk cp1 = LIST_FIRST(&gp->consumer); 134116518Sphk printf("Open new path (%s) on fox (%s)\n", 135116518Sphk cp1->provider->name, gp->name); 136125755Sphk error = g_access(cp1, sc->cr, sc->cw, sc->ce); 137116518Sphk if (error) { 138116518Sphk /* 139116518Sphk * If we failed, we take another trip through here 140116518Sphk */ 141116518Sphk printf("Open new path (%s) on fox (%s) failed, reselect.\n", 142116518Sphk cp1->provider->name, gp->name); 143116518Sphk sc->opath = cp1; 144116518Sphk g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); 145116518Sphk } else { 146116518Sphk printf("Open new path (%s) on fox (%s) succeeded\n", 147116518Sphk cp1->provider->name, gp->name); 148116518Sphk mtx_lock(&sc->lock); 149116518Sphk sc->path = cp1; 150116518Sphk sc->waiting = 0; 151116518Sphk for (;;) { 152116518Sphk bp = TAILQ_FIRST(&sc->queue); 153116518Sphk if (bp == NULL) 154116518Sphk break; 155116518Sphk TAILQ_REMOVE(&sc->queue, bp, bio_queue); 156116518Sphk g_io_request(bp, sc->path); 157116518Sphk } 158116518Sphk mtx_unlock(&sc->lock); 159116518Sphk } 160116518Sphk} 161116518Sphk 162116518Sphkstatic void 163116518Sphkg_fox_orphan(struct g_consumer *cp) 164116518Sphk{ 165116518Sphk struct g_geom *gp; 166116518Sphk struct g_fox_softc *sc; 167116518Sphk int error, mark; 168116518Sphk 169116518Sphk g_topology_assert(); 170116518Sphk gp = cp->geom; 171116518Sphk sc = gp->softc; 172116518Sphk printf("Removing path (%s) from fox (%s)\n", 173116518Sphk cp->provider->name, gp->name); 174116518Sphk mtx_lock(&sc->lock); 175116518Sphk if (cp == sc->path) { 176116518Sphk sc->opath = NULL; 177116518Sphk sc->path = NULL; 178116518Sphk sc->waiting = 1; 179116518Sphk mark = 1; 180116518Sphk } else { 181116518Sphk mark = 0; 182116518Sphk } 183116518Sphk mtx_unlock(&sc->lock); 184116518Sphk 185125755Sphk g_access(cp, -cp->acr, -cp->acw, -cp->ace); 186116518Sphk error = cp->provider->error; 187116518Sphk g_detach(cp); 188116518Sphk g_destroy_consumer(cp); 189116518Sphk if (!LIST_EMPTY(&gp->consumer)) { 190116518Sphk if (mark) 191116518Sphk g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); 192116518Sphk return; 193116518Sphk } 194116518Sphk 195116518Sphk mtx_destroy(&sc->lock); 196121366Sphk g_free(gp->softc); 197116518Sphk gp->softc = NULL; 198116518Sphk g_wither_geom(gp, ENXIO); 199116518Sphk} 200116518Sphk 201116518Sphkstatic void 202116518Sphkg_fox_done(struct bio *bp) 203116518Sphk{ 204116518Sphk struct g_geom *gp; 205116518Sphk struct g_fox_softc *sc; 206116518Sphk int error; 207116518Sphk 208116518Sphk if (bp->bio_error == 0) { 209116518Sphk g_std_done(bp); 210116518Sphk return; 211116518Sphk } 212116518Sphk gp = bp->bio_from->geom; 213116518Sphk sc = gp->softc; 214116518Sphk if (bp->bio_from != sc->path) { 215116518Sphk g_io_request(bp, sc->path); 216116518Sphk return; 217116518Sphk } 218116518Sphk mtx_lock(&sc->lock); 219116518Sphk sc->opath = sc->path; 220116518Sphk sc->path = NULL; 221116518Sphk error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL); 222116518Sphk if (error) { 223116518Sphk bp->bio_error = ENOMEM; 224116518Sphk g_std_done(bp); 225116518Sphk } else { 226116518Sphk sc->waiting = 1; 227116518Sphk TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue); 228116518Sphk } 229116518Sphk mtx_unlock(&sc->lock); 230116518Sphk} 231116518Sphk 232116518Sphkstatic void 233116518Sphkg_fox_start(struct bio *bp) 234116518Sphk{ 235116518Sphk struct g_geom *gp; 236116518Sphk struct bio *bp2; 237116518Sphk struct g_fox_softc *sc; 238116518Sphk int error; 239116518Sphk 240116518Sphk gp = bp->bio_to->geom; 241116518Sphk sc = gp->softc; 242116518Sphk if (sc == NULL) { 243116518Sphk g_io_deliver(bp, ENXIO); 244116518Sphk return; 245116518Sphk } 246116518Sphk switch(bp->bio_cmd) { 247116518Sphk case BIO_READ: 248116518Sphk case BIO_WRITE: 249116518Sphk case BIO_DELETE: 250116518Sphk bp2 = g_clone_bio(bp); 251116518Sphk if (bp2 == NULL) { 252116518Sphk g_io_deliver(bp, ENOMEM); 253116518Sphk break; 254116518Sphk } 255116518Sphk bp2->bio_offset += sc->sectorsize; 256116518Sphk bp2->bio_done = g_fox_done; 257116518Sphk mtx_lock(&sc->lock); 258116518Sphk if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) { 259116518Sphk if (sc->waiting == 0) { 260116518Sphk error = g_post_event(g_fox_select_path, gp, 261116518Sphk M_NOWAIT, gp, NULL); 262116518Sphk if (error) { 263116518Sphk g_destroy_bio(bp2); 264116518Sphk bp2 = NULL; 265116518Sphk g_io_deliver(bp, error); 266116518Sphk } else { 267116518Sphk sc->waiting = 1; 268116518Sphk } 269116518Sphk } 270116518Sphk if (bp2 != NULL) 271116518Sphk TAILQ_INSERT_TAIL(&sc->queue, bp2, 272116518Sphk bio_queue); 273116518Sphk } else { 274116518Sphk g_io_request(bp2, sc->path); 275116518Sphk } 276116518Sphk mtx_unlock(&sc->lock); 277116518Sphk break; 278116518Sphk default: 279116518Sphk g_io_deliver(bp, EOPNOTSUPP); 280116518Sphk break; 281116518Sphk } 282116518Sphk return; 283116518Sphk} 284116518Sphk 285116518Sphkstatic int 286116518Sphkg_fox_access(struct g_provider *pp, int dr, int dw, int de) 287116518Sphk{ 288116518Sphk struct g_geom *gp; 289116518Sphk struct g_fox_softc *sc; 290116518Sphk struct g_consumer *cp1; 291116518Sphk int error; 292116518Sphk 293116518Sphk g_topology_assert(); 294116518Sphk gp = pp->geom; 295116518Sphk sc = gp->softc; 296125803Sphk if (sc == NULL) { 297125803Sphk if (dr <= 0 && dw <= 0 && de <= 0) 298125803Sphk return (0); 299125803Sphk else 300125803Sphk return (ENXIO); 301125803Sphk } 302116518Sphk 303116518Sphk if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { 304116518Sphk /* 305116518Sphk * First open, open all consumers with an exclusive bit 306116518Sphk */ 307116518Sphk error = 0; 308116518Sphk LIST_FOREACH(cp1, &gp->consumer, consumer) { 309125755Sphk error = g_access(cp1, 0, 0, 1); 310116518Sphk if (error) { 311116518Sphk printf("FOX: access(%s,0,0,1) = %d\n", 312116518Sphk cp1->provider->name, error); 313116518Sphk break; 314116518Sphk } 315116518Sphk } 316116518Sphk if (error) { 317116518Sphk LIST_FOREACH(cp1, &gp->consumer, consumer) { 318116518Sphk if (cp1->ace) 319125755Sphk g_access(cp1, 0, 0, -1); 320116518Sphk } 321116518Sphk return (error); 322116518Sphk } 323116518Sphk } 324116518Sphk if (sc->path == NULL) 325116518Sphk g_fox_select_path(gp, 0); 326116518Sphk if (sc->path == NULL) 327116518Sphk error = ENXIO; 328116518Sphk else 329125755Sphk error = g_access(sc->path, dr, dw, de); 330116518Sphk if (error == 0) { 331116518Sphk sc->cr += dr; 332116518Sphk sc->cw += dw; 333116518Sphk sc->ce += de; 334116518Sphk if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { 335116518Sphk /* 336116518Sphk * Last close, remove e-bit on all consumers 337116518Sphk */ 338116518Sphk LIST_FOREACH(cp1, &gp->consumer, consumer) 339125755Sphk g_access(cp1, 0, 0, -1); 340116518Sphk } 341116518Sphk } 342116518Sphk return (error); 343116518Sphk} 344116518Sphk 345116518Sphkstatic struct g_geom * 346116518Sphkg_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 347116518Sphk{ 348116518Sphk struct g_geom *gp, *gp2; 349116518Sphk struct g_provider *pp2; 350116518Sphk struct g_consumer *cp, *cp2; 351116518Sphk struct g_fox_softc *sc, *sc2; 352116518Sphk int error; 353116518Sphk u_int sectorsize; 354116518Sphk u_char *buf; 355116518Sphk 356116518Sphk g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name); 357116518Sphk g_topology_assert(); 358116518Sphk if (!strcmp(pp->geom->class->name, mp->name)) 359116518Sphk return (NULL); 360116518Sphk gp = g_new_geomf(mp, "%s.fox", pp->name); 361116518Sphk gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO); 362116518Sphk sc = gp->softc; 363116518Sphk 364116518Sphk cp = g_new_consumer(gp); 365116518Sphk g_attach(cp, pp); 366125755Sphk error = g_access(cp, 1, 0, 0); 367116518Sphk if (error) { 368116518Sphk g_free(sc); 369116518Sphk g_detach(cp); 370116518Sphk g_destroy_consumer(cp); 371116518Sphk g_destroy_geom(gp); 372116518Sphk return(NULL); 373116518Sphk } 374116518Sphk do { 375116518Sphk sectorsize = cp->provider->sectorsize; 376116518Sphk g_topology_unlock(); 377152971Ssobomax buf = g_read_data(cp, 0, sectorsize, NULL); 378116518Sphk g_topology_lock(); 379152967Ssobomax if (buf == NULL) 380116518Sphk break; 381116518Sphk if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC))) 382116518Sphk break; 383116518Sphk 384116518Sphk /* 385116518Sphk * First we need to see if this a new path for an existing fox. 386116518Sphk */ 387116518Sphk LIST_FOREACH(gp2, &mp->geom, geom) { 388116518Sphk sc2 = gp2->softc; 389121475Sphk if (sc2 == NULL) 390116518Sphk continue; 391116518Sphk if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic)) 392116518Sphk continue; 393116518Sphk break; 394116518Sphk } 395116518Sphk if (gp2 != NULL) { 396116518Sphk /* 397116518Sphk * It was. Create a new consumer for that fox, 398116518Sphk * attach it, and if the fox is open, open this 399116518Sphk * path with an exclusive count of one. 400116518Sphk */ 401116518Sphk printf("Adding path (%s) to fox (%s)\n", 402116518Sphk pp->name, gp2->name); 403116518Sphk cp2 = g_new_consumer(gp2); 404116518Sphk g_attach(cp2, pp); 405116518Sphk pp2 = LIST_FIRST(&gp2->provider); 406116518Sphk if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) { 407125755Sphk error = g_access(cp2, 0, 0, 1); 408116518Sphk if (error) { 409116518Sphk /* 410116518Sphk * This is bad, or more likely, 411116518Sphk * the user is doing something stupid 412116518Sphk */ 413116518Sphk printf( 414116518Sphk "WARNING: New path (%s) to fox(%s) not added: %s\n%s", 415121475Sphk cp2->provider->name, gp2->name, 416116518Sphk "Could not get exclusive bit.", 417116518Sphk "WARNING: This indicates a risk of data inconsistency." 418116518Sphk ); 419116518Sphk g_detach(cp2); 420116518Sphk g_destroy_consumer(cp2); 421116518Sphk } 422116518Sphk } 423116518Sphk break; 424116518Sphk } 425116518Sphk printf("Creating new fox (%s)\n", pp->name); 426116518Sphk sc->path = cp; 427116518Sphk memcpy(sc->magic, buf + 16, sizeof sc->magic); 428116518Sphk pp2 = g_new_providerf(gp, "%s", gp->name); 429116518Sphk pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize; 430116518Sphk pp2->sectorsize = sc->sectorsize = pp->sectorsize; 431116518Sphkprintf("fox %s lock %p\n", gp->name, &sc->lock); 432116518Sphk 433116518Sphk mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF); 434116518Sphk TAILQ_INIT(&sc->queue); 435116518Sphk g_error_provider(pp2, 0); 436116518Sphk } while (0); 437116518Sphk if (buf != NULL) 438116518Sphk g_free(buf); 439125755Sphk g_access(cp, -1, 0, 0); 440116518Sphk 441116518Sphk if (!LIST_EMPTY(&gp->provider)) 442116518Sphk return (gp); 443116518Sphk 444116518Sphk g_free(gp->softc); 445116518Sphk g_detach(cp); 446116518Sphk g_destroy_consumer(cp); 447116518Sphk g_destroy_geom(gp); 448116518Sphk return (NULL); 449116518Sphk} 450116518Sphk 451116518Sphkstatic int 452116518Sphkg_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) 453116518Sphk{ 454116518Sphk struct g_fox_softc *sc; 455116518Sphk 456116518Sphk g_topology_assert(); 457116518Sphk sc = gp->softc; 458116518Sphk mtx_destroy(&sc->lock); 459116518Sphk g_free(gp->softc); 460125538Sle gp->softc = NULL; 461116518Sphk g_wither_geom(gp, ENXIO); 462116518Sphk return (0); 463116518Sphk} 464116518Sphk 465116518Sphkstatic struct g_class g_fox_class = { 466116518Sphk .name = FOX_CLASS_NAME, 467133318Sphk .version = G_VERSION, 468116518Sphk .taste = g_fox_taste, 469116518Sphk .destroy_geom = g_fox_destroy_geom, 470133314Sphk .start = g_fox_start, 471133314Sphk .spoiled = g_fox_orphan, 472133314Sphk .orphan = g_fox_orphan, 473133314Sphk .access= g_fox_access, 474116518Sphk}; 475116518Sphk 476116518SphkDECLARE_GEOM_CLASS(g_fox_class, g_fox); 477