1130389Sle/*- 2190507Slulf * Copyright (c) 2004, 2007 Lukas Ertl 3130389Sle * All rights reserved. 4130389Sle * 5130389Sle * Redistribution and use in source and binary forms, with or without 6130389Sle * modification, are permitted provided that the following conditions 7130389Sle * are met: 8130389Sle * 1. Redistributions of source code must retain the above copyright 9130389Sle * notice, this list of conditions and the following disclaimer. 10130389Sle * 2. Redistributions in binary form must reproduce the above copyright 11130389Sle * notice, this list of conditions and the following disclaimer in the 12130389Sle * documentation and/or other materials provided with the distribution. 13130389Sle * 14130389Sle * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15130389Sle * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16130389Sle * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17130389Sle * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18130389Sle * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19130389Sle * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20130389Sle * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21130389Sle * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22130389Sle * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23130389Sle * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24130389Sle * SUCH DAMAGE. 25130389Sle */ 26130389Sle 27130389Sle#include <sys/cdefs.h> 28130389Sle__FBSDID("$FreeBSD$"); 29130389Sle 30130389Sle#include <sys/libkern.h> 31130389Sle#include <sys/malloc.h> 32130389Sle 33130389Sle#include <geom/geom.h> 34130389Sle#include <geom/vinum/geom_vinum_var.h> 35130389Sle#include <geom/vinum/geom_vinum.h> 36130389Sle#include <geom/vinum/geom_vinum_share.h> 37130389Sle 38138112Slevoid 39138112Slegv_setstate(struct g_geom *gp, struct gctl_req *req) 40138112Sle{ 41138112Sle struct gv_softc *sc; 42138112Sle struct gv_sd *s; 43138112Sle struct gv_drive *d; 44190507Slulf struct gv_volume *v; 45190507Slulf struct gv_plex *p; 46138112Sle char *obj, *state; 47190507Slulf int f, *flags, type; 48138112Sle 49138112Sle f = 0; 50138112Sle obj = gctl_get_param(req, "object", NULL); 51138112Sle if (obj == NULL) { 52138112Sle gctl_error(req, "no object given"); 53138112Sle return; 54138112Sle } 55138112Sle 56138112Sle state = gctl_get_param(req, "state", NULL); 57138112Sle if (state == NULL) { 58138112Sle gctl_error(req, "no state given"); 59138112Sle return; 60138112Sle } 61138112Sle 62138112Sle flags = gctl_get_paraml(req, "flags", sizeof(*flags)); 63138112Sle if (flags == NULL) { 64138112Sle gctl_error(req, "no flags given"); 65138112Sle return; 66138112Sle } 67138112Sle 68138112Sle if (*flags & GV_FLAG_F) 69138112Sle f = GV_SETSTATE_FORCE; 70138112Sle 71138112Sle sc = gp->softc; 72138112Sle type = gv_object_type(sc, obj); 73138112Sle switch (type) { 74138112Sle case GV_TYPE_VOL: 75190507Slulf if (gv_volstatei(state) < 0) { 76190507Slulf gctl_error(req, "invalid volume state '%s'", state); 77190507Slulf break; 78190507Slulf } 79190507Slulf v = gv_find_vol(sc, obj); 80190507Slulf gv_post_event(sc, GV_EVENT_SET_VOL_STATE, v, NULL, 81190507Slulf gv_volstatei(state), f); 82190507Slulf break; 83190507Slulf 84138112Sle case GV_TYPE_PLEX: 85190507Slulf if (gv_plexstatei(state) < 0) { 86190507Slulf gctl_error(req, "invalid plex state '%s'", state); 87190507Slulf break; 88190507Slulf } 89190507Slulf p = gv_find_plex(sc, obj); 90190507Slulf gv_post_event(sc, GV_EVENT_SET_PLEX_STATE, p, NULL, 91190507Slulf gv_plexstatei(state), f); 92138112Sle break; 93138112Sle 94138112Sle case GV_TYPE_SD: 95190507Slulf if (gv_sdstatei(state) < 0) { 96138112Sle gctl_error(req, "invalid subdisk state '%s'", state); 97138112Sle break; 98138112Sle } 99138112Sle s = gv_find_sd(sc, obj); 100190507Slulf gv_post_event(sc, GV_EVENT_SET_SD_STATE, s, NULL, 101190507Slulf gv_sdstatei(state), f); 102138112Sle break; 103138112Sle 104138112Sle case GV_TYPE_DRIVE: 105190507Slulf if (gv_drivestatei(state) < 0) { 106138112Sle gctl_error(req, "invalid drive state '%s'", state); 107138112Sle break; 108138112Sle } 109138112Sle d = gv_find_drive(sc, obj); 110190507Slulf gv_post_event(sc, GV_EVENT_SET_DRIVE_STATE, d, NULL, 111190507Slulf gv_drivestatei(state), f); 112138112Sle break; 113138112Sle 114138112Sle default: 115138112Sle gctl_error(req, "unknown object '%s'", obj); 116138112Sle break; 117138112Sle } 118138112Sle} 119138112Sle 120190507Slulf/* Update drive state; return 0 if the state changes, otherwise error. */ 121130389Sleint 122130389Slegv_set_drive_state(struct gv_drive *d, int newstate, int flags) 123130389Sle{ 124130389Sle struct gv_sd *s; 125130389Sle int oldstate; 126130389Sle 127130389Sle KASSERT(d != NULL, ("gv_set_drive_state: NULL d")); 128130389Sle 129130389Sle oldstate = d->state; 130130389Sle 131130389Sle if (newstate == oldstate) 132138112Sle return (0); 133130389Sle 134130389Sle /* We allow to take down an open drive only with force. */ 135190507Slulf if ((newstate == GV_DRIVE_DOWN) && gv_consumer_is_open(d->consumer) && 136130389Sle (!(flags & GV_SETSTATE_FORCE))) 137190507Slulf return (GV_ERR_ISBUSY); 138130389Sle 139130389Sle d->state = newstate; 140130389Sle 141130389Sle if (d->state != oldstate) { 142130389Sle LIST_FOREACH(s, &d->subdisks, from_drive) 143130389Sle gv_update_sd_state(s); 144130389Sle } 145130389Sle 146135162Sle /* Save the config back to disk. */ 147135162Sle if (flags & GV_SETSTATE_CONFIG) 148190507Slulf gv_save_config(d->vinumconf); 149135162Sle 150138112Sle return (0); 151130389Sle} 152130389Sle 153130389Sleint 154130389Slegv_set_sd_state(struct gv_sd *s, int newstate, int flags) 155130389Sle{ 156130389Sle struct gv_drive *d; 157130389Sle struct gv_plex *p; 158130389Sle int oldstate, status; 159130389Sle 160130389Sle KASSERT(s != NULL, ("gv_set_sd_state: NULL s")); 161130389Sle 162130389Sle oldstate = s->state; 163130389Sle 164130389Sle /* We are optimistic and assume it will work. */ 165130389Sle status = 0; 166130389Sle 167130389Sle if (newstate == oldstate) 168130389Sle return (0); 169130389Sle 170130389Sle switch (newstate) { 171130389Sle case GV_SD_DOWN: 172130389Sle /* 173130389Sle * If we're attached to a plex, we won't go down without use of 174130389Sle * force. 175130389Sle */ 176130389Sle if ((s->plex_sc != NULL) && !(flags & GV_SETSTATE_FORCE)) 177190507Slulf return (GV_ERR_ISATTACHED); 178130389Sle break; 179130389Sle 180190507Slulf case GV_SD_REVIVING: 181190507Slulf case GV_SD_INITIALIZING: 182190507Slulf /* 183190507Slulf * Only do this if we're forced, since it usually is done 184190507Slulf * internally, and then we do use the force flag. 185190507Slulf */ 186190507Slulf if (!flags & GV_SETSTATE_FORCE) 187190507Slulf return (GV_ERR_SETSTATE); 188190507Slulf break; 189190507Slulf 190130389Sle case GV_SD_UP: 191130389Sle /* We can't bring the subdisk up if our drive is dead. */ 192130389Sle d = s->drive_sc; 193130389Sle if ((d == NULL) || (d->state != GV_DRIVE_UP)) 194190507Slulf return (GV_ERR_SETSTATE); 195130389Sle 196130389Sle /* Check from where we want to be brought up. */ 197130389Sle switch (s->state) { 198130389Sle case GV_SD_REVIVING: 199130389Sle case GV_SD_INITIALIZING: 200130389Sle /* 201130389Sle * The subdisk was initializing. We allow it to be 202130389Sle * brought up. 203130389Sle */ 204130389Sle break; 205130389Sle 206130389Sle case GV_SD_DOWN: 207130389Sle /* 208130389Sle * The subdisk is currently down. We allow it to be 209130389Sle * brought up if it is not attached to a plex. 210130389Sle */ 211130389Sle p = s->plex_sc; 212130389Sle if (p == NULL) 213130389Sle break; 214130389Sle 215130389Sle /* 216130389Sle * If this subdisk is attached to a plex, we allow it 217130389Sle * to be brought up if the plex if it's not a RAID5 218130389Sle * plex, otherwise it's made 'stale'. 219130389Sle */ 220130389Sle 221130389Sle if (p->org != GV_PLEX_RAID5) 222130389Sle break; 223190507Slulf else if (s->flags & GV_SD_CANGOUP) { 224190507Slulf s->flags &= ~GV_SD_CANGOUP; 225138112Sle break; 226190507Slulf } else if (flags & GV_SETSTATE_FORCE) 227190507Slulf break; 228130389Sle else 229130389Sle s->state = GV_SD_STALE; 230130389Sle 231190507Slulf status = GV_ERR_SETSTATE; 232130389Sle break; 233130389Sle 234130389Sle case GV_SD_STALE: 235130389Sle /* 236135434Sle * A stale subdisk can be brought up only if it's part 237135434Sle * of a concat or striped plex that's the only one in a 238135434Sle * volume, or if the subdisk isn't attached to a plex. 239135434Sle * Otherwise it needs to be revived or initialized 240135434Sle * first. 241130389Sle */ 242135434Sle p = s->plex_sc; 243138112Sle if (p == NULL || flags & GV_SETSTATE_FORCE) 244135434Sle break; 245135434Sle 246190507Slulf if ((p->org != GV_PLEX_RAID5 && 247190507Slulf p->vol_sc->plexcount == 1) || 248190507Slulf (p->flags & GV_PLEX_SYNCING && 249190507Slulf p->synced > 0 && 250190507Slulf p->org == GV_PLEX_RAID5)) 251135434Sle break; 252135434Sle else 253190507Slulf return (GV_ERR_SETSTATE); 254135434Sle 255130389Sle default: 256190507Slulf return (GV_ERR_INVSTATE); 257130389Sle } 258130389Sle break; 259130389Sle 260130389Sle /* Other state transitions are only possible with force. */ 261130389Sle default: 262130389Sle if (!(flags & GV_SETSTATE_FORCE)) 263190507Slulf return (GV_ERR_SETSTATE); 264130389Sle } 265130389Sle 266130389Sle /* We can change the state and do it. */ 267130389Sle if (status == 0) 268130389Sle s->state = newstate; 269130389Sle 270130389Sle /* Update our plex, if we're attached to one. */ 271130389Sle if (s->plex_sc != NULL) 272130389Sle gv_update_plex_state(s->plex_sc); 273130389Sle 274130389Sle /* Save the config back to disk. */ 275130389Sle if (flags & GV_SETSTATE_CONFIG) 276190507Slulf gv_save_config(s->vinumconf); 277130389Sle 278130389Sle return (status); 279130389Sle} 280130389Sle 281190507Slulfint 282190507Slulfgv_set_plex_state(struct gv_plex *p, int newstate, int flags) 283190507Slulf{ 284190507Slulf struct gv_volume *v; 285190507Slulf int oldstate, plexdown; 286130389Sle 287190507Slulf KASSERT(p != NULL, ("gv_set_plex_state: NULL p")); 288190507Slulf 289190507Slulf oldstate = p->state; 290190507Slulf v = p->vol_sc; 291190507Slulf plexdown = 0; 292190507Slulf 293190507Slulf if (newstate == oldstate) 294190507Slulf return (0); 295190507Slulf 296190507Slulf switch (newstate) { 297190507Slulf case GV_PLEX_UP: 298190507Slulf /* Let update_plex handle if the plex can come up */ 299190507Slulf gv_update_plex_state(p); 300190507Slulf if (p->state != GV_PLEX_UP && !(flags & GV_SETSTATE_FORCE)) 301190507Slulf return (GV_ERR_SETSTATE); 302190507Slulf p->state = newstate; 303190507Slulf break; 304190507Slulf case GV_PLEX_DOWN: 305190507Slulf /* 306190507Slulf * Set state to GV_PLEX_DOWN only if no-one is using the plex, 307190507Slulf * or if the state is forced. 308190507Slulf */ 309190507Slulf if (v != NULL) { 310190507Slulf /* If the only one up, force is needed. */ 311190507Slulf plexdown = gv_plexdown(v); 312190507Slulf if ((v->plexcount == 1 || 313190507Slulf (v->plexcount - plexdown == 1)) && 314190507Slulf ((flags & GV_SETSTATE_FORCE) == 0)) 315190507Slulf return (GV_ERR_SETSTATE); 316190507Slulf } 317190507Slulf p->state = newstate; 318190507Slulf break; 319190507Slulf case GV_PLEX_DEGRADED: 320190507Slulf /* Only used internally, so we have to be forced. */ 321190507Slulf if (flags & GV_SETSTATE_FORCE) 322190507Slulf p->state = newstate; 323190507Slulf break; 324190507Slulf } 325190507Slulf 326190507Slulf /* Update our volume if we have one. */ 327190507Slulf if (v != NULL) 328190507Slulf gv_update_vol_state(v); 329190507Slulf 330190507Slulf /* Save config. */ 331190507Slulf if (flags & GV_SETSTATE_CONFIG) 332190507Slulf gv_save_config(p->vinumconf); 333190507Slulf return (0); 334190507Slulf} 335190507Slulf 336190507Slulfint 337190507Slulfgv_set_vol_state(struct gv_volume *v, int newstate, int flags) 338190507Slulf{ 339190507Slulf int oldstate; 340190507Slulf 341190507Slulf KASSERT(v != NULL, ("gv_set_vol_state: NULL v")); 342190507Slulf 343190507Slulf oldstate = v->state; 344190507Slulf 345190507Slulf if (newstate == oldstate) 346190507Slulf return (0); 347190507Slulf 348190507Slulf switch (newstate) { 349190507Slulf case GV_VOL_UP: 350190507Slulf /* Let update handle if the volume can come up. */ 351190507Slulf gv_update_vol_state(v); 352190507Slulf if (v->state != GV_VOL_UP && !(flags & GV_SETSTATE_FORCE)) 353190507Slulf return (GV_ERR_SETSTATE); 354190507Slulf v->state = newstate; 355190507Slulf break; 356190507Slulf case GV_VOL_DOWN: 357190507Slulf /* 358190507Slulf * Set state to GV_VOL_DOWN only if no-one is using the volume, 359190507Slulf * or if the state should be forced. 360190507Slulf */ 361190507Slulf if (!gv_provider_is_open(v->provider) && 362190507Slulf !(flags & GV_SETSTATE_FORCE)) 363190507Slulf return (GV_ERR_ISBUSY); 364190507Slulf v->state = newstate; 365190507Slulf break; 366190507Slulf } 367190507Slulf /* Save config */ 368190507Slulf if (flags & GV_SETSTATE_CONFIG) 369190507Slulf gv_save_config(v->vinumconf); 370190507Slulf return (0); 371190507Slulf} 372190507Slulf 373130389Sle/* Update the state of a subdisk based on its environment. */ 374130389Slevoid 375130389Slegv_update_sd_state(struct gv_sd *s) 376130389Sle{ 377130389Sle struct gv_drive *d; 378140591Sle int oldstate; 379130389Sle 380130389Sle KASSERT(s != NULL, ("gv_update_sd_state: NULL s")); 381130389Sle d = s->drive_sc; 382130389Sle KASSERT(d != NULL, ("gv_update_sd_state: NULL d")); 383140591Sle 384140591Sle oldstate = s->state; 385130389Sle 386130389Sle /* If our drive isn't up we cannot be up either. */ 387190507Slulf if (d->state != GV_DRIVE_UP) { 388130389Sle s->state = GV_SD_DOWN; 389130389Sle /* If this subdisk was just created, we assume it is good.*/ 390190507Slulf } else if (s->flags & GV_SD_NEWBORN) { 391130389Sle s->state = GV_SD_UP; 392130389Sle s->flags &= ~GV_SD_NEWBORN; 393190507Slulf } else if (s->state != GV_SD_UP) { 394190507Slulf if (s->flags & GV_SD_CANGOUP) { 395190507Slulf s->state = GV_SD_UP; 396190507Slulf s->flags &= ~GV_SD_CANGOUP; 397190507Slulf } else 398190507Slulf s->state = GV_SD_STALE; 399190507Slulf } else 400130389Sle s->state = GV_SD_UP; 401130389Sle 402140591Sle if (s->state != oldstate) 403184292Slulf G_VINUM_DEBUG(1, "subdisk %s state change: %s -> %s", s->name, 404184292Slulf gv_sdstate(oldstate), gv_sdstate(s->state)); 405140591Sle 406130389Sle /* Update the plex, if we have one. */ 407130389Sle if (s->plex_sc != NULL) 408130389Sle gv_update_plex_state(s->plex_sc); 409130389Sle} 410130389Sle 411130389Sle/* Update the state of a plex based on its environment. */ 412130389Slevoid 413130389Slegv_update_plex_state(struct gv_plex *p) 414130389Sle{ 415190507Slulf struct gv_sd *s; 416130389Sle int sdstates; 417140591Sle int oldstate; 418130389Sle 419130389Sle KASSERT(p != NULL, ("gv_update_plex_state: NULL p")); 420130389Sle 421140591Sle oldstate = p->state; 422140591Sle 423130389Sle /* First, check the state of our subdisks. */ 424130389Sle sdstates = gv_sdstatemap(p); 425130389Sle 426130389Sle /* If all subdisks are up, our plex can be up, too. */ 427130389Sle if (sdstates == GV_SD_UPSTATE) 428130389Sle p->state = GV_PLEX_UP; 429130389Sle 430130389Sle /* One or more of our subdisks are down. */ 431130389Sle else if (sdstates & GV_SD_DOWNSTATE) { 432130389Sle /* A RAID5 plex can handle one dead subdisk. */ 433130389Sle if ((p->org == GV_PLEX_RAID5) && (p->sddown == 1)) 434130389Sle p->state = GV_PLEX_DEGRADED; 435130389Sle else 436130389Sle p->state = GV_PLEX_DOWN; 437130389Sle 438130389Sle /* Some of our subdisks are initializing. */ 439130389Sle } else if (sdstates & GV_SD_INITSTATE) { 440190507Slulf 441190507Slulf if (p->flags & GV_PLEX_SYNCING || 442190507Slulf p->flags & GV_PLEX_REBUILDING) 443130389Sle p->state = GV_PLEX_DEGRADED; 444130389Sle else 445130389Sle p->state = GV_PLEX_DOWN; 446130389Sle } else 447130389Sle p->state = GV_PLEX_DOWN; 448130389Sle 449190507Slulf if (p->state == GV_PLEX_UP) { 450190507Slulf LIST_FOREACH(s, &p->subdisks, in_plex) { 451190507Slulf if (s->flags & GV_SD_GROW) { 452190507Slulf p->state = GV_PLEX_GROWABLE; 453190507Slulf break; 454190507Slulf } 455190507Slulf } 456190507Slulf } 457190507Slulf 458140591Sle if (p->state != oldstate) 459184292Slulf G_VINUM_DEBUG(1, "plex %s state change: %s -> %s", p->name, 460140591Sle gv_plexstate(oldstate), gv_plexstate(p->state)); 461140591Sle 462130389Sle /* Update our volume, if we have one. */ 463130389Sle if (p->vol_sc != NULL) 464130389Sle gv_update_vol_state(p->vol_sc); 465130389Sle} 466130389Sle 467130389Sle/* Update the volume state based on its plexes. */ 468130389Slevoid 469130389Slegv_update_vol_state(struct gv_volume *v) 470130389Sle{ 471130389Sle struct gv_plex *p; 472130389Sle 473130389Sle KASSERT(v != NULL, ("gv_update_vol_state: NULL v")); 474134014Sle 475157292Sle /* The volume can't be up without plexes. */ 476157292Sle if (v->plexcount == 0) { 477157292Sle v->state = GV_VOL_DOWN; 478157292Sle return; 479157292Sle } 480157292Sle 481130389Sle LIST_FOREACH(p, &v->plexes, in_volume) { 482130389Sle /* One of our plexes is accessible, and so are we. */ 483130389Sle if (p->state > GV_PLEX_DEGRADED) { 484130389Sle v->state = GV_VOL_UP; 485130389Sle return; 486134014Sle 487134014Sle /* We can handle a RAID5 plex with one dead subdisk as well. */ 488134014Sle } else if ((p->org == GV_PLEX_RAID5) && 489134014Sle (p->state == GV_PLEX_DEGRADED)) { 490134014Sle v->state = GV_VOL_UP; 491134014Sle return; 492130389Sle } 493130389Sle } 494130389Sle 495130389Sle /* Not one of our plexes is up, so we can't be either. */ 496130389Sle v->state = GV_VOL_DOWN; 497130389Sle} 498130389Sle 499130389Sle/* Return a state map for the subdisks of a plex. */ 500130389Sleint 501130389Slegv_sdstatemap(struct gv_plex *p) 502130389Sle{ 503130389Sle struct gv_sd *s; 504130389Sle int statemap; 505130389Sle 506130389Sle KASSERT(p != NULL, ("gv_sdstatemap: NULL p")); 507130389Sle 508130389Sle statemap = 0; 509130389Sle p->sddown = 0; /* No subdisks down yet. */ 510130389Sle 511130389Sle LIST_FOREACH(s, &p->subdisks, in_plex) { 512130389Sle switch (s->state) { 513130389Sle case GV_SD_DOWN: 514130389Sle case GV_SD_STALE: 515130389Sle statemap |= GV_SD_DOWNSTATE; 516130389Sle p->sddown++; /* Another unusable subdisk. */ 517130389Sle break; 518130389Sle 519130389Sle case GV_SD_UP: 520130389Sle statemap |= GV_SD_UPSTATE; 521130389Sle break; 522130389Sle 523130389Sle case GV_SD_INITIALIZING: 524130389Sle statemap |= GV_SD_INITSTATE; 525130389Sle break; 526130389Sle 527130389Sle case GV_SD_REVIVING: 528130389Sle statemap |= GV_SD_INITSTATE; 529130389Sle p->sddown++; /* XXX: Another unusable subdisk? */ 530130389Sle break; 531130389Sle } 532130389Sle } 533130389Sle return (statemap); 534130389Sle} 535