/* geom_vinum.c revision 131107 */
1/* 2 * Copyright (c) 2004 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum.c 131107 2004-06-25 18:04:33Z le $"); 30 31#include <sys/param.h> 32#include <sys/bio.h> 33#include <sys/kernel.h> 34#include <sys/lock.h> 35#include <sys/malloc.h> 36#include <sys/module.h> 37#include <sys/mutex.h> 38#include <sys/systm.h> 39 40#include <geom/geom.h> 41#include <geom/vinum/geom_vinum_var.h> 42#include <geom/vinum/geom_vinum.h> 43#include <geom/vinum/geom_vinum_share.h> 44 45#if 0 46SYSCTL_DECL(_kern_geom); 47SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW, 0, "GEOM_VINUM stuff"); 48SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RW, &gv_debug, 0, 49 "Debug level"); 50#endif 51 52int gv_create(struct g_geom *, struct gctl_req *); 53void config_new_drive(struct gv_drive *); 54 55static void 56gv_orphan(struct g_consumer *cp) 57{ 58 struct g_geom *gp; 59 struct gv_softc *sc; 60 int error; 61 62 g_topology_assert(); 63 64 KASSERT(cp != NULL, ("gv_orphan: null cp")); 65 gp = cp->geom; 66 KASSERT(gp != NULL, ("gv_orphan: null gp")); 67 sc = gp->softc; 68 69 g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name); 70 71 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 72 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 73 error = cp->provider->error; 74 if (error == 0) 75 error = ENXIO; 76 g_detach(cp); 77 g_destroy_consumer(cp); 78 if (!LIST_EMPTY(&gp->consumer)) 79 return; 80 g_free(sc); 81 g_wither_geom(gp, error); 82} 83 84static void 85gv_start(struct bio *bp) 86{ 87 struct bio *bp2; 88 struct g_geom *gp; 89 90 gp = bp->bio_to->geom; 91 switch(bp->bio_cmd) { 92 case BIO_READ: 93 case BIO_WRITE: 94 case BIO_DELETE: 95 bp2 = g_clone_bio(bp); 96 bp2->bio_done = g_std_done; 97 g_io_request(bp2, LIST_FIRST(&gp->consumer)); 98 return; 99 default: 100 g_io_deliver(bp, EOPNOTSUPP); 101 return; 102 } 103} 104 105static int 106gv_access(struct g_provider *pp, int dr, int dw, int de) 107{ 108 struct g_geom *gp; 109 struct g_consumer *cp; 110 int error; 111 112 gp = 
pp->geom; 113 error = ENXIO; 114 cp = LIST_FIRST(&gp->consumer); 115 error = g_access(cp, dr, dw, de); 116 return (error); 117} 118 119static struct g_geom * 120gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 121{ 122 struct g_geom *gp; 123 struct g_consumer *cp; 124 struct gv_softc *sc; 125 struct gv_hdr *vhdr; 126 int error, first; 127 char *buf; 128 129 vhdr = NULL; 130 buf = NULL; 131 first = 0; 132 133 g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name); 134 g_topology_assert(); 135 136 if (pp->sectorsize == 0) 137 return (NULL); 138 139 /* Check if we already have a VINUM geom, or create a new one. */ 140 if (LIST_EMPTY(&mp->geom)) { 141 gp = g_new_geomf(mp, "VINUM"); 142 gp->spoiled = gv_orphan; 143 gp->orphan = gv_orphan; 144 gp->access = gv_access; 145 gp->start = gv_start; 146 gp->softc = g_malloc(sizeof(struct gv_softc), 147 M_WAITOK | M_ZERO); 148 sc = gp->softc; 149 sc->geom = gp; 150 LIST_INIT(&sc->drives); 151 LIST_INIT(&sc->subdisks); 152 LIST_INIT(&sc->plexes); 153 LIST_INIT(&sc->volumes); 154 first++; 155 } else { 156 gp = LIST_FIRST(&mp->geom); 157 sc = gp->softc; 158 } 159 160 161 /* We need a temporary consumer to read the config from. */ 162 cp = g_new_consumer(gp); 163 error = g_attach(cp, pp); 164 if (error) { 165 g_destroy_consumer(cp); 166 if (first) { 167 g_free(sc); 168 g_destroy_geom(gp); 169 } 170 return (NULL); 171 } 172 error = g_access(cp, 1, 0, 0); 173 if (error) { 174 g_detach(cp); 175 g_destroy_consumer(cp); 176 if (first) { 177 g_free(gp->softc); 178 g_destroy_geom(gp); 179 } 180 return (NULL); 181 } 182 183 g_topology_unlock(); 184 185 /* Check if the provided slice is a valid vinum drive. 
*/ 186 vhdr = g_read_data(cp, GV_HDR_OFFSET, GV_HDR_LEN, &error); 187 if (vhdr == NULL || error != 0) { 188 g_topology_lock(); 189 g_access(cp, -1, 0, 0); 190 g_detach(cp); 191 g_destroy_consumer(cp); 192 if (first) { 193 g_free(sc); 194 g_destroy_geom(gp); 195 } 196 return (NULL); 197 } 198 199 /* This provider has no vinum magic on board. */ 200 if (vhdr->magic != GV_MAGIC) { 201 /* Release the temporary consumer, we don't need it anymore. */ 202 g_topology_lock(); 203 g_access(cp, -1, 0, 0); 204 g_detach(cp); 205 g_destroy_consumer(cp); 206 207 g_free(vhdr); 208 209 /* 210 * If there is no other VINUM geom yet just take this one; the 211 * configuration is still empty, but it can be filled by other 212 * valid vinum drives later. 213 */ 214 if (first) 215 return (gp); 216 else 217 return (NULL); 218 219 /* 220 * We have found a valid vinum drive, now read the on-disk 221 * configuration. 222 */ 223 } else { 224 g_free(vhdr); 225 226 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, 227 &error); 228 if (buf == NULL || error != 0) { 229 g_topology_lock(); 230 g_access(cp, -1, 0, 0); 231 g_detach(cp); 232 g_destroy_consumer(cp); 233 if (first) { 234 g_free(sc); 235 g_destroy_geom(gp); 236 } 237 return (NULL); 238 } 239 240 /* Release the temporary consumer, we don't need it anymore. */ 241 g_topology_lock(); 242 g_access(cp, -1, 0, 0); 243 g_detach(cp); 244 g_destroy_consumer(cp); 245 246 /* We are the first VINUM geom. */ 247 if (first) { 248 gv_parse_config(sc, buf, 0); 249 g_free(buf); 250 return (gp); 251 252 /* Just merge the configs. 
*/ 253 } else { 254 gv_parse_config(sc, buf, 1); 255 g_free(buf); 256 return (NULL); 257 } 258 } 259} 260 261/* XXX this really belongs somewhere else */ 262void 263config_new_drive(struct gv_drive *d) 264{ 265 struct gv_hdr *vhdr; 266 struct gv_freelist *fl; 267 268 KASSERT(d != NULL, ("config_new_drive: NULL d")); 269 270 vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); 271 vhdr->magic = GV_MAGIC; 272 vhdr->config_length = GV_CFG_LEN; 273 274 bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); 275 strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); 276 microtime(&vhdr->label.date_of_birth); 277 278 d->hdr = vhdr; 279 280 LIST_INIT(&d->subdisks); 281 LIST_INIT(&d->freelist); 282 283 fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); 284 fl->offset = GV_DATA_START; 285 fl->size = d->avail; 286 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 287 d->freelist_entries = 1; 288 289} 290 291/* Handle userland requests for creating new objects. */ 292int 293gv_create(struct g_geom *gp, struct gctl_req *req) 294{ 295 struct gv_softc *sc; 296 struct gv_drive *d, *d2; 297 struct gv_plex *p, *p2; 298 struct gv_sd *s, *s2; 299 struct gv_volume *v, *v2; 300 struct g_consumer *cp; 301 struct g_provider *pp; 302 int error, i, *drives, *plexes, *subdisks, *volumes; 303 char buf[20], errstr[ERRBUFSIZ]; 304 305 g_topology_assert(); 306 307 sc = gp->softc; 308 309 /* Find out how many of each object have been passed in. */ 310 volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes)); 311 plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes)); 312 subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks)); 313 drives = gctl_get_paraml(req, "drives", sizeof(*drives)); 314 315 /* First, handle drive definitions ... 
*/ 316 for (i = 0; i < *drives; i++) { 317 snprintf(buf, sizeof(buf), "drive%d", i); 318 d2 = gctl_get_paraml(req, buf, sizeof(*d2)); 319 d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 320 bcopy(d2, d, sizeof(*d)); 321 322 /* 323 * Make sure that the provider specified in the drive 324 * specification is an active GEOM provider. 325 */ 326 pp = g_provider_by_name(d->device); 327 if (pp == NULL) { 328 gctl_error(req, "%s: drive not found", d->device); 329 g_free(d); 330 return (-1); 331 } 332 d->size = pp->mediasize - GV_DATA_START; 333 d->avail = d->size; 334 335 config_new_drive(d); 336 337 LIST_INSERT_HEAD(&sc->drives, d, drive); 338 } 339 340 /* ... then volume definitions ... */ 341 for (i = 0; i < *volumes; i++) { 342 error = 0; 343 snprintf(buf, sizeof(buf), "volume%d", i); 344 v2 = gctl_get_paraml(req, buf, sizeof(*v2)); 345 346 v = gv_find_vol(sc, v2->name); 347 if (v != NULL) { 348 gctl_error(req, "volume '%s' is already known", 349 v->name); 350 return (-1); 351 } 352 353 v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); 354 bcopy(v2, v, sizeof(*v)); 355 356 v->vinumconf = sc; 357 LIST_INIT(&v->plexes); 358 LIST_INSERT_HEAD(&sc->volumes, v, volume); 359 } 360 361 /* ... then plex definitions ... */ 362 for (i = 0; i < *plexes; i++) { 363 error = 0; 364 snprintf(buf, sizeof(buf), "plex%d", i); 365 p2 = gctl_get_paraml(req, buf, sizeof(*p2)); 366 367 p = gv_find_plex(sc, p2->name); 368 if (p != NULL) { 369 gctl_error(req, "plex '%s' is already known", p->name); 370 return (-1); 371 } 372 373 p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); 374 bcopy(p2, p, sizeof(*p)); 375 376 /* Find the volume this plex should be attached to. 
*/ 377 v = gv_find_vol(sc, p->volume); 378 if (v != NULL) { 379 if (v->plexcount) 380 p->flags |= GV_PLEX_ADDED; 381 p->vol_sc = v; 382 v->plexcount++; 383 LIST_INSERT_HEAD(&v->plexes, p, in_volume); 384 } 385 386 p->vinumconf = sc; 387 p->flags |= GV_PLEX_NEWBORN; 388 LIST_INIT(&p->subdisks); 389 LIST_INSERT_HEAD(&sc->plexes, p, plex); 390 } 391 392 /* ... and finally, subdisk definitions. */ 393 for (i = 0; i < *subdisks; i++) { 394 error = 0; 395 snprintf(buf, sizeof(buf), "sd%d", i); 396 s2 = gctl_get_paraml(req, buf, sizeof(*s2)); 397 398 s = gv_find_sd(sc, s2->name); 399 if (s != NULL) { 400 gctl_error(req, "subdisk '%s' is already known", 401 s->name); 402 return (-1); 403 } 404 405 s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); 406 bcopy(s2, s, sizeof(*s)); 407 408 /* Find the drive where this subdisk should be put on. */ 409 d = gv_find_drive(sc, s->drive); 410 411 /* drive not found - XXX */ 412 if (d == NULL) { 413 printf("FOO: drive '%s' not found\n", s->drive); 414 g_free(s); 415 continue; 416 } 417 418 /* Find the plex where this subdisk belongs to. */ 419 p = gv_find_plex(sc, s->plex); 420 421 /* plex not found - XXX */ 422 if (p == NULL) { 423 printf("FOO: plex '%s' not found\n", s->plex); 424 g_free(s); 425 continue; 426 } 427 428 /* 429 * First we give the subdisk to the drive, to handle autosized 430 * values ... 431 */ 432 error = gv_sd_to_drive(sc, d, s, errstr, sizeof(errstr)); 433 if (error) { 434 gctl_error(req, errstr); 435 g_free(s); 436 continue; 437 } 438 439 /* 440 * Then, we give the subdisk to the plex; we check if the 441 * given values are correct and maybe adjust them. 
442 */ 443 error = gv_sd_to_plex(p, s, 1); 444 if (error) { 445 printf("FOO: couldn't give sd '%s' to plex '%s'\n", 446 s->name, p->name); 447 } 448 s->flags |= GV_SD_NEWBORN; 449 450 s->vinumconf = sc; 451 LIST_INSERT_HEAD(&sc->subdisks, s, sd); 452 } 453 454 LIST_FOREACH(s, &sc->subdisks, sd) 455 gv_update_sd_state(s); 456 LIST_FOREACH(p, &sc->plexes, plex) 457 gv_update_plex_config(p); 458 LIST_FOREACH(v, &sc->volumes, volume) 459 gv_update_vol_state(v); 460 461 /* 462 * Write out the configuration to each drive. If the drive doesn't 463 * have a valid geom_slice geom yet, attach it temporarily to our VINUM 464 * geom. 465 */ 466 LIST_FOREACH(d, &sc->drives, drive) { 467 if (d->geom == NULL) { 468 /* 469 * XXX if the provider disapears before we get a chance 470 * to write the config out to the drive, should this 471 * be handled any differently? 472 */ 473 pp = g_provider_by_name(d->device); 474 if (pp == NULL) { 475 printf("geom_vinum: %s: drive disapeared?\n", 476 d->device); 477 continue; 478 } 479 cp = g_new_consumer(gp); 480 g_attach(cp, pp); 481 gv_save_config(cp, d, sc); 482 g_detach(cp); 483 g_destroy_consumer(cp); 484 } else 485 gv_save_config(NULL, d, sc); 486 } 487 488 return (0); 489} 490 491static void 492gv_config(struct gctl_req *req, struct g_class *mp, char const *verb) 493{ 494 struct g_geom *gp; 495 struct gv_softc *sc; 496 struct sbuf *sb; 497 char *comment; 498 499 g_topology_assert(); 500 501 gp = LIST_FIRST(&mp->geom); 502 sc = gp->softc; 503 504 if (!strcmp(verb, "list")) { 505 gv_list(gp, req); 506 507 /* Save our configuration back to disk. */ 508 } else if (!strcmp(verb, "saveconfig")) { 509 510 gv_save_config_all(sc); 511 512 /* Return configuration in string form. 
*/ 513 } else if (!strcmp(verb, "getconfig")) { 514 comment = gctl_get_param(req, "comment", NULL); 515 516 sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 517 gv_format_config(sc, sb, 0, comment); 518 sbuf_finish(sb); 519 gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1); 520 sbuf_delete(sb); 521 522 } else if (!strcmp(verb, "create")) { 523 gv_create(gp, req); 524 525 } else if (!strcmp(verb, "remove")) { 526 gv_remove(gp, req); 527 528 } else if (!strcmp(verb, "start")) { 529 gv_start_obj(gp, req); 530 531 } else 532 gctl_error(req, "Unknown verb parameter"); 533} 534 535static int 536gv_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) 537{ 538 struct g_geom *gp2; 539 struct gv_softc *sc; 540 struct gv_drive *d, *d2; 541 struct gv_plex *p, *p2; 542 struct gv_sd *s, *s2; 543 struct gv_volume *v, *v2; 544 struct gv_freelist *fl, *fl2; 545 546 g_trace(G_T_TOPOLOGY, "gv_destroy_geom: %s", gp->name); 547 g_topology_assert(); 548 549 KASSERT(gp != NULL, ("gv_destroy_geom: null gp")); 550 KASSERT(gp->softc != NULL, ("gv_destroy_geom: null sc")); 551 552 sc = gp->softc; 553 554 /* 555 * Check if any of our drives is still open; if so, refuse destruction. 556 */ 557 LIST_FOREACH(d, &sc->drives, drive) { 558 gp2 = d->geom; 559 if (gv_is_open(gp2)) 560 return (EBUSY); 561 } 562 563 /* Clean up and deallocate what we allocated. 
*/ 564 LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) { 565 LIST_REMOVE(d, drive); 566 g_free(d->hdr); 567 d->hdr = NULL; 568 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { 569 d->freelist_entries--; 570 LIST_REMOVE(fl, freelist); 571 g_free(fl); 572 fl = NULL; 573 } 574 d->geom->softc = NULL; 575 g_free(d); 576 } 577 578 LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) { 579 LIST_REMOVE(s, sd); 580 s->drive_sc = NULL; 581 s->plex_sc = NULL; 582 s->provider = NULL; 583 s->consumer = NULL; 584 g_free(s); 585 } 586 587 LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) { 588 LIST_REMOVE(p, plex); 589 gv_kill_thread(p); 590 p->vol_sc = NULL; 591 p->geom->softc = NULL; 592 p->provider = NULL; 593 p->consumer = NULL; 594 if (p->org == GV_PLEX_RAID5) { 595 mtx_destroy(&p->worklist_mtx); 596 } 597 g_free(p); 598 } 599 600 LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) { 601 LIST_REMOVE(v, volume); 602 v->geom->softc = NULL; 603 g_free(v); 604 } 605 606 gp->softc = NULL; 607 g_free(sc); 608 g_wither_geom(gp, ENXIO); 609 return (0); 610} 611 612#define VINUM_CLASS_NAME "VINUM" 613 614static struct g_class g_vinum_class = { 615 .name = VINUM_CLASS_NAME, 616 .taste = gv_taste, 617 .destroy_geom = gv_destroy_geom, 618 .ctlreq = gv_config, 619}; 620 621DECLARE_GEOM_CLASS(g_vinum_class, g_vinum); 622