geom_vinum_drive.c revision 146325
1/*- 2 * Copyright (c) 2004, 2005 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 146325 2005-05-17 16:38:30Z le $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/errno.h> 33#include <sys/conf.h> 34#include <sys/kernel.h> 35#include <sys/kthread.h> 36#include <sys/libkern.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/module.h> 40#include <sys/mutex.h> 41#include <sys/sbuf.h> 42#include <sys/systm.h> 43#include <sys/time.h> 44 45#include <geom/geom.h> 46#include <geom/vinum/geom_vinum_var.h> 47#include <geom/vinum/geom_vinum.h> 48#include <geom/vinum/geom_vinum_share.h> 49 50static void gv_drive_dead(void *, int); 51static void gv_drive_worker(void *); 52void gv_drive_modify(struct gv_drive *); 53 54void 55gv_config_new_drive(struct gv_drive *d) 56{ 57 struct gv_hdr *vhdr; 58 struct gv_freelist *fl; 59 60 KASSERT(d != NULL, ("config_new_drive: NULL d")); 61 62 vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); 63 vhdr->magic = GV_MAGIC; 64 vhdr->config_length = GV_CFG_LEN; 65 66 bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); 67 strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); 68 microtime(&vhdr->label.date_of_birth); 69 70 d->hdr = vhdr; 71 72 LIST_INIT(&d->subdisks); 73 LIST_INIT(&d->freelist); 74 75 fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); 76 fl->offset = GV_DATA_START; 77 fl->size = d->avail; 78 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 79 d->freelist_entries = 1; 80 81 TAILQ_INIT(&d->bqueue); 82 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 83 kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); 84 d->flags |= GV_DRIVE_THREAD_ACTIVE; 85} 86 87void 88gv_save_config_all(struct gv_softc *sc) 89{ 90 struct gv_drive *d; 91 92 g_topology_assert(); 93 94 LIST_FOREACH(d, &sc->drives, drive) { 95 if (d->geom == NULL) 96 continue; 97 gv_save_config(NULL, d, sc); 98 } 99} 100 101/* Save the vinum configuration back to disk. */ 102void 103gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) 104{ 105 struct g_geom *gp; 106 struct g_consumer *cp2; 107 struct gv_hdr *vhdr, *hdr; 108 struct sbuf *sb; 109 int error; 110 111 g_topology_assert(); 112 113 KASSERT(d != NULL, ("gv_save_config: null d")); 114 KASSERT(sc != NULL, ("gv_save_config: null sc")); 115 116 if (d->state != GV_DRIVE_UP) 117 return; 118 119 if (cp == NULL) { 120 gp = d->geom; 121 KASSERT(gp != NULL, ("gv_save_config: null gp")); 122 cp2 = LIST_FIRST(&gp->consumer); 123 KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); 124 } else 125 cp2 = cp; 126 127 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 128 vhdr->magic = GV_MAGIC; 129 vhdr->config_length = GV_CFG_LEN; 130 131 hdr = d->hdr; 132 if (hdr == NULL) { 133 printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name); 134 g_free(vhdr); 135 return; 136 } 137 microtime(&hdr->label.last_update); 138 bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); 139 140 sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 141 gv_format_config(sc, sb, 1, NULL); 142 sbuf_finish(sb); 143 144 error = g_access(cp2, 0, 1, 0); 145 if (error) { 146 printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n", 147 d->name, error); 148 sbuf_delete(sb); 149 g_free(vhdr); 150 return; 151 } 152 g_topology_unlock(); 153 154 do { 155 error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN); 156 if (error) { 157 printf("GEOM_VINUM: writing vhdr failed on drive %s, " 158 "errno %d", d->name, error); 159 break; 160 } 161 162 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), 163 GV_CFG_LEN); 164 if (error) { 165 printf("GEOM_VINUM: writing first config copy failed " 166 "on drive %s, errno %d", d->name, error); 167 break; 168 } 169 170 error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN, 171 sbuf_data(sb), GV_CFG_LEN); 172 if (error) 173 printf("GEOM_VINUM: writing second config copy failed " 174 "on drive %s, errno %d", d->name, error); 175 } while (0); 176 177 g_topology_lock(); 178 g_access(cp2, 0, -1, 0); 179 sbuf_delete(sb); 180 g_free(vhdr); 181 182 if (d->geom != NULL) 183 gv_drive_modify(d); 184} 185 186/* This resembles g_slice_access(). */ 187static int 188gv_drive_access(struct g_provider *pp, int dr, int dw, int de) 189{ 190 struct g_geom *gp; 191 struct g_consumer *cp; 192 struct g_provider *pp2; 193 struct gv_drive *d; 194 struct gv_sd *s, *s2; 195 int error; 196 197 gp = pp->geom; 198 cp = LIST_FIRST(&gp->consumer); 199 if (cp == NULL) 200 return (0); 201 202 d = gp->softc; 203 if (d == NULL) 204 return (0); 205 206 s = pp->private; 207 KASSERT(s != NULL, ("gv_drive_access: NULL s")); 208 209 LIST_FOREACH(s2, &d->subdisks, from_drive) { 210 if (s == s2) 211 continue; 212 if (s->drive_offset + s->size <= s2->drive_offset) 213 continue; 214 if (s2->drive_offset + s2->size <= s->drive_offset) 215 continue; 216 217 /* Overlap. */ 218 pp2 = s2->provider; 219 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); 220 if ((pp->acw + dw) > 0 && pp2->ace > 0) 221 return (EPERM); 222 if ((pp->ace + de) > 0 && pp2->acw > 0) 223 return (EPERM); 224 } 225 226 error = g_access(cp, dr, dw, de); 227 return (error); 228} 229 230static void 231gv_drive_done(struct bio *bp) 232{ 233 struct gv_drive *d; 234 struct gv_bioq *bq; 235 236 /* Put the BIO on the worker queue again. */ 237 d = bp->bio_from->geom->softc; 238 bp->bio_cflags |= GV_BIO_DONE; 239 bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO); 240 bq->bp = bp; 241 mtx_lock(&d->bqueue_mtx); 242 TAILQ_INSERT_TAIL(&d->bqueue, bq, queue); 243 wakeup(d); 244 mtx_unlock(&d->bqueue_mtx); 245} 246 247 248static void 249gv_drive_start(struct bio *bp) 250{ 251 struct gv_drive *d; 252 struct gv_sd *s; 253 struct gv_bioq *bq; 254 255 switch (bp->bio_cmd) { 256 case BIO_READ: 257 case BIO_WRITE: 258 case BIO_DELETE: 259 break; 260 case BIO_GETATTR: 261 default: 262 g_io_deliver(bp, EOPNOTSUPP); 263 return; 264 } 265 266 s = bp->bio_to->private; 267 if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { 268 g_io_deliver(bp, ENXIO); 269 return; 270 } 271 272 d = bp->bio_to->geom->softc; 273 274 /* 275 * Put the BIO on the worker queue, where the worker thread will pick 276 * it up. 277 */ 278 bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO); 279 bq->bp = bp; 280 mtx_lock(&d->bqueue_mtx); 281 TAILQ_INSERT_TAIL(&d->bqueue, bq, queue); 282 wakeup(d); 283 mtx_unlock(&d->bqueue_mtx); 284 285} 286 287static void 288gv_drive_worker(void *arg) 289{ 290 struct bio *bp, *cbp; 291 struct g_geom *gp; 292 struct g_provider *pp; 293 struct gv_drive *d; 294 struct gv_sd *s; 295 struct gv_bioq *bq, *bq2; 296 int error; 297 298 d = arg; 299 300 mtx_lock(&d->bqueue_mtx); 301 for (;;) { 302 /* We were signaled to exit. */ 303 if (d->flags & GV_DRIVE_THREAD_DIE) 304 break; 305 306 /* Take the first BIO from out queue. */ 307 bq = TAILQ_FIRST(&d->bqueue); 308 if (bq == NULL) { 309 msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10); 310 continue; 311 } 312 TAILQ_REMOVE(&d->bqueue, bq, queue); 313 mtx_unlock(&d->bqueue_mtx); 314 315 bp = bq->bp; 316 g_free(bq); 317 pp = bp->bio_to; 318 gp = pp->geom; 319 320 /* Completed request. */ 321 if (bp->bio_cflags & GV_BIO_DONE) { 322 error = bp->bio_error; 323 324 /* Deliver the original request. */ 325 g_std_done(bp); 326 327 /* The request had an error, we need to clean up. */ 328 if (error != 0) { 329 g_topology_lock(); 330 gv_set_drive_state(d, GV_DRIVE_DOWN, 331 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 332 g_topology_unlock(); 333 g_post_event(gv_drive_dead, d, M_WAITOK, d, 334 NULL); 335 } 336 337 /* New request, needs to be sent downwards. */ 338 } else { 339 s = pp->private; 340 341 if ((s->state == GV_SD_DOWN) || 342 (s->state == GV_SD_STALE)) { 343 g_io_deliver(bp, ENXIO); 344 mtx_lock(&d->bqueue_mtx); 345 continue; 346 } 347 if (bp->bio_offset > s->size) { 348 g_io_deliver(bp, EINVAL); 349 mtx_lock(&d->bqueue_mtx); 350 continue; 351 } 352 353 cbp = g_clone_bio(bp); 354 if (cbp == NULL) { 355 g_io_deliver(bp, ENOMEM); 356 mtx_lock(&d->bqueue_mtx); 357 continue; 358 } 359 if (cbp->bio_offset + cbp->bio_length > s->size) 360 cbp->bio_length = s->size - 361 cbp->bio_offset; 362 cbp->bio_done = gv_drive_done; 363 cbp->bio_offset += s->drive_offset; 364 g_io_request(cbp, LIST_FIRST(&gp->consumer)); 365 } 366 367 mtx_lock(&d->bqueue_mtx); 368 } 369 370 TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) { 371 TAILQ_REMOVE(&d->bqueue, bq, queue); 372 mtx_unlock(&d->bqueue_mtx); 373 bp = bq->bp; 374 g_free(bq); 375 if (bp->bio_cflags & GV_BIO_DONE) 376 g_std_done(bp); 377 else 378 g_io_deliver(bp, ENXIO); 379 mtx_lock(&d->bqueue_mtx); 380 } 381 mtx_unlock(&d->bqueue_mtx); 382 d->flags |= GV_DRIVE_THREAD_DEAD; 383 384 kthread_exit(ENXIO); 385} 386 387 388static void 389gv_drive_orphan(struct g_consumer *cp) 390{ 391 struct g_geom *gp; 392 struct gv_drive *d; 393 394 g_topology_assert(); 395 gp = cp->geom; 396 g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name); 397 d = gp->softc; 398 if (d != NULL) { 399 gv_set_drive_state(d, GV_DRIVE_DOWN, 400 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 401 g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); 402 } else 403 g_wither_geom(gp, ENXIO); 404} 405 406static struct g_geom * 407gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 408{ 409 struct g_geom *gp, *gp2; 410 struct g_consumer *cp; 411 struct gv_drive *d; 412 struct gv_sd *s; 413 struct gv_softc *sc; 414 struct gv_freelist *fl; 415 struct gv_hdr *vhdr; 416 int error; 417 char *buf, errstr[ERRBUFSIZ]; 418 419 vhdr = NULL; 420 d = NULL; 421 422 g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name); 423 g_topology_assert(); 424 425 /* Find the VINUM class and its associated geom. */ 426 gp2 = find_vinum_geom(); 427 if (gp2 == NULL) 428 return (NULL); 429 sc = gp2->softc; 430 431 gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); 432 gp->start = gv_drive_start; 433 gp->orphan = gv_drive_orphan; 434 gp->access = gv_drive_access; 435 gp->start = gv_drive_start; 436 437 cp = g_new_consumer(gp); 438 g_attach(cp, pp); 439 error = g_access(cp, 1, 0, 0); 440 if (error) { 441 g_detach(cp); 442 g_destroy_consumer(cp); 443 g_destroy_geom(gp); 444 return (NULL); 445 } 446 447 g_topology_unlock(); 448 449 /* Now check if the provided slice is a valid vinum drive. */ 450 do { 451 vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, &error); 452 if (vhdr == NULL || error != 0) 453 break; 454 if (vhdr->magic != GV_MAGIC) { 455 g_free(vhdr); 456 break; 457 } 458 459 /* 460 * We have found a valid vinum drive. Let's see if it is 461 * already known in the configuration. There's a chance that 462 * the VINUMDRIVE class tastes before the VINUM class could 463 * taste, so parse the configuration here too, just to be on 464 * the safe side. 465 */ 466 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, &error); 467 if (buf == NULL || error != 0) { 468 g_free(vhdr); 469 break; 470 } 471 g_topology_lock(); 472 gv_parse_config(sc, buf, 1); 473 g_free(buf); 474 475 d = gv_find_drive(sc, vhdr->label.name); 476 477 /* We already know about this drive. */ 478 if (d != NULL) { 479 /* Check if this drive already has a geom. */ 480 if (d->geom != NULL) { 481 g_topology_unlock(); 482 break; 483 } 484 bcopy(vhdr, d->hdr, sizeof(*vhdr)); 485 486 /* This is a new drive. */ 487 } else { 488 d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 489 490 /* Initialize all needed variables. */ 491 d->size = pp->mediasize - GV_DATA_START; 492 d->avail = d->size; 493 d->hdr = vhdr; 494 strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); 495 LIST_INIT(&d->subdisks); 496 LIST_INIT(&d->freelist); 497 498 /* We also need a freelist entry. */ 499 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 500 fl->offset = GV_DATA_START; 501 fl->size = d->avail; 502 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 503 d->freelist_entries = 1; 504 505 TAILQ_INIT(&d->bqueue); 506 507 /* Save it into the main configuration. */ 508 LIST_INSERT_HEAD(&sc->drives, d, drive); 509 } 510 511 /* 512 * Create a bio queue mutex and a worker thread, if necessary. 513 */ 514 if (mtx_initialized(&d->bqueue_mtx) == 0) 515 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 516 517 if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) { 518 kthread_create(gv_drive_worker, d, NULL, 0, 0, 519 "gv_d %s", d->name); 520 d->flags |= GV_DRIVE_THREAD_ACTIVE; 521 } 522 523 g_access(cp, -1, 0, 0); 524 525 gp->softc = d; 526 d->geom = gp; 527 d->vinumconf = sc; 528 strncpy(d->device, pp->name, GV_MAXDRIVENAME); 529 530 /* 531 * Find out which subdisks belong to this drive and crosslink 532 * them. 533 */ 534 LIST_FOREACH(s, &sc->subdisks, sd) { 535 if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME)) 536 /* XXX: errors ignored */ 537 gv_sd_to_drive(sc, d, s, errstr, 538 sizeof(errstr)); 539 } 540 541 /* This drive is now up for sure. */ 542 gv_set_drive_state(d, GV_DRIVE_UP, 0); 543 544 /* 545 * If there are subdisks on this drive, we need to create 546 * providers for them. 547 */ 548 if (d->sdcount) 549 gv_drive_modify(d); 550 551 return (gp); 552 553 } while (0); 554 555 g_topology_lock(); 556 g_access(cp, -1, 0, 0); 557 558 g_detach(cp); 559 g_destroy_consumer(cp); 560 g_destroy_geom(gp); 561 return (NULL); 562} 563 564/* 565 * Modify the providers for the given drive 'd'. It is assumed that the 566 * subdisk list of 'd' is already correctly set up. 567 */ 568void 569gv_drive_modify(struct gv_drive *d) 570{ 571 struct g_geom *gp; 572 struct g_consumer *cp; 573 struct g_provider *pp, *pp2; 574 struct gv_sd *s; 575 576 KASSERT(d != NULL, ("gv_drive_modify: null d")); 577 gp = d->geom; 578 KASSERT(gp != NULL, ("gv_drive_modify: null gp")); 579 cp = LIST_FIRST(&gp->consumer); 580 KASSERT(cp != NULL, ("gv_drive_modify: null cp")); 581 pp = cp->provider; 582 KASSERT(pp != NULL, ("gv_drive_modify: null pp")); 583 584 g_topology_assert(); 585 586 LIST_FOREACH(s, &d->subdisks, from_drive) { 587 /* This subdisk already has a provider. */ 588 if (s->provider != NULL) 589 continue; 590 pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name); 591 pp2->mediasize = s->size; 592 pp2->sectorsize = pp->sectorsize; 593 g_error_provider(pp2, 0); 594 s->provider = pp2; 595 pp2->private = s; 596 } 597} 598 599static void 600gv_drive_dead(void *arg, int flag) 601{ 602 struct g_geom *gp; 603 struct g_consumer *cp; 604 struct gv_drive *d; 605 struct gv_sd *s; 606 607 g_topology_assert(); 608 KASSERT(arg != NULL, ("gv_drive_dead: NULL arg")); 609 610 if (flag == EV_CANCEL) 611 return; 612 613 d = arg; 614 if (d->state != GV_DRIVE_DOWN) 615 return; 616 617 g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name); 618 619 gp = d->geom; 620 if (gp == NULL) 621 return; 622 623 LIST_FOREACH(cp, &gp->consumer, consumer) { 624 if (cp->nstart != cp->nend) { 625 printf("GEOM_VINUM: dead drive '%s' has still " 626 "active requests, can't detach consumer\n", 627 d->name); 628 g_post_event(gv_drive_dead, d, M_WAITOK, d, 629 NULL); 630 return; 631 } 632 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 633 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 634 } 635 636 printf("GEOM_VINUM: lost drive '%s'\n", d->name); 637 d->geom = NULL; 638 LIST_FOREACH(s, &d->subdisks, from_drive) { 639 s->provider = NULL; 640 s->consumer = NULL; 641 } 642 gv_kill_drive_thread(d); 643 gp->softc = NULL; 644 g_wither_geom(gp, ENXIO); 645} 646 647static int 648gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, 649 struct g_geom *gp) 650{ 651 struct gv_drive *d; 652 653 g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); 654 g_topology_assert(); 655 656 d = gp->softc; 657 gv_kill_drive_thread(d); 658 659 g_wither_geom(gp, ENXIO); 660 return (0); 661} 662 663#define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" 664 665static struct g_class g_vinum_drive_class = { 666 .name = VINUMDRIVE_CLASS_NAME, 667 .version = G_VERSION, 668 .taste = gv_drive_taste, 669 .destroy_geom = gv_drive_destroy_geom 670}; 671 672DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); 673