/* geom_vinum_drive.c revision 149094 */
1/*- 2 * Copyright (c) 2004, 2005 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 149094 2005-08-15 17:07:47Z le $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/errno.h> 33#include <sys/conf.h> 34#include <sys/kernel.h> 35#include <sys/kthread.h> 36#include <sys/libkern.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/module.h> 40#include <sys/mutex.h> 41#include <sys/sbuf.h> 42#include <sys/systm.h> 43#include <sys/time.h> 44 45#include <geom/geom.h> 46#include <geom/vinum/geom_vinum_var.h> 47#include <geom/vinum/geom_vinum.h> 48#include <geom/vinum/geom_vinum_share.h> 49 50static void gv_drive_dead(void *, int); 51static void gv_drive_worker(void *); 52void gv_drive_modify(struct gv_drive *); 53 54void 55gv_config_new_drive(struct gv_drive *d) 56{ 57 struct gv_hdr *vhdr; 58 struct gv_freelist *fl; 59 60 KASSERT(d != NULL, ("config_new_drive: NULL d")); 61 62 vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); 63 vhdr->magic = GV_MAGIC; 64 vhdr->config_length = GV_CFG_LEN; 65 66 bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); 67 strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); 68 microtime(&vhdr->label.date_of_birth); 69 70 d->hdr = vhdr; 71 72 LIST_INIT(&d->subdisks); 73 LIST_INIT(&d->freelist); 74 75 fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); 76 fl->offset = GV_DATA_START; 77 fl->size = d->avail; 78 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 79 d->freelist_entries = 1; 80 81 TAILQ_INIT(&d->bqueue); 82 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 83 kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); 84 d->flags |= GV_DRIVE_THREAD_ACTIVE; 85} 86 87void 88gv_save_config_all(struct gv_softc *sc) 89{ 90 struct gv_drive *d; 91 92 g_topology_assert(); 93 94 LIST_FOREACH(d, &sc->drives, drive) { 95 if (d->geom == NULL) 96 continue; 97 gv_save_config(NULL, d, sc); 98 } 99} 100 101/* Save the vinum configuration back to disk. 
*/ 102void 103gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) 104{ 105 struct g_geom *gp; 106 struct g_consumer *cp2; 107 struct gv_hdr *vhdr, *hdr; 108 struct sbuf *sb; 109 int error; 110 111 g_topology_assert(); 112 113 KASSERT(d != NULL, ("gv_save_config: null d")); 114 KASSERT(sc != NULL, ("gv_save_config: null sc")); 115 116 /* 117 * We can't save the config on a drive that isn't up, but drives that 118 * were just created aren't officially up yet, so we check a special 119 * flag. 120 */ 121 if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN)) 122 return; 123 124 if (cp == NULL) { 125 gp = d->geom; 126 KASSERT(gp != NULL, ("gv_save_config: null gp")); 127 cp2 = LIST_FIRST(&gp->consumer); 128 KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); 129 } else 130 cp2 = cp; 131 132 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 133 vhdr->magic = GV_MAGIC; 134 vhdr->config_length = GV_CFG_LEN; 135 136 hdr = d->hdr; 137 if (hdr == NULL) { 138 printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name); 139 g_free(vhdr); 140 return; 141 } 142 microtime(&hdr->label.last_update); 143 bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); 144 145 sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 146 gv_format_config(sc, sb, 1, NULL); 147 sbuf_finish(sb); 148 149 error = g_access(cp2, 0, 1, 0); 150 if (error) { 151 printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n", 152 d->name, error); 153 sbuf_delete(sb); 154 g_free(vhdr); 155 return; 156 } 157 g_topology_unlock(); 158 159 do { 160 error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN); 161 if (error) { 162 printf("GEOM_VINUM: writing vhdr failed on drive %s, " 163 "errno %d", d->name, error); 164 break; 165 } 166 167 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), 168 GV_CFG_LEN); 169 if (error) { 170 printf("GEOM_VINUM: writing first config copy failed " 171 "on drive %s, errno %d", d->name, error); 172 break; 173 } 174 175 error = g_write_data(cp2, 
GV_CFG_OFFSET + GV_CFG_LEN, 176 sbuf_data(sb), GV_CFG_LEN); 177 if (error) 178 printf("GEOM_VINUM: writing second config copy failed " 179 "on drive %s, errno %d", d->name, error); 180 } while (0); 181 182 g_topology_lock(); 183 g_access(cp2, 0, -1, 0); 184 sbuf_delete(sb); 185 g_free(vhdr); 186 187 if (d->geom != NULL) 188 gv_drive_modify(d); 189} 190 191/* This resembles g_slice_access(). */ 192static int 193gv_drive_access(struct g_provider *pp, int dr, int dw, int de) 194{ 195 struct g_geom *gp; 196 struct g_consumer *cp; 197 struct g_provider *pp2; 198 struct gv_drive *d; 199 struct gv_sd *s, *s2; 200 int error; 201 202 gp = pp->geom; 203 cp = LIST_FIRST(&gp->consumer); 204 if (cp == NULL) 205 return (0); 206 207 d = gp->softc; 208 if (d == NULL) 209 return (0); 210 211 s = pp->private; 212 KASSERT(s != NULL, ("gv_drive_access: NULL s")); 213 214 LIST_FOREACH(s2, &d->subdisks, from_drive) { 215 if (s == s2) 216 continue; 217 if (s->drive_offset + s->size <= s2->drive_offset) 218 continue; 219 if (s2->drive_offset + s2->size <= s->drive_offset) 220 continue; 221 222 /* Overlap. */ 223 pp2 = s2->provider; 224 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); 225 if ((pp->acw + dw) > 0 && pp2->ace > 0) 226 return (EPERM); 227 if ((pp->ace + de) > 0 && pp2->acw > 0) 228 return (EPERM); 229 } 230 231 error = g_access(cp, dr, dw, de); 232 return (error); 233} 234 235static void 236gv_drive_done(struct bio *bp) 237{ 238 struct gv_drive *d; 239 struct gv_bioq *bq; 240 241 /* Put the BIO on the worker queue again. 
*/ 242 d = bp->bio_from->geom->softc; 243 bp->bio_cflags |= GV_BIO_DONE; 244 bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO); 245 bq->bp = bp; 246 mtx_lock(&d->bqueue_mtx); 247 TAILQ_INSERT_TAIL(&d->bqueue, bq, queue); 248 wakeup(d); 249 mtx_unlock(&d->bqueue_mtx); 250} 251 252 253static void 254gv_drive_start(struct bio *bp) 255{ 256 struct gv_drive *d; 257 struct gv_sd *s; 258 struct gv_bioq *bq; 259 260 switch (bp->bio_cmd) { 261 case BIO_READ: 262 case BIO_WRITE: 263 case BIO_DELETE: 264 break; 265 case BIO_GETATTR: 266 default: 267 g_io_deliver(bp, EOPNOTSUPP); 268 return; 269 } 270 271 s = bp->bio_to->private; 272 if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { 273 g_io_deliver(bp, ENXIO); 274 return; 275 } 276 277 d = bp->bio_to->geom->softc; 278 279 /* 280 * Put the BIO on the worker queue, where the worker thread will pick 281 * it up. 282 */ 283 bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO); 284 bq->bp = bp; 285 mtx_lock(&d->bqueue_mtx); 286 TAILQ_INSERT_TAIL(&d->bqueue, bq, queue); 287 wakeup(d); 288 mtx_unlock(&d->bqueue_mtx); 289 290} 291 292static void 293gv_drive_worker(void *arg) 294{ 295 struct bio *bp, *cbp; 296 struct g_geom *gp; 297 struct g_provider *pp; 298 struct gv_drive *d; 299 struct gv_sd *s; 300 struct gv_bioq *bq, *bq2; 301 int error; 302 303 d = arg; 304 305 mtx_lock(&d->bqueue_mtx); 306 for (;;) { 307 /* We were signaled to exit. */ 308 if (d->flags & GV_DRIVE_THREAD_DIE) 309 break; 310 311 /* Take the first BIO from out queue. */ 312 bq = TAILQ_FIRST(&d->bqueue); 313 if (bq == NULL) { 314 msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10); 315 continue; 316 } 317 TAILQ_REMOVE(&d->bqueue, bq, queue); 318 mtx_unlock(&d->bqueue_mtx); 319 320 bp = bq->bp; 321 g_free(bq); 322 pp = bp->bio_to; 323 gp = pp->geom; 324 325 /* Completed request. */ 326 if (bp->bio_cflags & GV_BIO_DONE) { 327 error = bp->bio_error; 328 329 /* Deliver the original request. */ 330 g_std_done(bp); 331 332 /* The request had an error, we need to clean up. 
*/ 333 if (error != 0) { 334 g_topology_lock(); 335 gv_set_drive_state(d, GV_DRIVE_DOWN, 336 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 337 g_topology_unlock(); 338 g_post_event(gv_drive_dead, d, M_WAITOK, d, 339 NULL); 340 } 341 342 /* New request, needs to be sent downwards. */ 343 } else { 344 s = pp->private; 345 346 if ((s->state == GV_SD_DOWN) || 347 (s->state == GV_SD_STALE)) { 348 g_io_deliver(bp, ENXIO); 349 mtx_lock(&d->bqueue_mtx); 350 continue; 351 } 352 if (bp->bio_offset > s->size) { 353 g_io_deliver(bp, EINVAL); 354 mtx_lock(&d->bqueue_mtx); 355 continue; 356 } 357 358 cbp = g_clone_bio(bp); 359 if (cbp == NULL) { 360 g_io_deliver(bp, ENOMEM); 361 mtx_lock(&d->bqueue_mtx); 362 continue; 363 } 364 if (cbp->bio_offset + cbp->bio_length > s->size) 365 cbp->bio_length = s->size - 366 cbp->bio_offset; 367 cbp->bio_done = gv_drive_done; 368 cbp->bio_offset += s->drive_offset; 369 g_io_request(cbp, LIST_FIRST(&gp->consumer)); 370 } 371 372 mtx_lock(&d->bqueue_mtx); 373 } 374 375 TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) { 376 TAILQ_REMOVE(&d->bqueue, bq, queue); 377 mtx_unlock(&d->bqueue_mtx); 378 bp = bq->bp; 379 g_free(bq); 380 if (bp->bio_cflags & GV_BIO_DONE) 381 g_std_done(bp); 382 else 383 g_io_deliver(bp, ENXIO); 384 mtx_lock(&d->bqueue_mtx); 385 } 386 mtx_unlock(&d->bqueue_mtx); 387 d->flags |= GV_DRIVE_THREAD_DEAD; 388 389 kthread_exit(ENXIO); 390} 391 392 393static void 394gv_drive_orphan(struct g_consumer *cp) 395{ 396 struct g_geom *gp; 397 struct gv_drive *d; 398 399 g_topology_assert(); 400 gp = cp->geom; 401 g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name); 402 d = gp->softc; 403 if (d != NULL) { 404 gv_set_drive_state(d, GV_DRIVE_DOWN, 405 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 406 g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); 407 } else 408 g_wither_geom(gp, ENXIO); 409} 410 411static struct g_geom * 412gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 413{ 414 struct g_geom *gp, *gp2; 415 struct 
g_consumer *cp; 416 struct gv_drive *d; 417 struct gv_sd *s; 418 struct gv_softc *sc; 419 struct gv_freelist *fl; 420 struct gv_hdr *vhdr; 421 int error; 422 char *buf, errstr[ERRBUFSIZ]; 423 424 vhdr = NULL; 425 d = NULL; 426 427 g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name); 428 g_topology_assert(); 429 430 /* Find the VINUM class and its associated geom. */ 431 gp2 = find_vinum_geom(); 432 if (gp2 == NULL) 433 return (NULL); 434 sc = gp2->softc; 435 436 gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); 437 gp->start = gv_drive_start; 438 gp->orphan = gv_drive_orphan; 439 gp->access = gv_drive_access; 440 gp->start = gv_drive_start; 441 442 cp = g_new_consumer(gp); 443 g_attach(cp, pp); 444 error = g_access(cp, 1, 0, 0); 445 if (error) { 446 g_detach(cp); 447 g_destroy_consumer(cp); 448 g_destroy_geom(gp); 449 return (NULL); 450 } 451 452 g_topology_unlock(); 453 454 /* Now check if the provided slice is a valid vinum drive. */ 455 do { 456 vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, &error); 457 if (vhdr == NULL || error != 0) 458 break; 459 if (vhdr->magic != GV_MAGIC) { 460 g_free(vhdr); 461 break; 462 } 463 464 /* 465 * We have found a valid vinum drive. Let's see if it is 466 * already known in the configuration. There's a chance that 467 * the VINUMDRIVE class tastes before the VINUM class could 468 * taste, so parse the configuration here too, just to be on 469 * the safe side. 470 */ 471 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, &error); 472 if (buf == NULL || error != 0) { 473 g_free(vhdr); 474 break; 475 } 476 g_topology_lock(); 477 gv_parse_config(sc, buf, 1); 478 g_free(buf); 479 480 d = gv_find_drive(sc, vhdr->label.name); 481 482 /* We already know about this drive. */ 483 if (d != NULL) { 484 /* Check if this drive already has a geom. */ 485 if (d->geom != NULL) { 486 g_topology_unlock(); 487 break; 488 } 489 bcopy(vhdr, d->hdr, sizeof(*vhdr)); 490 491 /* This is a new drive. 
*/ 492 } else { 493 d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 494 495 /* Initialize all needed variables. */ 496 d->size = pp->mediasize - GV_DATA_START; 497 d->avail = d->size; 498 d->hdr = vhdr; 499 strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); 500 LIST_INIT(&d->subdisks); 501 LIST_INIT(&d->freelist); 502 503 /* We also need a freelist entry. */ 504 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 505 fl->offset = GV_DATA_START; 506 fl->size = d->avail; 507 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 508 d->freelist_entries = 1; 509 510 TAILQ_INIT(&d->bqueue); 511 512 /* Save it into the main configuration. */ 513 LIST_INSERT_HEAD(&sc->drives, d, drive); 514 } 515 516 /* 517 * Create a bio queue mutex and a worker thread, if necessary. 518 */ 519 if (mtx_initialized(&d->bqueue_mtx) == 0) 520 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 521 522 if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) { 523 kthread_create(gv_drive_worker, d, NULL, 0, 0, 524 "gv_d %s", d->name); 525 d->flags |= GV_DRIVE_THREAD_ACTIVE; 526 } 527 528 g_access(cp, -1, 0, 0); 529 530 gp->softc = d; 531 d->geom = gp; 532 d->vinumconf = sc; 533 strncpy(d->device, pp->name, GV_MAXDRIVENAME); 534 535 /* 536 * Find out which subdisks belong to this drive and crosslink 537 * them. 538 */ 539 LIST_FOREACH(s, &sc->subdisks, sd) { 540 if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME)) 541 /* XXX: errors ignored */ 542 gv_sd_to_drive(sc, d, s, errstr, 543 sizeof(errstr)); 544 } 545 546 /* This drive is now up for sure. */ 547 gv_set_drive_state(d, GV_DRIVE_UP, 0); 548 549 /* 550 * If there are subdisks on this drive, we need to create 551 * providers for them. 552 */ 553 if (d->sdcount) 554 gv_drive_modify(d); 555 556 return (gp); 557 558 } while (0); 559 560 g_topology_lock(); 561 g_access(cp, -1, 0, 0); 562 563 g_detach(cp); 564 g_destroy_consumer(cp); 565 g_destroy_geom(gp); 566 return (NULL); 567} 568 569/* 570 * Modify the providers for the given drive 'd'. 
It is assumed that the 571 * subdisk list of 'd' is already correctly set up. 572 */ 573void 574gv_drive_modify(struct gv_drive *d) 575{ 576 struct g_geom *gp; 577 struct g_consumer *cp; 578 struct g_provider *pp, *pp2; 579 struct gv_sd *s; 580 581 KASSERT(d != NULL, ("gv_drive_modify: null d")); 582 gp = d->geom; 583 KASSERT(gp != NULL, ("gv_drive_modify: null gp")); 584 cp = LIST_FIRST(&gp->consumer); 585 KASSERT(cp != NULL, ("gv_drive_modify: null cp")); 586 pp = cp->provider; 587 KASSERT(pp != NULL, ("gv_drive_modify: null pp")); 588 589 g_topology_assert(); 590 591 LIST_FOREACH(s, &d->subdisks, from_drive) { 592 /* This subdisk already has a provider. */ 593 if (s->provider != NULL) 594 continue; 595 pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name); 596 pp2->mediasize = s->size; 597 pp2->sectorsize = pp->sectorsize; 598 g_error_provider(pp2, 0); 599 s->provider = pp2; 600 pp2->private = s; 601 } 602} 603 604static void 605gv_drive_dead(void *arg, int flag) 606{ 607 struct g_geom *gp; 608 struct g_consumer *cp; 609 struct gv_drive *d; 610 struct gv_sd *s; 611 612 g_topology_assert(); 613 KASSERT(arg != NULL, ("gv_drive_dead: NULL arg")); 614 615 if (flag == EV_CANCEL) 616 return; 617 618 d = arg; 619 if (d->state != GV_DRIVE_DOWN) 620 return; 621 622 g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name); 623 624 gp = d->geom; 625 if (gp == NULL) 626 return; 627 628 LIST_FOREACH(cp, &gp->consumer, consumer) { 629 if (cp->nstart != cp->nend) { 630 printf("GEOM_VINUM: dead drive '%s' has still " 631 "active requests, can't detach consumer\n", 632 d->name); 633 g_post_event(gv_drive_dead, d, M_WAITOK, d, 634 NULL); 635 return; 636 } 637 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 638 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 639 } 640 641 printf("GEOM_VINUM: lost drive '%s'\n", d->name); 642 d->geom = NULL; 643 LIST_FOREACH(s, &d->subdisks, from_drive) { 644 s->provider = NULL; 645 s->consumer = NULL; 646 } 647 gv_kill_drive_thread(d); 648 gp->softc = NULL; 
649 g_wither_geom(gp, ENXIO); 650} 651 652static int 653gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, 654 struct g_geom *gp) 655{ 656 struct gv_drive *d; 657 658 g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); 659 g_topology_assert(); 660 661 d = gp->softc; 662 gv_kill_drive_thread(d); 663 664 g_wither_geom(gp, ENXIO); 665 return (0); 666} 667 668#define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" 669 670static struct g_class g_vinum_drive_class = { 671 .name = VINUMDRIVE_CLASS_NAME, 672 .version = G_VERSION, 673 .taste = gv_drive_taste, 674 .destroy_geom = gv_drive_destroy_geom 675}; 676 677DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); 678