geom_vinum_plex.c revision 190507
1/*- 2 * Copyright (c) 2004, 2007 Lukas Ertl 3 * Copyright (c) 2007, 2009 Ulf Lilleengen 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 190507 2009-03-28 17:20:08Z lulf $"); 30 31#include <sys/param.h> 32#include <sys/bio.h> 33#include <sys/lock.h> 34#include <sys/malloc.h> 35#include <sys/systm.h> 36 37#include <geom/geom.h> 38#include <geom/vinum/geom_vinum_var.h> 39#include <geom/vinum/geom_vinum_raid5.h> 40#include <geom/vinum/geom_vinum.h> 41 42static int gv_check_parity(struct gv_plex *, struct bio *, 43 struct gv_raid5_packet *); 44static int gv_normal_parity(struct gv_plex *, struct bio *, 45 struct gv_raid5_packet *); 46static void gv_plex_flush(struct gv_plex *); 47static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *, 48 int *, int); 49static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t, 50 off_t, caddr_t); 51void 52gv_plex_start(struct gv_plex *p, struct bio *bp) 53{ 54 struct bio *cbp; 55 struct gv_sd *s; 56 struct gv_raid5_packet *wp; 57 caddr_t addr; 58 off_t bcount, boff, len; 59 60 bcount = bp->bio_length; 61 addr = bp->bio_data; 62 boff = bp->bio_offset; 63 64 /* Walk over the whole length of the request, we might split it up. */ 65 while (bcount > 0) { 66 wp = NULL; 67 68 /* 69 * RAID5 plexes need special treatment, as a single request 70 * might involve several read/write sub-requests. 71 */ 72 if (p->org == GV_PLEX_RAID5) { 73 wp = gv_raid5_start(p, bp, addr, boff, bcount); 74 if (wp == NULL) 75 return; 76 77 len = wp->length; 78 79 if (TAILQ_EMPTY(&wp->bits)) 80 g_free(wp); 81 else if (wp->lockbase != -1) 82 TAILQ_INSERT_TAIL(&p->packets, wp, list); 83 84 /* 85 * Requests to concatenated and striped plexes go straight 86 * through. 87 */ 88 } else { 89 len = gv_plex_normal_request(p, bp, boff, bcount, addr); 90 } 91 if (len < 0) 92 return; 93 94 bcount -= len; 95 addr += len; 96 boff += len; 97 } 98 99 /* 100 * Fire off all sub-requests. 
We get the correct consumer (== drive) 101 * to send each request to via the subdisk that was stored in 102 * cbp->bio_caller1. 103 */ 104 cbp = bioq_takefirst(p->bqueue); 105 while (cbp != NULL) { 106 /* 107 * RAID5 sub-requests need to come in correct order, otherwise 108 * we trip over the parity, as it might be overwritten by 109 * another sub-request. We abuse cbp->bio_caller2 to mark 110 * potential overlap situations. 111 */ 112 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) { 113 /* Park the bio on the waiting queue. */ 114 cbp->bio_cflags |= GV_BIO_ONHOLD; 115 bioq_disksort(p->wqueue, cbp); 116 } else { 117 s = cbp->bio_caller1; 118 g_io_request(cbp, s->drive_sc->consumer); 119 } 120 cbp = bioq_takefirst(p->bqueue); 121 } 122} 123 124static int 125gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off, 126 off_t *real_len, int *sdno, int growing) 127{ 128 struct gv_sd *s; 129 int i, sdcount; 130 off_t len_left, stripeend, stripeno, stripestart; 131 132 switch (p->org) { 133 case GV_PLEX_CONCAT: 134 /* 135 * Find the subdisk where this request starts. The subdisks in 136 * this list must be ordered by plex_offset. 137 */ 138 i = 0; 139 LIST_FOREACH(s, &p->subdisks, in_plex) { 140 if (s->plex_offset <= boff && 141 s->plex_offset + s->size > boff) { 142 *sdno = i; 143 break; 144 } 145 i++; 146 } 147 if (s == NULL || s->drive_sc == NULL) 148 return (GV_ERR_NOTFOUND); 149 150 /* Calculate corresponding offsets on disk. */ 151 *real_off = boff - s->plex_offset; 152 len_left = s->size - (*real_off); 153 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); 154 *real_len = (bcount > len_left) ? len_left : bcount; 155 break; 156 157 case GV_PLEX_STRIPED: 158 /* The number of the stripe where the request starts. */ 159 stripeno = boff / p->stripesize; 160 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0")); 161 162 /* Take growing subdisks into account when calculating. 
*/ 163 sdcount = gv_sdcount(p, (boff >= p->synced)); 164 165 if (!(boff + bcount <= p->synced) && 166 (p->flags & GV_PLEX_GROWING) && 167 !growing) 168 return (GV_ERR_ISBUSY); 169 *sdno = stripeno % sdcount; 170 171 KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0")); 172 stripestart = (stripeno / sdcount) * 173 p->stripesize; 174 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0")); 175 stripeend = stripestart + p->stripesize; 176 *real_off = boff - (stripeno * p->stripesize) + 177 stripestart; 178 len_left = stripeend - *real_off; 179 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); 180 181 *real_len = (bcount <= len_left) ? bcount : len_left; 182 break; 183 184 default: 185 return (GV_ERR_PLEXORG); 186 } 187 return (0); 188} 189 190/* 191 * Prepare a normal plex request. 192 */ 193static int 194gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff, 195 off_t bcount, caddr_t addr) 196{ 197 struct gv_sd *s; 198 struct bio *cbp; 199 off_t real_len, real_off; 200 int i, err, sdno; 201 202 s = NULL; 203 sdno = -1; 204 real_len = real_off = 0; 205 206 err = ENXIO; 207 208 if (p == NULL || LIST_EMPTY(&p->subdisks)) 209 goto bad; 210 211 err = gv_plex_offset(p, boff, bcount, &real_off, 212 &real_len, &sdno, (bp->bio_pflags & GV_BIO_SYNCREQ)); 213 /* If the request was blocked, put it into wait. */ 214 if (err == GV_ERR_ISBUSY) { 215 bioq_disksort(p->rqueue, bp); 216 return (-1); /* "Fail", and delay request. */ 217 } 218 if (err) { 219 err = ENXIO; 220 goto bad; 221 } 222 err = ENXIO; 223 224 /* Find the right subdisk. */ 225 i = 0; 226 LIST_FOREACH(s, &p->subdisks, in_plex) { 227 if (i == sdno) 228 break; 229 i++; 230 } 231 232 /* Subdisk not found. */ 233 if (s == NULL || s->drive_sc == NULL) 234 goto bad; 235 236 /* Now check if we can handle the request on this subdisk. */ 237 switch (s->state) { 238 case GV_SD_UP: 239 /* If the subdisk is up, just continue. 
*/ 240 break; 241 case GV_SD_DOWN: 242 if (bp->bio_cflags & GV_BIO_INTERNAL) 243 G_VINUM_DEBUG(0, "subdisk must be in the stale state in" 244 " order to perform administrative requests"); 245 goto bad; 246 case GV_SD_STALE: 247 if (!(bp->bio_cflags & GV_BIO_SYNCREQ)) { 248 G_VINUM_DEBUG(0, "subdisk stale, unable to perform " 249 "regular requests"); 250 goto bad; 251 } 252 253 G_VINUM_DEBUG(1, "sd %s is initializing", s->name); 254 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); 255 break; 256 case GV_SD_INITIALIZING: 257 if (bp->bio_cmd == BIO_READ) 258 goto bad; 259 break; 260 default: 261 /* All other subdisk states mean it's not accessible. */ 262 goto bad; 263 } 264 265 /* Clone the bio and adjust the offsets and sizes. */ 266 cbp = g_clone_bio(bp); 267 if (cbp == NULL) { 268 err = ENOMEM; 269 goto bad; 270 } 271 cbp->bio_offset = real_off + s->drive_offset; 272 cbp->bio_length = real_len; 273 cbp->bio_data = addr; 274 cbp->bio_done = gv_done; 275 cbp->bio_caller1 = s; 276 if ((bp->bio_cflags & GV_BIO_SYNCREQ)) 277 cbp->bio_cflags |= GV_BIO_SYNCREQ; 278 279 /* Store the sub-requests now and let others issue them. */ 280 bioq_insert_tail(p->bqueue, cbp); 281 return (real_len); 282bad: 283 G_VINUM_LOGREQ(0, bp, "plex request failed."); 284 /* Building the sub-request failed. If internal BIO, do not deliver. */ 285 if (bp->bio_cflags & GV_BIO_INTERNAL) { 286 if (bp->bio_cflags & GV_BIO_MALLOC) 287 g_free(bp->bio_data); 288 g_destroy_bio(bp); 289 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING | 290 GV_PLEX_GROWING); 291 return (-1); 292 } 293 g_io_deliver(bp, err); 294 return (-1); 295} 296 297/* 298 * Handle a completed request to a striped or concatenated plex. 
 */
/*
 * Completion handler for one cloned sub-request of a striped or
 * concatenated plex: propagate the first error into the parent bio and
 * deliver (or hand off) the parent once all children have come in.
 */
void
gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
	struct bio *pbp;

	pbp = bp->bio_parent;
	/* Keep the first error seen; later errors do not overwrite it. */
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	g_destroy_bio(bp);
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		/* Just set it to length since multiple plexes will
		 * screw things up. */
		pbp->bio_completed = pbp->bio_length;
		/* Sync and grow requests go to their own completion paths. */
		if (pbp->bio_cflags & GV_BIO_SYNCREQ)
			gv_sync_complete(p, pbp);
		else if (pbp->bio_pflags & GV_BIO_SYNCREQ)
			gv_grow_complete(p, pbp);
		else
			g_io_deliver(pbp, pbp->bio_error);
	}
}

/*
 * Handle a completed request to a RAID-5 plex.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	off_t completed;
	int i;

	completed = 0;
	sc = p->vinumconf;
	/* bio_caller2 carries the RAID-5 packet this sub-request belongs to. */
	wp = bp->bio_caller2;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		/* A packet-less read completes on its own. */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/*
		 * Drop this bio from the packet's pending list and XOR its
		 * data into the packet buffer (degraded-mode reconstruction).
		 */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			for (i = 0; i < wp->length; i++)
				wp->data[i] ^= bp->bio_data[i];
			break;
		}
		/* Last sub-request of the packet: unblock waiters, free it. */
		if (TAILQ_EMPTY(&wp->bits)) {
			completed = wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					mtx_lock(&sc->queue_mtx);
					bioq_disksort(sc->bqueue, pbp);
					mtx_unlock(&sc->queue_mtx);
					pbp = bioq_takefirst(p->wqueue);
				}
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		/* XXX can this ever happen? */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			cbp = wp->parity;
			/* Fold the written data into the parity buffer. */
			if (cbp != NULL) {
				for (i = 0; i < wp->length; i++)
					cbp->bio_data[i] ^= bp->bio_data[i];
			}
			break;
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			/* Both helpers return non-zero once no further I/O
			 * was issued for this packet. */
			if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				completed = wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					mtx_lock(&sc->queue_mtx);
					bioq_disksort(sc->bqueue, pbp);
					mtx_unlock(&sc->queue_mtx);
					pbp = bioq_takefirst(p->wqueue);
				}
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	/* Keep the first error seen. */
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += completed;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		/* Hand it over for checking or delivery. */
		if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_cflags & GV_BIO_CHECK)) {
			gv_parity_complete(p, pbp);
		} else if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_cflags & GV_BIO_REBUILD)) {
			gv_rebuild_complete(p, pbp);
		} else if (pbp->bio_cflags & GV_BIO_INIT) {
			gv_init_complete(p, pbp);
		} else if (pbp->bio_cflags & GV_BIO_SYNCREQ) {
			gv_sync_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
			gv_grow_complete(p, pbp);
		} else {
			g_io_deliver(pbp, pbp->bio_error);
		}
	}

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

/*
 * Compare freshly computed parity against the on-disk parity for a parity
 * check/rebuild packet.  Returns 1 when the packet is finished, 0 when
 * another I/O (the waiting data write or the parity rewrite) was issued.
 */
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	struct gv_sd *s;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		/* A data write is still pending; issue it first. */
		pbp = wp->waiting;
		wp->waiting = NULL;
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			/* EAGAIN signals "parity mismatch" to the caller. */
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
				s = pbp->bio_caller1;
				g_io_request(pbp, s->drive_sc->consumer);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}

	}

	return (finished);
}

/*
 * Issue the remaining I/O of a normal RAID-5 write packet: first the
 * delayed data write (XORed into the parity buffer), then the parity
 * write itself.  Returns 1 once nothing more was issued, 0 otherwise.
 */
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	struct gv_sd *s;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		/* Fold the delayed write's data into the parity buffer. */
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		s = cbp->bio_caller1;
		g_io_request(cbp, s->drive_sc->consumer);
		finished = 0;
	}

	return (finished);
}

/* Flush the queue with delayed requests.
*/ 539static void 540gv_plex_flush(struct gv_plex *p) 541{ 542 struct gv_softc *sc; 543 struct bio *bp; 544 545 sc = p->vinumconf; 546 bp = bioq_takefirst(p->rqueue); 547 while (bp != NULL) { 548 gv_plex_start(p, bp); 549 bp = bioq_takefirst(p->rqueue); 550 } 551} 552 553int 554gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset, 555 off_t length, int type, caddr_t data) 556{ 557 struct gv_softc *sc; 558 struct bio *bp; 559 560 KASSERT(from != NULL, ("NULL from")); 561 KASSERT(to != NULL, ("NULL to")); 562 sc = from->vinumconf; 563 KASSERT(sc != NULL, ("NULL sc")); 564 565 bp = g_new_bio(); 566 if (bp == NULL) { 567 G_VINUM_DEBUG(0, "sync from '%s' failed at offset " 568 " %jd; out of memory", from->name, offset); 569 return (ENOMEM); 570 } 571 bp->bio_length = length; 572 bp->bio_done = gv_done; 573 bp->bio_cflags |= GV_BIO_SYNCREQ; 574 bp->bio_offset = offset; 575 bp->bio_caller1 = from; 576 bp->bio_caller2 = to; 577 bp->bio_cmd = type; 578 if (data == NULL) 579 data = g_malloc(length, M_WAITOK); 580 bp->bio_cflags |= GV_BIO_MALLOC; /* Free on the next run. */ 581 bp->bio_data = data; 582 583 /* Send down next. */ 584 mtx_lock(&sc->queue_mtx); 585 bioq_disksort(sc->bqueue, bp); 586 mtx_unlock(&sc->queue_mtx); 587 //gv_plex_start(from, bp); 588 return (0); 589} 590 591/* 592 * Handle a finished plex sync bio. 593 */ 594int 595gv_sync_complete(struct gv_plex *to, struct bio *bp) 596{ 597 struct gv_plex *from, *p; 598 struct gv_sd *s; 599 struct gv_volume *v; 600 struct gv_softc *sc; 601 off_t offset; 602 int err; 603 604 g_topology_assert_not(); 605 606 err = 0; 607 KASSERT(to != NULL, ("NULL to")); 608 KASSERT(bp != NULL, ("NULL bp")); 609 from = bp->bio_caller2; 610 KASSERT(from != NULL, ("NULL from")); 611 v = to->vol_sc; 612 KASSERT(v != NULL, ("NULL v")); 613 sc = v->vinumconf; 614 KASSERT(sc != NULL, ("NULL sc")); 615 616 /* If it was a read, write it. 
*/ 617 if (bp->bio_cmd == BIO_READ) { 618 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length, 619 BIO_WRITE, bp->bio_data); 620 /* If it was a write, read the next one. */ 621 } else if (bp->bio_cmd == BIO_WRITE) { 622 if (bp->bio_cflags & GV_BIO_MALLOC) 623 g_free(bp->bio_data); 624 to->synced += bp->bio_length; 625 /* If we're finished, clean up. */ 626 if (bp->bio_offset + bp->bio_length >= from->size) { 627 G_VINUM_DEBUG(1, "syncing of %s from %s completed", 628 to->name, from->name); 629 /* Update our state. */ 630 LIST_FOREACH(s, &to->subdisks, in_plex) 631 gv_set_sd_state(s, GV_SD_UP, 0); 632 gv_update_plex_state(to); 633 to->flags &= ~GV_PLEX_SYNCING; 634 to->synced = 0; 635 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 636 } else { 637 offset = bp->bio_offset + bp->bio_length; 638 err = gv_sync_request(from, to, offset, 639 MIN(bp->bio_length, from->size - offset), 640 BIO_READ, NULL); 641 } 642 } 643 g_destroy_bio(bp); 644 /* Clean up if there was an error. */ 645 if (err) { 646 to->flags &= ~GV_PLEX_SYNCING; 647 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err); 648 } 649 650 /* Check if all plexes are synced, and lower refcounts. */ 651 g_topology_lock(); 652 LIST_FOREACH(p, &v->plexes, in_volume) { 653 if (p->flags & GV_PLEX_SYNCING) { 654 g_topology_unlock(); 655 return (-1); 656 } 657 } 658 /* If we came here, all plexes are synced, and we're free. */ 659 gv_access(v->provider, -1, -1, 0); 660 g_topology_unlock(); 661 G_VINUM_DEBUG(1, "plex sync completed"); 662 gv_volume_flush(v); 663 return (0); 664} 665 666/* 667 * Create a new bio struct for the next grow request. 
668 */ 669int 670gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type, 671 caddr_t data) 672{ 673 struct gv_softc *sc; 674 struct bio *bp; 675 676 KASSERT(p != NULL, ("gv_grow_request: NULL p")); 677 sc = p->vinumconf; 678 KASSERT(sc != NULL, ("gv_grow_request: NULL sc")); 679 680 bp = g_new_bio(); 681 if (bp == NULL) { 682 G_VINUM_DEBUG(0, "grow of %s failed creating bio: " 683 "out of memory", p->name); 684 return (ENOMEM); 685 } 686 687 bp->bio_cmd = type; 688 bp->bio_done = gv_done; 689 bp->bio_error = 0; 690 bp->bio_caller1 = p; 691 bp->bio_offset = offset; 692 bp->bio_length = length; 693 bp->bio_pflags |= GV_BIO_SYNCREQ; /* XXX: misuse of pflags AND syncreq.*/ 694 if (data == NULL) 695 data = g_malloc(length, M_WAITOK); 696 bp->bio_cflags |= GV_BIO_MALLOC; 697 bp->bio_data = data; 698 699 mtx_lock(&sc->queue_mtx); 700 bioq_disksort(sc->bqueue, bp); 701 mtx_unlock(&sc->queue_mtx); 702 //gv_plex_start(p, bp); 703 return (0); 704} 705 706/* 707 * Finish handling of a bio to a growing plex. 708 */ 709void 710gv_grow_complete(struct gv_plex *p, struct bio *bp) 711{ 712 struct gv_softc *sc; 713 struct gv_sd *s; 714 struct gv_volume *v; 715 off_t origsize, offset; 716 int sdcount, err; 717 718 v = p->vol_sc; 719 KASSERT(v != NULL, ("gv_grow_complete: NULL v")); 720 sc = v->vinumconf; 721 KASSERT(sc != NULL, ("gv_grow_complete: NULL sc")); 722 err = 0; 723 724 /* If it was a read, write it. */ 725 if (bp->bio_cmd == BIO_READ) { 726 p->synced += bp->bio_length; 727 err = gv_grow_request(p, bp->bio_offset, bp->bio_length, 728 BIO_WRITE, bp->bio_data); 729 /* If it was a write, read next. */ 730 } else if (bp->bio_cmd == BIO_WRITE) { 731 if (bp->bio_cflags & GV_BIO_MALLOC) 732 g_free(bp->bio_data); 733 734 /* Find the real size of the plex. 
*/ 735 sdcount = gv_sdcount(p, 1); 736 s = LIST_FIRST(&p->subdisks); 737 KASSERT(s != NULL, ("NULL s")); 738 origsize = (s->size * (sdcount - 1)); 739 if (bp->bio_offset + bp->bio_length >= origsize) { 740 G_VINUM_DEBUG(1, "growing of %s completed", p->name); 741 p->flags &= ~GV_PLEX_GROWING; 742 LIST_FOREACH(s, &p->subdisks, in_plex) { 743 s->flags &= ~GV_SD_GROW; 744 gv_set_sd_state(s, GV_SD_UP, 0); 745 } 746 p->size = gv_plex_size(p); 747 gv_update_vol_size(v, gv_vol_size(v)); 748 gv_set_plex_state(p, GV_PLEX_UP, 0); 749 g_topology_lock(); 750 gv_access(v->provider, -1, -1, 0); 751 g_topology_unlock(); 752 p->synced = 0; 753 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 754 /* Issue delayed requests. */ 755 gv_plex_flush(p); 756 } else { 757 offset = bp->bio_offset + bp->bio_length; 758 err = gv_grow_request(p, offset, 759 MIN(bp->bio_length, origsize - offset), 760 BIO_READ, NULL); 761 } 762 } 763 g_destroy_bio(bp); 764 765 if (err) { 766 p->flags &= ~GV_PLEX_GROWING; 767 G_VINUM_DEBUG(0, "error growing plex: error code %d", err); 768 } 769} 770 771 772/* 773 * Create an initialization BIO and send it off to the consumer. Assume that 774 * we're given initialization data as parameter. 775 */ 776void 777gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length) 778{ 779 struct gv_drive *d; 780 struct g_consumer *cp; 781 struct bio *bp, *cbp; 782 783 KASSERT(s != NULL, ("gv_init_request: NULL s")); 784 d = s->drive_sc; 785 KASSERT(d != NULL, ("gv_init_request: NULL d")); 786 cp = d->consumer; 787 KASSERT(cp != NULL, ("gv_init_request: NULL cp")); 788 789 bp = g_new_bio(); 790 if (bp == NULL) { 791 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" 792 " (drive offset %jd); out of memory", s->name, 793 (intmax_t)s->initialized, (intmax_t)start); 794 return; /* XXX: Error codes. 
*/ 795 } 796 bp->bio_cmd = BIO_WRITE; 797 bp->bio_data = data; 798 bp->bio_done = gv_done; 799 bp->bio_error = 0; 800 bp->bio_length = length; 801 bp->bio_cflags |= GV_BIO_INIT; 802 bp->bio_offset = start; 803 bp->bio_caller1 = s; 804 805 /* Then ofcourse, we have to clone it. */ 806 cbp = g_clone_bio(bp); 807 if (cbp == NULL) { 808 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" 809 " (drive offset %jd); out of memory", s->name, 810 (intmax_t)s->initialized, (intmax_t)start); 811 return; /* XXX: Error codes. */ 812 } 813 cbp->bio_done = gv_done; 814 cbp->bio_caller1 = s; 815 /* Send it off to the consumer. */ 816 g_io_request(cbp, cp); 817} 818 819/* 820 * Handle a finished initialization BIO. 821 */ 822void 823gv_init_complete(struct gv_plex *p, struct bio *bp) 824{ 825 struct gv_softc *sc; 826 struct gv_drive *d; 827 struct g_consumer *cp; 828 struct gv_sd *s; 829 off_t start, length; 830 caddr_t data; 831 int error; 832 833 s = bp->bio_caller1; 834 start = bp->bio_offset; 835 length = bp->bio_length; 836 error = bp->bio_error; 837 data = bp->bio_data; 838 839 KASSERT(s != NULL, ("gv_init_complete: NULL s")); 840 d = s->drive_sc; 841 KASSERT(d != NULL, ("gv_init_complete: NULL d")); 842 cp = d->consumer; 843 KASSERT(cp != NULL, ("gv_init_complete: NULL cp")); 844 sc = p->vinumconf; 845 KASSERT(sc != NULL, ("gv_init_complete: NULL sc")); 846 847 g_destroy_bio(bp); 848 849 /* 850 * First we need to find out if it was okay, and abort if it's not. 851 * Then we need to free previous buffers, find out the correct subdisk, 852 * as well as getting the correct starting point and length of the BIO. 853 */ 854 if (start >= s->drive_offset + s->size) { 855 /* Free the data we initialized. 
*/ 856 if (data != NULL) 857 g_free(data); 858 g_topology_assert_not(); 859 g_topology_lock(); 860 g_access(cp, 0, -1, 0); 861 g_topology_unlock(); 862 if (error) { 863 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE | 864 GV_SETSTATE_CONFIG); 865 } else { 866 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); 867 s->initialized = 0; 868 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 869 G_VINUM_DEBUG(1, "subdisk '%s' init: finished " 870 "successfully", s->name); 871 } 872 return; 873 } 874 s->initialized += length; 875 start += length; 876 gv_init_request(s, start, data, length); 877} 878 879/* 880 * Create a new bio struct for the next parity rebuild. Used both by internal 881 * rebuild of degraded plexes as well as user initiated rebuilds/checks. 882 */ 883void 884gv_parity_request(struct gv_plex *p, int flags, off_t offset) 885{ 886 struct gv_softc *sc; 887 struct bio *bp; 888 889 KASSERT(p != NULL, ("gv_parity_request: NULL p")); 890 sc = p->vinumconf; 891 KASSERT(sc != NULL, ("gv_parity_request: NULL sc")); 892 893 bp = g_new_bio(); 894 if (bp == NULL) { 895 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: " 896 "out of memory", p->name); 897 return; 898 } 899 900 bp->bio_cmd = BIO_WRITE; 901 bp->bio_done = gv_done; 902 bp->bio_error = 0; 903 bp->bio_length = p->stripesize; 904 bp->bio_caller1 = p; 905 906 /* 907 * Check if it's a rebuild of a degraded plex or a user request of 908 * parity rebuild. 909 */ 910 if (flags & GV_BIO_REBUILD) 911 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK); 912 else if (flags & GV_BIO_CHECK) 913 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); 914 else { 915 G_VINUM_DEBUG(0, "invalid flags given in rebuild"); 916 return; 917 } 918 919 bp->bio_cflags = flags; 920 bp->bio_cflags |= GV_BIO_MALLOC; 921 922 /* We still have more parity to build. 
*/ 923 bp->bio_offset = offset; 924 mtx_lock(&sc->queue_mtx); 925 bioq_disksort(sc->bqueue, bp); 926 mtx_unlock(&sc->queue_mtx); 927 //gv_plex_start(p, bp); /* Send it down to the plex. */ 928} 929 930/* 931 * Handle a finished parity write. 932 */ 933void 934gv_parity_complete(struct gv_plex *p, struct bio *bp) 935{ 936 struct gv_softc *sc; 937 int error, flags; 938 939 error = bp->bio_error; 940 flags = bp->bio_cflags; 941 flags &= ~GV_BIO_MALLOC; 942 943 sc = p->vinumconf; 944 KASSERT(sc != NULL, ("gv_parity_complete: NULL sc")); 945 946 /* Clean up what we allocated. */ 947 if (bp->bio_cflags & GV_BIO_MALLOC) 948 g_free(bp->bio_data); 949 g_destroy_bio(bp); 950 951 if (error == EAGAIN) { 952 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx", 953 (intmax_t)p->synced); 954 } 955 956 /* Any error is fatal, except EAGAIN when we're rebuilding. */ 957 if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) { 958 /* Make sure we don't have the lock. */ 959 g_topology_assert_not(); 960 g_topology_lock(); 961 gv_access(p->vol_sc->provider, -1, -1, 0); 962 g_topology_unlock(); 963 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx " 964 "errno %d", p->name, (intmax_t)p->synced, error); 965 return; 966 } else { 967 p->synced += p->stripesize; 968 } 969 970 if (p->synced >= p->size) { 971 /* Make sure we don't have the lock. */ 972 g_topology_assert_not(); 973 g_topology_lock(); 974 gv_access(p->vol_sc->provider, -1, -1, 0); 975 g_topology_unlock(); 976 /* We're finished. */ 977 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name); 978 p->synced = 0; 979 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 980 return; 981 } 982 983 /* Send down next. It will determine if we need to itself. */ 984 gv_parity_request(p, flags, p->synced); 985} 986 987/* 988 * Handle a finished plex rebuild bio. 
989 */ 990void 991gv_rebuild_complete(struct gv_plex *p, struct bio *bp) 992{ 993 struct gv_softc *sc; 994 struct gv_sd *s; 995 int error, flags; 996 off_t offset; 997 998 error = bp->bio_error; 999 flags = bp->bio_cflags; 1000 offset = bp->bio_offset; 1001 flags &= ~GV_BIO_MALLOC; 1002 sc = p->vinumconf; 1003 KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc")); 1004 1005 /* Clean up what we allocated. */ 1006 if (bp->bio_cflags & GV_BIO_MALLOC) 1007 g_free(bp->bio_data); 1008 g_destroy_bio(bp); 1009 1010 if (error) { 1011 g_topology_assert_not(); 1012 g_topology_lock(); 1013 gv_access(p->vol_sc->provider, -1, -1, 0); 1014 g_topology_unlock(); 1015 1016 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d", 1017 p->name, (intmax_t)offset, error); 1018 p->flags &= ~GV_PLEX_REBUILDING; 1019 p->synced = 0; 1020 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ 1021 return; 1022 } 1023 1024 offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); 1025 if (offset >= p->size) { 1026 /* We're finished. */ 1027 g_topology_assert_not(); 1028 g_topology_lock(); 1029 gv_access(p->vol_sc->provider, -1, -1, 0); 1030 g_topology_unlock(); 1031 1032 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name); 1033 gv_save_config(p->vinumconf); 1034 p->flags &= ~GV_PLEX_REBUILDING; 1035 p->synced = 0; 1036 /* Try to up all subdisks. */ 1037 LIST_FOREACH(s, &p->subdisks, in_plex) 1038 gv_update_sd_state(s); 1039 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 1040 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ 1041 return; 1042 } 1043 1044 /* Send down next. It will determine if we need to itself. */ 1045 gv_parity_request(p, flags, offset); 1046} 1047