geom_vinum_init.c revision 143130
1/*- 2 * Copyright (c) 2004 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_init.c 143130 2005-03-04 16:43:40Z le $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/kernel.h> 33#include <sys/kthread.h> 34#include <sys/libkern.h> 35#include <sys/malloc.h> 36#include <sys/queue.h> 37 38#include <geom/geom.h> 39#include <geom/vinum/geom_vinum_var.h> 40#include <geom/vinum/geom_vinum.h> 41#include <geom/vinum/geom_vinum_share.h> 42 43int gv_init_plex(struct gv_plex *); 44int gv_init_sd(struct gv_sd *); 45void gv_init_td(void *); 46void gv_rebuild_plex(struct gv_plex *); 47void gv_rebuild_td(void *); 48void gv_start_plex(struct gv_plex *); 49void gv_start_vol(struct gv_volume *); 50void gv_sync(struct gv_volume *); 51void gv_sync_td(void *); 52 53struct gv_sync_args { 54 struct gv_volume *v; 55 struct gv_plex *from; 56 struct gv_plex *to; 57 off_t syncsize; 58}; 59 60void 61gv_parityop(struct g_geom *gp, struct gctl_req *req) 62{ 63 struct gv_softc *sc; 64 struct gv_plex *p; 65 struct bio *bp; 66 struct g_consumer *cp; 67 int error, *flags, type, *rebuild, rv; 68 char *plex; 69 70 rv = -1; 71 72 plex = gctl_get_param(req, "plex", NULL); 73 if (plex == NULL) { 74 gctl_error(req, "no plex given"); 75 goto out; 76 } 77 78 flags = gctl_get_paraml(req, "flags", sizeof(*flags)); 79 if (flags == NULL) { 80 gctl_error(req, "no flags given"); 81 goto out; 82 } 83 84 rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild)); 85 if (rebuild == NULL) { 86 gctl_error(req, "no rebuild op given"); 87 goto out; 88 } 89 90 sc = gp->softc; 91 type = gv_object_type(sc, plex); 92 switch (type) { 93 case GV_TYPE_PLEX: 94 break; 95 case GV_TYPE_VOL: 96 case GV_TYPE_SD: 97 case GV_TYPE_DRIVE: 98 default: 99 gctl_error(req, "'%s' is not a plex", plex); 100 goto out; 101 } 102 103 p = gv_find_plex(sc, plex); 104 if (p->state != GV_PLEX_UP) { 105 gctl_error(req, "plex %s is not completely accessible", 106 p->name); 107 goto out; 108 } 109 110 cp = p->consumer; 111 error = g_access(cp, 1, 1, 0); 112 if (error) { 113 gctl_error(req, "cannot access consumer"); 114 goto out; 115 } 116 g_topology_unlock(); 117 118 /* Reset the check pointer when using -f. */ 119 if (*flags & GV_FLAG_F) 120 p->synced = 0; 121 122 bp = g_new_bio(); 123 if (bp == NULL) { 124 gctl_error(req, "cannot create BIO - out of memory"); 125 g_topology_lock(); 126 error = g_access(cp, -1, -1, 0); 127 goto out; 128 } 129 bp->bio_cmd = BIO_WRITE; 130 bp->bio_done = NULL; 131 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); 132 bp->bio_cflags |= GV_BIO_CHECK; 133 if (*rebuild) 134 bp->bio_cflags |= GV_BIO_PARITY; 135 bp->bio_offset = p->synced; 136 bp->bio_length = p->stripesize; 137 138 /* Schedule it down ... */ 139 g_io_request(bp, cp); 140 141 /* ... and wait for the result. */ 142 error = biowait(bp, "gwrite"); 143 g_free(bp->bio_data); 144 g_destroy_bio(bp); 145 146 if (error) { 147 /* Incorrect parity. */ 148 if (error == EAGAIN) 149 rv = 1; 150 151 /* Some other error happened. */ 152 else 153 gctl_error(req, "Parity check failed at offset 0x%jx, " 154 "errno %d", (intmax_t)p->synced, error); 155 156 /* Correct parity. */ 157 } else 158 rv = 0; 159 160 gctl_set_param(req, "offset", &p->synced, sizeof(p->synced)); 161 162 /* Advance the checkpointer if there was no error. */ 163 if (rv == 0) 164 p->synced += p->stripesize; 165 166 /* End of plex; reset the check pointer and signal it to the caller. */ 167 if (p->synced >= p->size) { 168 p->synced = 0; 169 rv = -2; 170 } 171 172 g_topology_lock(); 173 error = g_access(cp, -1, -1, 0); 174 175out: 176 gctl_set_param(req, "rv", &rv, sizeof(rv)); 177} 178 179void 180gv_start_obj(struct g_geom *gp, struct gctl_req *req) 181{ 182 struct gv_softc *sc; 183 struct gv_volume *v; 184 struct gv_plex *p; 185 int *argc, *initsize; 186 char *argv, buf[20]; 187 int i, type; 188 189 argc = gctl_get_paraml(req, "argc", sizeof(*argc)); 190 initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize)); 191 192 if (argc == NULL || *argc == 0) { 193 gctl_error(req, "no arguments given"); 194 return; 195 } 196 197 sc = gp->softc; 198 199 for (i = 0; i < *argc; i++) { 200 snprintf(buf, sizeof(buf), "argv%d", i); 201 argv = gctl_get_param(req, buf, NULL); 202 if (argv == NULL) 203 continue; 204 type = gv_object_type(sc, argv); 205 switch (type) { 206 case GV_TYPE_VOL: 207 v = gv_find_vol(sc, argv); 208 gv_start_vol(v); 209 break; 210 211 case GV_TYPE_PLEX: 212 p = gv_find_plex(sc, argv); 213 gv_start_plex(p); 214 break; 215 216 case GV_TYPE_SD: 217 case GV_TYPE_DRIVE: 218 /* XXX not yet */ 219 gctl_error(req, "cannot start '%s'", argv); 220 return; 221 default: 222 gctl_error(req, "unknown object '%s'", argv); 223 return; 224 } 225 } 226} 227 228void 229gv_start_plex(struct gv_plex *p) 230{ 231 struct gv_volume *v; 232 233 KASSERT(p != NULL, ("gv_start_plex: NULL p")); 234 235 if (p->state == GV_PLEX_UP) 236 return; 237 238 v = p->vol_sc; 239 if ((v != NULL) && (v->plexcount > 1)) 240 gv_sync(v); 241 else if (p->org == GV_PLEX_RAID5) { 242 if (p->state == GV_PLEX_DEGRADED) 243 gv_rebuild_plex(p); 244 else 245 gv_init_plex(p); 246 } 247 248 return; 249} 250 251void 252gv_start_vol(struct gv_volume *v) 253{ 254 struct gv_plex *p; 255 struct gv_sd *s; 256 257 KASSERT(v != NULL, ("gv_start_vol: NULL v")); 258 259 if (v->plexcount == 0) 260 return; 261 262 else if (v->plexcount == 1) { 263 p = LIST_FIRST(&v->plexes); 264 KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name)); 265 if (p->org == GV_PLEX_RAID5) { 266 switch (p->state) { 267 case GV_PLEX_DOWN: 268 gv_init_plex(p); 269 break; 270 case GV_PLEX_DEGRADED: 271 gv_rebuild_plex(p); 272 break; 273 default: 274 return; 275 } 276 } else { 277 LIST_FOREACH(s, &p->subdisks, in_plex) { 278 gv_set_sd_state(s, GV_SD_UP, 279 GV_SETSTATE_CONFIG); 280 } 281 } 282 } else 283 gv_sync(v); 284} 285 286void 287gv_sync(struct gv_volume *v) 288{ 289 struct gv_softc *sc; 290 struct gv_plex *p, *up; 291 struct gv_sync_args *sync; 292 293 KASSERT(v != NULL, ("gv_sync: NULL v")); 294 sc = v->vinumconf; 295 KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name)); 296 297 /* Find the plex that's up. */ 298 up = NULL; 299 LIST_FOREACH(up, &v->plexes, in_volume) { 300 if (up->state == GV_PLEX_UP) 301 break; 302 } 303 304 /* Didn't find a good plex. */ 305 if (up == NULL) 306 return; 307 308 LIST_FOREACH(p, &v->plexes, in_volume) { 309 if ((p == up) || (p->state == GV_PLEX_UP)) 310 continue; 311 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); 312 sync->v = v; 313 sync->from = up; 314 sync->to = p; 315 sync->syncsize = GV_DFLT_SYNCSIZE; 316 kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'", 317 p->name); 318 } 319} 320 321void 322gv_rebuild_plex(struct gv_plex *p) 323{ 324 struct gv_sync_args *sync; 325 326 if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom)) 327 return; 328 329 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); 330 sync->to = p; 331 sync->syncsize = GV_DFLT_SYNCSIZE; 332 333 kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s", 334 p->name); 335} 336 337int 338gv_init_plex(struct gv_plex *p) 339{ 340 struct gv_sd *s; 341 int err; 342 343 KASSERT(p != NULL, ("gv_init_plex: NULL p")); 344 345 LIST_FOREACH(s, &p->subdisks, in_plex) { 346 err = gv_init_sd(s); 347 if (err) 348 return (err); 349 } 350 351 return (0); 352} 353 354int 355gv_init_sd(struct gv_sd *s) 356{ 357 KASSERT(s != NULL, ("gv_init_sd: NULL s")); 358 359 if (gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE)) 360 return (-1); 361 362 s->init_size = GV_DFLT_SYNCSIZE; 363 s->flags &= ~GV_SD_INITCANCEL; 364 365 /* Spawn the thread that does the work for us. */ 366 kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s", s->name); 367 368 return (0); 369} 370 371/* This thread is responsible for rebuilding a degraded RAID5 plex. */ 372void 373gv_rebuild_td(void *arg) 374{ 375 struct bio *bp; 376 struct gv_plex *p; 377 struct g_consumer *cp; 378 struct gv_sync_args *sync; 379 u_char *buf; 380 off_t i; 381 int error; 382 383 buf = NULL; 384 bp = NULL; 385 386 sync = arg; 387 p = sync->to; 388 p->synced = 0; 389 p->flags |= GV_PLEX_SYNCING; 390 cp = p->consumer; 391 392 g_topology_lock(); 393 error = g_access(cp, 1, 1, 0); 394 if (error) { 395 g_topology_unlock(); 396 printf("GEOM_VINUM: rebuild of %s failed to access consumer: " 397 "%d\n", p->name, error); 398 kthread_exit(error); 399 } 400 g_topology_unlock(); 401 402 buf = g_malloc(sync->syncsize, M_WAITOK); 403 404 printf("GEOM_VINUM: rebuild of %s started\n", p->name); 405 i = 0; 406 for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) { 407/* 408 if (i + sync->syncsize > p->size) 409 sync->syncsize = p->size - i; 410*/ 411 bp = g_new_bio(); 412 if (bp == NULL) { 413 printf("GEOM_VINUM: rebuild of %s failed creating bio: " 414 "out of memory\n", p->name); 415 break; 416 } 417 bp->bio_cmd = BIO_WRITE; 418 bp->bio_done = NULL; 419 bp->bio_data = buf; 420 bp->bio_cflags |= GV_BIO_REBUILD; 421 bp->bio_offset = i; 422 bp->bio_length = p->stripesize; 423 424 /* Schedule it down ... */ 425 g_io_request(bp, cp); 426 427 /* ... and wait for the result. */ 428 error = biowait(bp, "gwrite"); 429 if (error) { 430 printf("GEOM_VINUM: rebuild of %s failed at offset %jd " 431 "errno: %d\n", p->name, i, error); 432 break; 433 } 434 g_destroy_bio(bp); 435 bp = NULL; 436 } 437 438 if (bp != NULL) 439 g_destroy_bio(bp); 440 if (buf != NULL) 441 g_free(buf); 442 443 g_topology_lock(); 444 g_access(cp, -1, -1, 0); 445 gv_save_config_all(p->vinumconf); 446 g_topology_unlock(); 447 448 p->flags &= ~GV_PLEX_SYNCING; 449 p->synced = 0; 450 451 /* Successful initialization. */ 452 if (!error) 453 printf("GEOM_VINUM: rebuild of %s finished\n", p->name); 454 455 g_free(sync); 456 kthread_exit(error); 457} 458 459void 460gv_sync_td(void *arg) 461{ 462 struct bio *bp; 463 struct gv_plex *p; 464 struct g_consumer *from, *to; 465 struct gv_sync_args *sync; 466 u_char *buf; 467 off_t i; 468 int error; 469 470 sync = arg; 471 472 from = sync->from->consumer; 473 to = sync->to->consumer; 474 475 p = sync->to; 476 477 if (p->flags & GV_PLEX_SYNCING) { 478 printf("GEOM_VINUM: plex '%s' is already syncing.\n", p->name); 479 g_free(sync); 480 kthread_exit(0); 481 } 482 483 p->synced = 0; 484 p->flags |= GV_PLEX_SYNCING; 485 486 error = 0; 487 488 g_topology_lock(); 489 error = g_access(from, 1, 0, 0); 490 if (error) { 491 g_topology_unlock(); 492 printf("GEOM_VINUM: sync from '%s' failed to access " 493 "consumer: %d\n", sync->from->name, error); 494 g_free(sync); 495 kthread_exit(error); 496 } 497 error = g_access(to, 0, 1, 0); 498 if (error) { 499 g_access(from, -1, 0, 0); 500 g_topology_unlock(); 501 printf("GEOM_VINUM: sync to '%s' failed to access " 502 "consumer: %d\n", p->name, error); 503 g_free(sync); 504 kthread_exit(error); 505 } 506 g_topology_unlock(); 507 508 printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name, 509 sync->to->name); 510 for (i = 0; i < p->size; i+= sync->syncsize) { 511 /* Read some bits from the good plex. */ 512 buf = g_read_data(from, i, sync->syncsize, &error); 513 if (buf == NULL) { 514 printf("GEOM_VINUM: sync read from '%s' failed at " 515 "offset %jd; errno: %d\n", sync->from->name, i, 516 error); 517 break; 518 } 519 520 /* 521 * Create a bio and schedule it down on the 'bad' plex. We 522 * cannot simply use g_write_data() because we have to let the 523 * lower parts know that we are an initialization process and 524 * not a 'normal' request. 525 */ 526 bp = g_new_bio(); 527 if (bp == NULL) { 528 printf("GEOM_VINUM: sync write to '%s' failed at " 529 "offset %jd; out of memory\n", p->name, i); 530 g_free(buf); 531 break; 532 } 533 bp->bio_cmd = BIO_WRITE; 534 bp->bio_offset = i; 535 bp->bio_length = sync->syncsize; 536 bp->bio_data = buf; 537 bp->bio_done = NULL; 538 539 /* 540 * This hack declare this bio as part of an initialization 541 * process, so that the lower levels allow it to get through. 542 */ 543 bp->bio_cflags |= GV_BIO_SYNCREQ; 544 545 /* Schedule it down ... */ 546 g_io_request(bp, to); 547 548 /* ... and wait for the result. */ 549 error = biowait(bp, "gwrite"); 550 g_destroy_bio(bp); 551 g_free(buf); 552 if (error) { 553 printf("GEOM_VINUM: sync write to '%s' failed at " 554 "offset %jd; errno: %d\n", p->name, i, error); 555 break; 556 } 557 558 /* Note that we have synced a little bit more. */ 559 p->synced += sync->syncsize; 560 } 561 562 g_topology_lock(); 563 g_access(from, -1, 0, 0); 564 g_access(to, 0, -1, 0); 565 gv_save_config_all(p->vinumconf); 566 g_topology_unlock(); 567 568 /* Successful initialization. */ 569 if (!error) 570 printf("GEOM_VINUM: plex sync %s -> %s finished\n", 571 sync->from->name, sync->to->name); 572 573 p->flags &= ~GV_PLEX_SYNCING; 574 p->synced = 0; 575 576 g_free(sync); 577 kthread_exit(error); 578} 579 580void 581gv_init_td(void *arg) 582{ 583 struct gv_sd *s; 584 struct gv_drive *d; 585 struct g_geom *gp; 586 struct g_consumer *cp; 587 int error; 588 off_t i, init_size, start, offset, length; 589 u_char *buf; 590 591 s = arg; 592 KASSERT(s != NULL, ("gv_init_td: NULL s")); 593 d = s->drive_sc; 594 KASSERT(d != NULL, ("gv_init_td: NULL d")); 595 gp = d->geom; 596 KASSERT(gp != NULL, ("gv_init_td: NULL gp")); 597 598 cp = LIST_FIRST(&gp->consumer); 599 KASSERT(cp != NULL, ("gv_init_td: NULL cp")); 600 601 s->init_error = 0; 602 init_size = s->init_size; 603 start = s->drive_offset + s->initialized; 604 offset = s->drive_offset; 605 length = s->size; 606 607 buf = g_malloc(s->init_size, M_WAITOK | M_ZERO); 608 609 g_topology_lock(); 610 error = g_access(cp, 0, 1, 0); 611 if (error) { 612 s->init_error = error; 613 g_topology_unlock(); 614 printf("geom_vinum: init '%s' failed to access consumer: %d\n", 615 s->name, error); 616 kthread_exit(error); 617 } 618 g_topology_unlock(); 619 620 for (i = start; i < offset + length; i += init_size) { 621 if (s->flags & GV_SD_INITCANCEL) { 622 printf("geom_vinum: subdisk '%s' init: cancelled at" 623 " offset %jd (drive offset %jd)\n", s->name, 624 (intmax_t)s->initialized, (intmax_t)i); 625 error = EAGAIN; 626 break; 627 } 628 error = g_write_data(cp, i, buf, init_size); 629 if (error) { 630 printf("geom_vinum: subdisk '%s' init: write failed" 631 " at offset %jd (drive offset %jd)\n", s->name, 632 (intmax_t)s->initialized, (intmax_t)i); 633 break; 634 } 635 s->initialized += init_size; 636 } 637 638 g_free(buf); 639 640 g_topology_lock(); 641 g_access(cp, 0, -1, 0); 642 g_topology_unlock(); 643 if (error) { 644 s->init_error = error; 645 g_topology_lock(); 646 gv_set_sd_state(s, GV_SD_STALE, 647 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 648 g_topology_unlock(); 649 } else { 650 g_topology_lock(); 651 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); 652 g_topology_unlock(); 653 s->initialized = 0; 654 printf("geom_vinum: init '%s' finished\n", s->name); 655 } 656 kthread_exit(error); 657} 658