g_stripe.c revision 223921
1/*- 2 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/stripe/g_stripe.c 223921 2011-07-11 05:22:31Z ae $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/kernel.h> 33#include <sys/module.h> 34#include <sys/lock.h> 35#include <sys/mutex.h> 36#include <sys/bio.h> 37#include <sys/sbuf.h> 38#include <sys/sysctl.h> 39#include <sys/malloc.h> 40#include <vm/uma.h> 41#include <geom/geom.h> 42#include <geom/stripe/g_stripe.h> 43 44FEATURE(geom_stripe, "GEOM striping support"); 45 46static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 47 48static uma_zone_t g_stripe_zone; 49 50static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 51static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 52 struct g_geom *gp); 53 54static g_taste_t g_stripe_taste; 55static g_ctl_req_t g_stripe_config; 56static g_dumpconf_t g_stripe_dumpconf; 57static g_init_t g_stripe_init; 58static g_fini_t g_stripe_fini; 59 60struct g_class g_stripe_class = { 61 .name = G_STRIPE_CLASS_NAME, 62 .version = G_VERSION, 63 .ctlreq = g_stripe_config, 64 .taste = g_stripe_taste, 65 .destroy_geom = g_stripe_destroy_geom, 66 .init = g_stripe_init, 67 .fini = g_stripe_fini 68}; 69 70SYSCTL_DECL(_kern_geom); 71SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); 72static u_int g_stripe_debug = 0; 73TUNABLE_INT("kern.geom.stripe.debug", &g_stripe_debug); 74SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0, 75 "Debug level"); 76static int g_stripe_fast = 0; 77TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast); 78static int 79g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) 80{ 81 int error, fast; 82 83 fast = g_stripe_fast; 84 error = sysctl_handle_int(oidp, &fast, 0, req); 85 if (error == 0 && req->newptr != NULL) 86 g_stripe_fast = fast; 87 return (error); 88} 89SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW, 90 NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); 91static u_int g_stripe_maxmem = MAXPHYS * 100; 92TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem); 93SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem, 94 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 95static u_int g_stripe_fast_failed = 0; 96SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 97 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 98 99/* 100 * Greatest Common Divisor. 101 */ 102static u_int 103gcd(u_int a, u_int b) 104{ 105 u_int c; 106 107 while (b != 0) { 108 c = a; 109 a = b; 110 b = (c % b); 111 } 112 return (a); 113} 114 115/* 116 * Least Common Multiple. 117 */ 118static u_int 119lcm(u_int a, u_int b) 120{ 121 122 return ((a * b) / gcd(a, b)); 123} 124 125static void 126g_stripe_init(struct g_class *mp __unused) 127{ 128 129 g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, 130 NULL, NULL, 0, 0); 131 g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; 132 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); 133} 134 135static void 136g_stripe_fini(struct g_class *mp __unused) 137{ 138 139 uma_zdestroy(g_stripe_zone); 140} 141 142/* 143 * Return the number of valid disks. 144 */ 145static u_int 146g_stripe_nvalid(struct g_stripe_softc *sc) 147{ 148 u_int i, no; 149 150 no = 0; 151 for (i = 0; i < sc->sc_ndisks; i++) { 152 if (sc->sc_disks[i] != NULL) 153 no++; 154 } 155 156 return (no); 157} 158 159static void 160g_stripe_remove_disk(struct g_consumer *cp) 161{ 162 struct g_stripe_softc *sc; 163 u_int no; 164 165 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 166 sc = (struct g_stripe_softc *)cp->private; 167 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 168 no = cp->index; 169 170 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, 171 sc->sc_name); 172 173 sc->sc_disks[no] = NULL; 174 if (sc->sc_provider != NULL) { 175 sc->sc_provider->flags |= G_PF_WITHER; 176 g_orphan_provider(sc->sc_provider, ENXIO); 177 sc->sc_provider = NULL; 178 G_STRIPE_DEBUG(0, "Device %s removed.", sc->sc_name); 179 } 180 181 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 182 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 183 g_detach(cp); 184 g_destroy_consumer(cp); 185} 186 187static void 188g_stripe_orphan(struct g_consumer *cp) 189{ 190 struct g_stripe_softc *sc; 191 struct g_geom *gp; 192 193 g_topology_assert(); 194 gp = cp->geom; 195 sc = gp->softc; 196 if (sc == NULL) 197 return; 198 199 g_stripe_remove_disk(cp); 200 /* If there are no valid disks anymore, remove device. */ 201 if (g_stripe_nvalid(sc) == 0) 202 g_stripe_destroy(sc, 1); 203} 204 205static int 206g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 207{ 208 struct g_consumer *cp1, *cp2; 209 struct g_stripe_softc *sc; 210 struct g_geom *gp; 211 int error; 212 213 gp = pp->geom; 214 sc = gp->softc; 215 216 if (sc == NULL) { 217 /* 218 * It looks like geom is being withered. 219 * In that case we allow only negative requests. 220 */ 221 KASSERT(dr <= 0 && dw <= 0 && de <= 0, 222 ("Positive access request (device=%s).", pp->name)); 223 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && 224 (pp->ace + de) == 0) { 225 G_STRIPE_DEBUG(0, "Device %s definitely destroyed.", 226 gp->name); 227 } 228 return (0); 229 } 230 231 /* On first open, grab an extra "exclusive" bit */ 232 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 233 de++; 234 /* ... and let go of it on last close */ 235 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 236 de--; 237 238 error = ENXIO; 239 LIST_FOREACH(cp1, &gp->consumer, consumer) { 240 error = g_access(cp1, dr, dw, de); 241 if (error == 0) 242 continue; 243 /* 244 * If we fail here, backout all previous changes. 245 */ 246 LIST_FOREACH(cp2, &gp->consumer, consumer) { 247 if (cp1 == cp2) 248 return (error); 249 g_access(cp2, -dr, -dw, -de); 250 } 251 /* NOTREACHED */ 252 } 253 254 return (error); 255} 256 257static void 258g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 259 off_t length, int mode) 260{ 261 u_int stripesize; 262 size_t len; 263 264 stripesize = sc->sc_stripesize; 265 len = (size_t)(stripesize - (offset & (stripesize - 1))); 266 do { 267 bcopy(src, dst, len); 268 if (mode) { 269 dst += len + stripesize * (sc->sc_ndisks - 1); 270 src += len; 271 } else { 272 dst += len; 273 src += len + stripesize * (sc->sc_ndisks - 1); 274 } 275 length -= len; 276 KASSERT(length >= 0, 277 ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", 278 (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); 279 if (length > stripesize) 280 len = stripesize; 281 else 282 len = length; 283 } while (length > 0); 284} 285 286static void 287g_stripe_done(struct bio *bp) 288{ 289 struct g_stripe_softc *sc; 290 struct bio *pbp; 291 292 pbp = bp->bio_parent; 293 sc = pbp->bio_to->geom->softc; 294 if (pbp->bio_error == 0) 295 pbp->bio_error = bp->bio_error; 296 pbp->bio_completed += bp->bio_completed; 297 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 298 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 299 bp->bio_length, 1); 300 bp->bio_data = bp->bio_caller1; 301 bp->bio_caller1 = NULL; 302 } 303 g_destroy_bio(bp); 304 pbp->bio_inbed++; 305 if (pbp->bio_children == pbp->bio_inbed) { 306 if (pbp->bio_driver1 != NULL) 307 uma_zfree(g_stripe_zone, pbp->bio_driver1); 308 g_io_deliver(pbp, pbp->bio_error); 309 } 310} 311 312static int 313g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 314{ 315 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 316 u_int nparts = 0, stripesize; 317 struct g_stripe_softc *sc; 318 char *addr, *data = NULL; 319 struct bio *cbp; 320 int error; 321 322 sc = bp->bio_to->geom->softc; 323 324 addr = bp->bio_data; 325 stripesize = sc->sc_stripesize; 326 327 cbp = g_clone_bio(bp); 328 if (cbp == NULL) { 329 error = ENOMEM; 330 goto failure; 331 } 332 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 333 nparts++; 334 /* 335 * Fill in the component buf structure. 336 */ 337 cbp->bio_done = g_stripe_done; 338 cbp->bio_offset = offset; 339 cbp->bio_data = addr; 340 cbp->bio_caller1 = NULL; 341 cbp->bio_length = length; 342 cbp->bio_caller2 = sc->sc_disks[no]; 343 344 /* offset -= offset % stripesize; */ 345 offset -= offset & (stripesize - 1); 346 addr += length; 347 length = bp->bio_length - length; 348 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 349 if (no > sc->sc_ndisks - 1) { 350 no = 0; 351 offset += stripesize; 352 } 353 if (nparts >= sc->sc_ndisks) { 354 cbp = TAILQ_NEXT(cbp, bio_queue); 355 if (cbp == NULL) 356 cbp = TAILQ_FIRST(&queue); 357 nparts++; 358 /* 359 * Update bio structure. 360 */ 361 /* 362 * MIN() is in case when 363 * (bp->bio_length % sc->sc_stripesize) != 0. 364 */ 365 cbp->bio_length += MIN(stripesize, length); 366 if (cbp->bio_caller1 == NULL) { 367 cbp->bio_caller1 = cbp->bio_data; 368 cbp->bio_data = NULL; 369 if (data == NULL) { 370 data = uma_zalloc(g_stripe_zone, 371 M_NOWAIT); 372 if (data == NULL) { 373 error = ENOMEM; 374 goto failure; 375 } 376 } 377 } 378 } else { 379 cbp = g_clone_bio(bp); 380 if (cbp == NULL) { 381 error = ENOMEM; 382 goto failure; 383 } 384 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 385 nparts++; 386 /* 387 * Fill in the component buf structure. 388 */ 389 cbp->bio_done = g_stripe_done; 390 cbp->bio_offset = offset; 391 cbp->bio_data = addr; 392 cbp->bio_caller1 = NULL; 393 /* 394 * MIN() is in case when 395 * (bp->bio_length % sc->sc_stripesize) != 0. 396 */ 397 cbp->bio_length = MIN(stripesize, length); 398 cbp->bio_caller2 = sc->sc_disks[no]; 399 } 400 } 401 if (data != NULL) 402 bp->bio_driver1 = data; 403 /* 404 * Fire off all allocated requests! 405 */ 406 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 407 struct g_consumer *cp; 408 409 TAILQ_REMOVE(&queue, cbp, bio_queue); 410 cp = cbp->bio_caller2; 411 cbp->bio_caller2 = NULL; 412 cbp->bio_to = cp->provider; 413 if (cbp->bio_caller1 != NULL) { 414 cbp->bio_data = data; 415 if (bp->bio_cmd == BIO_WRITE) { 416 g_stripe_copy(sc, cbp->bio_caller1, data, 417 cbp->bio_offset, cbp->bio_length, 0); 418 } 419 data += cbp->bio_length; 420 } 421 G_STRIPE_LOGREQ(cbp, "Sending request."); 422 g_io_request(cbp, cp); 423 } 424 return (0); 425failure: 426 if (data != NULL) 427 uma_zfree(g_stripe_zone, data); 428 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 429 TAILQ_REMOVE(&queue, cbp, bio_queue); 430 if (cbp->bio_caller1 != NULL) { 431 cbp->bio_data = cbp->bio_caller1; 432 cbp->bio_caller1 = NULL; 433 } 434 bp->bio_children--; 435 g_destroy_bio(cbp); 436 } 437 return (error); 438} 439 440static int 441g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 442{ 443 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 444 struct g_stripe_softc *sc; 445 uint32_t stripesize; 446 struct bio *cbp; 447 char *addr; 448 int error; 449 450 sc = bp->bio_to->geom->softc; 451 452 addr = bp->bio_data; 453 stripesize = sc->sc_stripesize; 454 455 cbp = g_clone_bio(bp); 456 if (cbp == NULL) { 457 error = ENOMEM; 458 goto failure; 459 } 460 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 461 /* 462 * Fill in the component buf structure. 463 */ 464 cbp->bio_done = g_std_done; 465 cbp->bio_offset = offset; 466 cbp->bio_data = addr; 467 cbp->bio_length = length; 468 cbp->bio_caller2 = sc->sc_disks[no]; 469 470 /* offset -= offset % stripesize; */ 471 offset -= offset & (stripesize - 1); 472 addr += length; 473 length = bp->bio_length - length; 474 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 475 if (no > sc->sc_ndisks - 1) { 476 no = 0; 477 offset += stripesize; 478 } 479 cbp = g_clone_bio(bp); 480 if (cbp == NULL) { 481 error = ENOMEM; 482 goto failure; 483 } 484 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 485 486 /* 487 * Fill in the component buf structure. 488 */ 489 cbp->bio_done = g_std_done; 490 cbp->bio_offset = offset; 491 cbp->bio_data = addr; 492 /* 493 * MIN() is in case when 494 * (bp->bio_length % sc->sc_stripesize) != 0. 495 */ 496 cbp->bio_length = MIN(stripesize, length); 497 498 cbp->bio_caller2 = sc->sc_disks[no]; 499 } 500 /* 501 * Fire off all allocated requests! 502 */ 503 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 504 struct g_consumer *cp; 505 506 TAILQ_REMOVE(&queue, cbp, bio_queue); 507 cp = cbp->bio_caller2; 508 cbp->bio_caller2 = NULL; 509 cbp->bio_to = cp->provider; 510 G_STRIPE_LOGREQ(cbp, "Sending request."); 511 g_io_request(cbp, cp); 512 } 513 return (0); 514failure: 515 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 516 TAILQ_REMOVE(&queue, cbp, bio_queue); 517 bp->bio_children--; 518 g_destroy_bio(cbp); 519 } 520 return (error); 521} 522 523static void 524g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) 525{ 526 struct bio_queue_head queue; 527 struct g_consumer *cp; 528 struct bio *cbp; 529 u_int no; 530 531 bioq_init(&queue); 532 for (no = 0; no < sc->sc_ndisks; no++) { 533 cbp = g_clone_bio(bp); 534 if (cbp == NULL) { 535 for (cbp = bioq_first(&queue); cbp != NULL; 536 cbp = bioq_first(&queue)) { 537 bioq_remove(&queue, cbp); 538 g_destroy_bio(cbp); 539 } 540 if (bp->bio_error == 0) 541 bp->bio_error = ENOMEM; 542 g_io_deliver(bp, bp->bio_error); 543 return; 544 } 545 bioq_insert_tail(&queue, cbp); 546 cbp->bio_done = g_std_done; 547 cbp->bio_caller1 = sc->sc_disks[no]; 548 cbp->bio_to = sc->sc_disks[no]->provider; 549 } 550 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 551 bioq_remove(&queue, cbp); 552 G_STRIPE_LOGREQ(cbp, "Sending request."); 553 cp = cbp->bio_caller1; 554 cbp->bio_caller1 = NULL; 555 g_io_request(cbp, cp); 556 } 557} 558 559static void 560g_stripe_start(struct bio *bp) 561{ 562 off_t offset, start, length, nstripe; 563 struct g_stripe_softc *sc; 564 u_int no, stripesize; 565 int error, fast = 0; 566 567 sc = bp->bio_to->geom->softc; 568 /* 569 * If sc == NULL, provider's error should be set and g_stripe_start() 570 * should not be called at all. 571 */ 572 KASSERT(sc != NULL, 573 ("Provider's error should be set (error=%d)(device=%s).", 574 bp->bio_to->error, bp->bio_to->name)); 575 576 G_STRIPE_LOGREQ(bp, "Request received."); 577 578 switch (bp->bio_cmd) { 579 case BIO_READ: 580 case BIO_WRITE: 581 case BIO_DELETE: 582 break; 583 case BIO_FLUSH: 584 g_stripe_flush(sc, bp); 585 return; 586 case BIO_GETATTR: 587 /* To which provider it should be delivered? */ 588 default: 589 g_io_deliver(bp, EOPNOTSUPP); 590 return; 591 } 592 593 stripesize = sc->sc_stripesize; 594 595 /* 596 * Calculations are quite messy, but fast I hope. 597 */ 598 599 /* Stripe number. */ 600 /* nstripe = bp->bio_offset / stripesize; */ 601 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 602 /* Disk number. */ 603 no = nstripe % sc->sc_ndisks; 604 /* Start position in stripe. */ 605 /* start = bp->bio_offset % stripesize; */ 606 start = bp->bio_offset & (stripesize - 1); 607 /* Start position in disk. */ 608 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 609 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 610 /* Length of data to operate. */ 611 length = MIN(bp->bio_length, stripesize - start); 612 613 /* 614 * Do use "fast" mode when: 615 * 1. "Fast" mode is ON. 616 * and 617 * 2. Request size is less than or equal to MAXPHYS, 618 * which should always be true. 619 * and 620 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 621 * there will be no need to send more than one I/O request to 622 * a provider, so there is nothing to optmize. 623 */ 624 if (g_stripe_fast && bp->bio_length <= MAXPHYS && 625 bp->bio_length >= stripesize * sc->sc_ndisks) { 626 fast = 1; 627 } 628 error = 0; 629 if (fast) { 630 error = g_stripe_start_fast(bp, no, offset, length); 631 if (error != 0) 632 g_stripe_fast_failed++; 633 } 634 /* 635 * Do use "economic" when: 636 * 1. "Economic" mode is ON. 637 * or 638 * 2. "Fast" mode failed. It can only fail if there is no memory. 639 */ 640 if (!fast || error != 0) 641 error = g_stripe_start_economic(bp, no, offset, length); 642 if (error != 0) { 643 if (bp->bio_error == 0) 644 bp->bio_error = error; 645 g_io_deliver(bp, bp->bio_error); 646 } 647} 648 649static void 650g_stripe_check_and_run(struct g_stripe_softc *sc) 651{ 652 off_t mediasize, ms; 653 u_int no, sectorsize = 0; 654 655 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 656 return; 657 658 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 659 sc->sc_name); 660 /* 661 * Find the smallest disk. 662 */ 663 mediasize = sc->sc_disks[0]->provider->mediasize; 664 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 665 mediasize -= sc->sc_disks[0]->provider->sectorsize; 666 mediasize -= mediasize % sc->sc_stripesize; 667 sectorsize = sc->sc_disks[0]->provider->sectorsize; 668 for (no = 1; no < sc->sc_ndisks; no++) { 669 ms = sc->sc_disks[no]->provider->mediasize; 670 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 671 ms -= sc->sc_disks[no]->provider->sectorsize; 672 ms -= ms % sc->sc_stripesize; 673 if (ms < mediasize) 674 mediasize = ms; 675 sectorsize = lcm(sectorsize, 676 sc->sc_disks[no]->provider->sectorsize); 677 } 678 sc->sc_provider->sectorsize = sectorsize; 679 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 680 sc->sc_provider->stripesize = sc->sc_stripesize; 681 sc->sc_provider->stripeoffset = 0; 682 g_error_provider(sc->sc_provider, 0); 683 684 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_name); 685} 686 687static int 688g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 689{ 690 struct g_provider *pp; 691 u_char *buf; 692 int error; 693 694 g_topology_assert(); 695 696 error = g_access(cp, 1, 0, 0); 697 if (error != 0) 698 return (error); 699 pp = cp->provider; 700 g_topology_unlock(); 701 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 702 &error); 703 g_topology_lock(); 704 g_access(cp, -1, 0, 0); 705 if (buf == NULL) 706 return (error); 707 708 /* Decode metadata. */ 709 stripe_metadata_decode(buf, md); 710 g_free(buf); 711 712 return (0); 713} 714 715/* 716 * Add disk to given device. 717 */ 718static int 719g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 720{ 721 struct g_consumer *cp, *fcp; 722 struct g_geom *gp; 723 int error; 724 725 /* Metadata corrupted? */ 726 if (no >= sc->sc_ndisks) 727 return (EINVAL); 728 729 /* Check if disk is not already attached. */ 730 if (sc->sc_disks[no] != NULL) 731 return (EEXIST); 732 733 gp = sc->sc_geom; 734 fcp = LIST_FIRST(&gp->consumer); 735 736 cp = g_new_consumer(gp); 737 error = g_attach(cp, pp); 738 if (error != 0) { 739 g_destroy_consumer(cp); 740 return (error); 741 } 742 743 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 744 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 745 if (error != 0) { 746 g_detach(cp); 747 g_destroy_consumer(cp); 748 return (error); 749 } 750 } 751 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 752 struct g_stripe_metadata md; 753 754 /* Reread metadata. */ 755 error = g_stripe_read_metadata(cp, &md); 756 if (error != 0) 757 goto fail; 758 759 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 760 strcmp(md.md_name, sc->sc_name) != 0 || 761 md.md_id != sc->sc_id) { 762 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 763 goto fail; 764 } 765 } 766 767 cp->private = sc; 768 cp->index = no; 769 sc->sc_disks[no] = cp; 770 771 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 772 773 g_stripe_check_and_run(sc); 774 775 return (0); 776fail: 777 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 778 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 779 g_detach(cp); 780 g_destroy_consumer(cp); 781 return (error); 782} 783 784static struct g_geom * 785g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 786 u_int type) 787{ 788 struct g_stripe_softc *sc; 789 struct g_geom *gp; 790 u_int no; 791 792 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 793 md->md_id); 794 795 /* Two disks is minimum. */ 796 if (md->md_all < 2) { 797 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 798 return (NULL); 799 } 800#if 0 801 /* Stripe size have to be grater than or equal to sector size. */ 802 if (md->md_stripesize < sectorsize) { 803 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 804 return (NULL); 805 } 806#endif 807 /* Stripe size have to be power of 2. */ 808 if (!powerof2(md->md_stripesize)) { 809 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 810 return (NULL); 811 } 812 813 /* Check for duplicate unit */ 814 LIST_FOREACH(gp, &mp->geom, geom) { 815 sc = gp->softc; 816 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 817 G_STRIPE_DEBUG(0, "Device %s already configured.", 818 sc->sc_name); 819 return (NULL); 820 } 821 } 822 gp = g_new_geomf(mp, "%s", md->md_name); 823 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 824 gp->start = g_stripe_start; 825 gp->spoiled = g_stripe_orphan; 826 gp->orphan = g_stripe_orphan; 827 gp->access = g_stripe_access; 828 gp->dumpconf = g_stripe_dumpconf; 829 830 sc->sc_id = md->md_id; 831 sc->sc_stripesize = md->md_stripesize; 832 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 833 sc->sc_ndisks = md->md_all; 834 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 835 M_STRIPE, M_WAITOK | M_ZERO); 836 for (no = 0; no < sc->sc_ndisks; no++) 837 sc->sc_disks[no] = NULL; 838 sc->sc_type = type; 839 840 gp->softc = sc; 841 sc->sc_geom = gp; 842 sc->sc_provider = NULL; 843 844 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 845 846 return (gp); 847} 848 849static int 850g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 851{ 852 struct g_provider *pp; 853 struct g_geom *gp; 854 u_int no; 855 856 g_topology_assert(); 857 858 if (sc == NULL) 859 return (ENXIO); 860 861 pp = sc->sc_provider; 862 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 863 if (force) { 864 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 865 "can't be definitely removed.", pp->name); 866 } else { 867 G_STRIPE_DEBUG(1, 868 "Device %s is still open (r%dw%de%d).", pp->name, 869 pp->acr, pp->acw, pp->ace); 870 return (EBUSY); 871 } 872 } 873 874 for (no = 0; no < sc->sc_ndisks; no++) { 875 if (sc->sc_disks[no] != NULL) 876 g_stripe_remove_disk(sc->sc_disks[no]); 877 } 878 879 gp = sc->sc_geom; 880 gp->softc = NULL; 881 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 882 gp->name)); 883 free(sc->sc_disks, M_STRIPE); 884 free(sc, M_STRIPE); 885 886 pp = LIST_FIRST(&gp->provider); 887 if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) 888 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 889 890 g_wither_geom(gp, ENXIO); 891 892 return (0); 893} 894 895static int 896g_stripe_destroy_geom(struct gctl_req *req __unused, 897 struct g_class *mp __unused, struct g_geom *gp) 898{ 899 struct g_stripe_softc *sc; 900 901 sc = gp->softc; 902 return (g_stripe_destroy(sc, 0)); 903} 904 905static struct g_geom * 906g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 907{ 908 struct g_stripe_metadata md; 909 struct g_stripe_softc *sc; 910 struct g_consumer *cp; 911 struct g_geom *gp; 912 int error; 913 914 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 915 g_topology_assert(); 916 917 /* Skip providers that are already open for writing. */ 918 if (pp->acw > 0) 919 return (NULL); 920 921 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 922 923 gp = g_new_geomf(mp, "stripe:taste"); 924 gp->start = g_stripe_start; 925 gp->access = g_stripe_access; 926 gp->orphan = g_stripe_orphan; 927 cp = g_new_consumer(gp); 928 g_attach(cp, pp); 929 error = g_stripe_read_metadata(cp, &md); 930 g_detach(cp); 931 g_destroy_consumer(cp); 932 g_destroy_geom(gp); 933 if (error != 0) 934 return (NULL); 935 gp = NULL; 936 937 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 938 return (NULL); 939 if (md.md_version > G_STRIPE_VERSION) { 940 printf("geom_stripe.ko module is too old to handle %s.\n", 941 pp->name); 942 return (NULL); 943 } 944 /* 945 * Backward compatibility: 946 */ 947 /* There was no md_provider field in earlier versions of metadata. */ 948 if (md.md_version < 2) 949 bzero(md.md_provider, sizeof(md.md_provider)); 950 /* There was no md_provsize field in earlier versions of metadata. */ 951 if (md.md_version < 3) 952 md.md_provsize = pp->mediasize; 953 954 if (md.md_provider[0] != '\0' && 955 !g_compare_names(md.md_provider, pp->name)) 956 return (NULL); 957 if (md.md_provsize != pp->mediasize) 958 return (NULL); 959 960 /* 961 * Let's check if device already exists. 962 */ 963 sc = NULL; 964 LIST_FOREACH(gp, &mp->geom, geom) { 965 sc = gp->softc; 966 if (sc == NULL) 967 continue; 968 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 969 continue; 970 if (strcmp(md.md_name, sc->sc_name) != 0) 971 continue; 972 if (md.md_id != sc->sc_id) 973 continue; 974 break; 975 } 976 if (gp != NULL) { 977 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 978 error = g_stripe_add_disk(sc, pp, md.md_no); 979 if (error != 0) { 980 G_STRIPE_DEBUG(0, 981 "Cannot add disk %s to %s (error=%d).", pp->name, 982 gp->name, error); 983 return (NULL); 984 } 985 } else { 986 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 987 if (gp == NULL) { 988 G_STRIPE_DEBUG(0, "Cannot create device %s.", 989 md.md_name); 990 return (NULL); 991 } 992 sc = gp->softc; 993 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 994 error = g_stripe_add_disk(sc, pp, md.md_no); 995 if (error != 0) { 996 G_STRIPE_DEBUG(0, 997 "Cannot add disk %s to %s (error=%d).", pp->name, 998 gp->name, error); 999 g_stripe_destroy(sc, 1); 1000 return (NULL); 1001 } 1002 } 1003 1004 return (gp); 1005} 1006 1007static void 1008g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1009{ 1010 u_int attached, no; 1011 struct g_stripe_metadata md; 1012 struct g_provider *pp; 1013 struct g_stripe_softc *sc; 1014 struct g_geom *gp; 1015 struct sbuf *sb; 1016 intmax_t *stripesize; 1017 const char *name; 1018 char param[16]; 1019 int *nargs; 1020 1021 g_topology_assert(); 1022 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1023 if (nargs == NULL) { 1024 gctl_error(req, "No '%s' argument.", "nargs"); 1025 return; 1026 } 1027 if (*nargs <= 2) { 1028 gctl_error(req, "Too few arguments."); 1029 return; 1030 } 1031 1032 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1033 md.md_version = G_STRIPE_VERSION; 1034 name = gctl_get_asciiparam(req, "arg0"); 1035 if (name == NULL) { 1036 gctl_error(req, "No 'arg%u' argument.", 0); 1037 return; 1038 } 1039 strlcpy(md.md_name, name, sizeof(md.md_name)); 1040 md.md_id = arc4random(); 1041 md.md_no = 0; 1042 md.md_all = *nargs - 1; 1043 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1044 if (stripesize == NULL) { 1045 gctl_error(req, "No '%s' argument.", "stripesize"); 1046 return; 1047 } 1048 md.md_stripesize = *stripesize; 1049 bzero(md.md_provider, sizeof(md.md_provider)); 1050 /* This field is not important here. */ 1051 md.md_provsize = 0; 1052 1053 /* Check all providers are valid */ 1054 for (no = 1; no < *nargs; no++) { 1055 snprintf(param, sizeof(param), "arg%u", no); 1056 name = gctl_get_asciiparam(req, param); 1057 if (name == NULL) { 1058 gctl_error(req, "No 'arg%u' argument.", no); 1059 return; 1060 } 1061 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1062 name += strlen("/dev/"); 1063 pp = g_provider_by_name(name); 1064 if (pp == NULL) { 1065 G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); 1066 gctl_error(req, "Disk %s is invalid.", name); 1067 return; 1068 } 1069 } 1070 1071 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1072 if (gp == NULL) { 1073 gctl_error(req, "Can't configure %s.", md.md_name); 1074 return; 1075 } 1076 1077 sc = gp->softc; 1078 sb = sbuf_new_auto(); 1079 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1080 for (attached = 0, no = 1; no < *nargs; no++) { 1081 snprintf(param, sizeof(param), "arg%u", no); 1082 name = gctl_get_asciiparam(req, param); 1083 if (name == NULL) { 1084 gctl_error(req, "No 'arg%u' argument.", no); 1085 continue; 1086 } 1087 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1088 name += strlen("/dev/"); 1089 pp = g_provider_by_name(name); 1090 KASSERT(pp != NULL, ("Provider %s disappear?!", name)); 1091 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1092 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1093 no, pp->name, gp->name); 1094 sbuf_printf(sb, " %s", pp->name); 1095 continue; 1096 } 1097 attached++; 1098 } 1099 sbuf_finish(sb); 1100 if (md.md_all != attached) { 1101 g_stripe_destroy(gp->softc, 1); 1102 gctl_error(req, "%s", sbuf_data(sb)); 1103 } 1104 sbuf_delete(sb); 1105} 1106 1107static struct g_stripe_softc * 1108g_stripe_find_device(struct g_class *mp, const char *name) 1109{ 1110 struct g_stripe_softc *sc; 1111 struct g_geom *gp; 1112 1113 LIST_FOREACH(gp, &mp->geom, geom) { 1114 sc = gp->softc; 1115 if (sc == NULL) 1116 continue; 1117 if (strcmp(sc->sc_name, name) == 0) 1118 return (sc); 1119 } 1120 return (NULL); 1121} 1122 1123static void 1124g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1125{ 1126 struct g_stripe_softc *sc; 1127 int *force, *nargs, error; 1128 const char *name; 1129 char param[16]; 1130 u_int i; 1131 1132 g_topology_assert(); 1133 1134 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1135 if (nargs == NULL) { 1136 gctl_error(req, "No '%s' argument.", "nargs"); 1137 return; 1138 } 1139 if (*nargs <= 0) { 1140 gctl_error(req, "Missing device(s)."); 1141 return; 1142 } 1143 force = gctl_get_paraml(req, "force", sizeof(*force)); 1144 if (force == NULL) { 1145 gctl_error(req, "No '%s' argument.", "force"); 1146 return; 1147 } 1148 1149 for (i = 0; i < (u_int)*nargs; i++) { 1150 snprintf(param, sizeof(param), "arg%u", i); 1151 name = gctl_get_asciiparam(req, param); 1152 if (name == NULL) { 1153 gctl_error(req, "No 'arg%u' argument.", i); 1154 return; 1155 } 1156 sc = g_stripe_find_device(mp, name); 1157 if (sc == NULL) { 1158 gctl_error(req, "No such device: %s.", name); 1159 return; 1160 } 1161 error = g_stripe_destroy(sc, *force); 1162 if (error != 0) { 1163 gctl_error(req, "Cannot destroy device %s (error=%d).", 1164 sc->sc_name, error); 1165 return; 1166 } 1167 } 1168} 1169 1170static void 1171g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1172{ 1173 uint32_t *version; 1174 1175 g_topology_assert(); 1176 1177 version = gctl_get_paraml(req, "version", sizeof(*version)); 1178 if (version == NULL) { 1179 gctl_error(req, "No '%s' argument.", "version"); 1180 return; 1181 } 1182 if (*version != G_STRIPE_VERSION) { 1183 gctl_error(req, "Userland and kernel parts are out of sync."); 1184 return; 1185 } 1186 1187 if (strcmp(verb, "create") == 0) { 1188 g_stripe_ctl_create(req, mp); 1189 return; 1190 } else if (strcmp(verb, "destroy") == 0 || 1191 strcmp(verb, "stop") == 0) { 1192 g_stripe_ctl_destroy(req, mp); 1193 return; 1194 } 1195 1196 gctl_error(req, "Unknown verb."); 1197} 1198 1199static void 1200g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1201 struct g_consumer *cp, struct g_provider *pp) 1202{ 1203 struct g_stripe_softc *sc; 1204 1205 sc = gp->softc; 1206 if (sc == NULL) 1207 return; 1208 if (pp != NULL) { 1209 /* Nothing here. */ 1210 } else if (cp != NULL) { 1211 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1212 (u_int)cp->index); 1213 } else { 1214 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1215 sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent, 1216 (u_int)sc->sc_stripesize); 1217 sbuf_printf(sb, "%s<Type>", indent); 1218 switch (sc->sc_type) { 1219 case G_STRIPE_TYPE_AUTOMATIC: 1220 sbuf_printf(sb, "AUTOMATIC"); 1221 break; 1222 case G_STRIPE_TYPE_MANUAL: 1223 sbuf_printf(sb, "MANUAL"); 1224 break; 1225 default: 1226 sbuf_printf(sb, "UNKNOWN"); 1227 break; 1228 } 1229 sbuf_printf(sb, "</Type>\n"); 1230 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1231 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1232 sbuf_printf(sb, "%s<State>", indent); 1233 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1234 sbuf_printf(sb, "UP"); 1235 else 1236 sbuf_printf(sb, "DOWN"); 1237 sbuf_printf(sb, "</State>\n"); 1238 } 1239} 1240 1241DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1242