/* g_stripe.c revision 133201 */
1/*- 2 * Copyright (c) 2003 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/stripe/g_stripe.c 133201 2004-08-06 09:55:40Z pjd $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/kernel.h> 33#include <sys/module.h> 34#include <sys/lock.h> 35#include <sys/mutex.h> 36#include <sys/bio.h> 37#include <sys/sysctl.h> 38#include <sys/malloc.h> 39#include <vm/uma.h> 40#include <geom/geom.h> 41#include <geom/stripe/g_stripe.h> 42 43 44#define MAX_IO_SIZE (DFLTPHYS * 2) 45static MALLOC_DEFINE(M_STRIPE, "stripe data", "GEOM_STRIPE Data"); 46 47static uma_zone_t g_stripe_zone; 48 49static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 50static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 51 struct g_geom *gp); 52 53static g_taste_t g_stripe_taste; 54static g_ctl_req_t g_stripe_config; 55static g_dumpconf_t g_stripe_dumpconf; 56static g_init_t g_stripe_init; 57static g_fini_t g_stripe_fini; 58 59struct g_class g_stripe_class = { 60 .name = G_STRIPE_CLASS_NAME, 61 .ctlreq = g_stripe_config, 62 .taste = g_stripe_taste, 63 .destroy_geom = g_stripe_destroy_geom, 64 .init = g_stripe_init, 65 .fini = g_stripe_fini 66}; 67 68SYSCTL_DECL(_kern_geom); 69SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); 70static u_int g_stripe_debug = 0; 71SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0, 72 "Debug level"); 73static int g_stripe_fast = 1; 74TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast); 75static int 76g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) 77{ 78 int error, fast; 79 80 fast = g_stripe_fast; 81 error = sysctl_handle_int(oidp, &fast, sizeof(fast), req); 82 if (error == 0 && req->newptr != NULL) 83 g_stripe_fast = fast; 84 return (error); 85} 86SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW, 87 NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); 88static u_int g_stripe_maxmem = MAX_IO_SIZE * 10; 
89TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem); 90SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem, 91 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 92 93/* 94 * Greatest Common Divisor. 95 */ 96static u_int 97gcd(u_int a, u_int b) 98{ 99 u_int c; 100 101 while (b != 0) { 102 c = a; 103 a = b; 104 b = (c % b); 105 } 106 return (a); 107} 108 109/* 110 * Least Common Multiple. 111 */ 112static u_int 113lcm(u_int a, u_int b) 114{ 115 116 return ((a * b) / gcd(a, b)); 117} 118 119static void 120g_stripe_init(struct g_class *mp __unused) 121{ 122 123 g_stripe_zone = uma_zcreate("g_stripe_zone", MAX_IO_SIZE, NULL, NULL, 124 NULL, NULL, 0, 0); 125 g_stripe_maxmem -= g_stripe_maxmem % MAX_IO_SIZE; 126 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAX_IO_SIZE); 127} 128 129static void 130g_stripe_fini(struct g_class *mp __unused) 131{ 132 133 uma_zdestroy(g_stripe_zone); 134} 135 136/* 137 * Return the number of valid disks. 138 */ 139static u_int 140g_stripe_nvalid(struct g_stripe_softc *sc) 141{ 142 u_int i, no; 143 144 no = 0; 145 for (i = 0; i < sc->sc_ndisks; i++) { 146 if (sc->sc_disks[i] != NULL) 147 no++; 148 } 149 150 return (no); 151} 152 153static void 154g_stripe_remove_disk(struct g_consumer *cp) 155{ 156 struct g_stripe_softc *sc; 157 u_int no; 158 159 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 160 sc = (struct g_stripe_softc *)cp->private; 161 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 162 no = cp->index; 163 164 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, 165 sc->sc_name); 166 167 sc->sc_disks[no] = NULL; 168 if (sc->sc_provider != NULL) { 169 g_orphan_provider(sc->sc_provider, ENXIO); 170 sc->sc_provider = NULL; 171 G_STRIPE_DEBUG(0, "Device %s removed.", sc->sc_name); 172 } 173 174 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 175 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 176 g_detach(cp); 177 g_destroy_consumer(cp); 178} 179 180static 
void 181g_stripe_orphan(struct g_consumer *cp) 182{ 183 struct g_stripe_softc *sc; 184 struct g_geom *gp; 185 186 g_topology_assert(); 187 gp = cp->geom; 188 sc = gp->softc; 189 if (sc == NULL) 190 return; 191 192 g_stripe_remove_disk(cp); 193 /* If there are no valid disks anymore, remove device. */ 194 if (g_stripe_nvalid(sc) == 0) 195 g_stripe_destroy(sc, 1); 196} 197 198static int 199g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 200{ 201 struct g_consumer *cp1, *cp2; 202 struct g_stripe_softc *sc; 203 struct g_geom *gp; 204 int error; 205 206 gp = pp->geom; 207 sc = gp->softc; 208 209 if (sc == NULL) { 210 /* 211 * It looks like geom is being withered. 212 * In that case we allow only negative requests. 213 */ 214 KASSERT(dr <= 0 && dw <= 0 && de <= 0, 215 ("Positive access request (device=%s).", pp->name)); 216 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && 217 (pp->ace + de) == 0) { 218 G_STRIPE_DEBUG(0, "Device %s definitely destroyed.", 219 gp->name); 220 } 221 return (0); 222 } 223 224 /* On first open, grab an extra "exclusive" bit */ 225 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 226 de++; 227 /* ... and let go of it on last close */ 228 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 229 de--; 230 231 error = ENXIO; 232 LIST_FOREACH(cp1, &gp->consumer, consumer) { 233 error = g_access(cp1, dr, dw, de); 234 if (error == 0) 235 continue; 236 /* 237 * If we fail here, backout all previous changes. 
238 */ 239 LIST_FOREACH(cp2, &gp->consumer, consumer) { 240 if (cp1 == cp2) 241 return (error); 242 g_access(cp2, -dr, -dw, -de); 243 } 244 /* NOTREACHED */ 245 } 246 247 return (error); 248} 249 250static void 251g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 252 off_t length, int mode) 253{ 254 u_int stripesize; 255 size_t len; 256 257 stripesize = sc->sc_stripesize; 258 len = (size_t)(stripesize - (offset & (stripesize - 1))); 259 do { 260 bcopy(src, dst, len); 261 if (mode) { 262 dst += len + stripesize * (sc->sc_ndisks - 1); 263 src += len; 264 } else { 265 dst += len; 266 src += len + stripesize * (sc->sc_ndisks - 1); 267 } 268 length -= len; 269 KASSERT(length >= 0, 270 ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", 271 (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); 272 if (length > stripesize) 273 len = stripesize; 274 else 275 len = length; 276 } while (length > 0); 277} 278 279static void 280g_stripe_done(struct bio *bp) 281{ 282 struct g_stripe_softc *sc; 283 struct bio *pbp; 284 285 pbp = bp->bio_parent; 286 sc = pbp->bio_to->geom->softc; 287 if (pbp->bio_error == 0) 288 pbp->bio_error = bp->bio_error; 289 pbp->bio_completed += bp->bio_completed; 290 if (bp->bio_cmd == BIO_READ && bp->bio_driver1 != NULL) { 291 g_stripe_copy(sc, bp->bio_data, bp->bio_driver1, bp->bio_offset, 292 bp->bio_length, 1); 293 bp->bio_data = bp->bio_driver1; 294 bp->bio_driver1 = NULL; 295 } 296 g_destroy_bio(bp); 297 pbp->bio_inbed++; 298 if (pbp->bio_children == pbp->bio_inbed) { 299 if (pbp->bio_caller1 != NULL) 300 uma_zfree(g_stripe_zone, pbp->bio_caller1); 301 g_io_deliver(pbp, pbp->bio_error); 302 } 303} 304 305static int 306g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 307{ 308 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 309 u_int nparts = 0, stripesize; 310 struct g_stripe_softc *sc; 311 char *addr, *data = NULL; 312 struct bio *cbp; 313 int error; 314 315 sc = 
bp->bio_to->geom->softc; 316 317 addr = bp->bio_data; 318 stripesize = sc->sc_stripesize; 319 320 cbp = g_clone_bio(bp); 321 if (cbp == NULL) { 322 error = ENOMEM; 323 goto failure; 324 } 325 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 326 nparts++; 327 /* 328 * Fill in the component buf structure. 329 */ 330 cbp->bio_done = g_stripe_done; 331 cbp->bio_offset = offset; 332 cbp->bio_data = addr; 333 cbp->bio_driver1 = NULL; 334 cbp->bio_length = length; 335 cbp->bio_driver2 = sc->sc_disks[no]; 336 337 /* offset -= offset % stripesize; */ 338 offset -= offset & (stripesize - 1); 339 addr += length; 340 length = bp->bio_length - length; 341 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 342 if (no > sc->sc_ndisks - 1) { 343 no = 0; 344 offset += stripesize; 345 } 346 if (nparts >= sc->sc_ndisks) { 347 cbp = TAILQ_NEXT(cbp, bio_queue); 348 if (cbp == NULL) 349 cbp = TAILQ_FIRST(&queue); 350 nparts++; 351 /* 352 * Update bio structure. 353 */ 354 /* 355 * MIN() is in case when 356 * (bp->bio_length % sc->sc_stripesize) != 0. 357 */ 358 cbp->bio_length += MIN(stripesize, length); 359 if (cbp->bio_driver1 == NULL) { 360 cbp->bio_driver1 = cbp->bio_data; 361 cbp->bio_data = NULL; 362 if (data == NULL) { 363 data = uma_zalloc(g_stripe_zone, 364 M_NOWAIT); 365 if (data == NULL) { 366 error = ENOMEM; 367 goto failure; 368 } 369 } 370 } 371 } else { 372 cbp = g_clone_bio(bp); 373 if (cbp == NULL) { 374 error = ENOMEM; 375 goto failure; 376 } 377 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 378 nparts++; 379 /* 380 * Fill in the component buf structure. 381 */ 382 cbp->bio_done = g_stripe_done; 383 cbp->bio_offset = offset; 384 cbp->bio_data = addr; 385 cbp->bio_driver1 = NULL; 386 /* 387 * MIN() is in case when 388 * (bp->bio_length % sc->sc_stripesize) != 0. 
389 */ 390 cbp->bio_length = MIN(stripesize, length); 391 cbp->bio_driver2 = sc->sc_disks[no]; 392 } 393 } 394 if (data != NULL) 395 bp->bio_caller1 = data; 396 /* 397 * Fire off all allocated requests! 398 */ 399 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 400 struct g_consumer *cp; 401 402 TAILQ_REMOVE(&queue, cbp, bio_queue); 403 cp = cbp->bio_driver2; 404 cbp->bio_driver2 = NULL; 405 cbp->bio_to = cp->provider; 406 if (cbp->bio_driver1 != NULL) { 407 cbp->bio_data = data; 408 if (bp->bio_cmd == BIO_WRITE) { 409 g_stripe_copy(sc, cbp->bio_driver1, data, 410 cbp->bio_offset, cbp->bio_length, 0); 411 } 412 data += cbp->bio_length; 413 } 414 G_STRIPE_LOGREQ(cbp, "Sending request."); 415 g_io_request(cbp, cp); 416 } 417 return (0); 418failure: 419 if (data != NULL) 420 uma_zfree(g_stripe_zone, data); 421 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 422 TAILQ_REMOVE(&queue, cbp, bio_queue); 423 if (cbp->bio_driver1 != NULL) { 424 cbp->bio_data = cbp->bio_driver1; 425 cbp->bio_driver1 = NULL; 426 } 427 bp->bio_children--; 428 g_destroy_bio(cbp); 429 } 430 return (error); 431} 432 433static int 434g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 435{ 436 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 437 struct g_stripe_softc *sc; 438 uint32_t stripesize; 439 struct bio *cbp; 440 char *addr; 441 int error; 442 443 sc = bp->bio_to->geom->softc; 444 445 addr = bp->bio_data; 446 stripesize = sc->sc_stripesize; 447 448 cbp = g_clone_bio(bp); 449 if (cbp == NULL) { 450 error = ENOMEM; 451 goto failure; 452 } 453 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 454 /* 455 * Fill in the component buf structure. 
456 */ 457 cbp->bio_done = g_std_done; 458 cbp->bio_offset = offset; 459 cbp->bio_data = addr; 460 cbp->bio_length = length; 461 cbp->bio_driver2 = sc->sc_disks[no]; 462 463 /* offset -= offset % stripesize; */ 464 offset -= offset & (stripesize - 1); 465 addr += length; 466 length = bp->bio_length - length; 467 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 468 if (no > sc->sc_ndisks - 1) { 469 no = 0; 470 offset += stripesize; 471 } 472 cbp = g_clone_bio(bp); 473 if (cbp == NULL) { 474 error = ENOMEM; 475 goto failure; 476 } 477 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 478 479 /* 480 * Fill in the component buf structure. 481 */ 482 cbp->bio_done = g_std_done; 483 cbp->bio_offset = offset; 484 cbp->bio_data = addr; 485 /* 486 * MIN() is in case when 487 * (bp->bio_length % sc->sc_stripesize) != 0. 488 */ 489 cbp->bio_length = MIN(stripesize, length); 490 491 cbp->bio_driver2 = sc->sc_disks[no]; 492 } 493 /* 494 * Fire off all allocated requests! 495 */ 496 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 497 struct g_consumer *cp; 498 499 TAILQ_REMOVE(&queue, cbp, bio_queue); 500 cp = cbp->bio_driver2; 501 cbp->bio_driver2 = NULL; 502 cbp->bio_to = cp->provider; 503 G_STRIPE_LOGREQ(cbp, "Sending request."); 504 g_io_request(cbp, cp); 505 } 506 return (0); 507failure: 508 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 509 TAILQ_REMOVE(&queue, cbp, bio_queue); 510 bp->bio_children--; 511 g_destroy_bio(cbp); 512 } 513 return (error); 514} 515 516static void 517g_stripe_start(struct bio *bp) 518{ 519 off_t offset, start, length, nstripe; 520 struct g_stripe_softc *sc; 521 u_int no, stripesize; 522 int error, fast = 0; 523 524 sc = bp->bio_to->geom->softc; 525 /* 526 * If sc == NULL, provider's error should be set and g_stripe_start() 527 * should not be called at all. 
528 */ 529 KASSERT(sc != NULL, 530 ("Provider's error should be set (error=%d)(device=%s).", 531 bp->bio_to->error, bp->bio_to->name)); 532 533 G_STRIPE_LOGREQ(bp, "Request received."); 534 535 switch (bp->bio_cmd) { 536 case BIO_READ: 537 case BIO_WRITE: 538 case BIO_DELETE: 539 /* 540 * Only those requests are supported. 541 */ 542 break; 543 case BIO_GETATTR: 544 /* To which provider it should be delivered? */ 545 default: 546 g_io_deliver(bp, EOPNOTSUPP); 547 return; 548 } 549 550 stripesize = sc->sc_stripesize; 551 552 /* 553 * Calculations are quite messy, but fast I hope. 554 */ 555 556 /* Stripe number. */ 557 /* nstripe = bp->bio_offset / stripesize; */ 558 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 559 /* Disk number. */ 560 no = nstripe % sc->sc_ndisks; 561 /* Start position in stripe. */ 562 /* start = bp->bio_offset % stripesize; */ 563 start = bp->bio_offset & (stripesize - 1); 564 /* Start position in disk. */ 565 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 566 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 567 /* Length of data to operate. */ 568 length = MIN(bp->bio_length, stripesize - start); 569 570 /* 571 * Do use "fast" mode when: 572 * 1. "Fast" mode is ON. 573 * and 574 * 2. Request size is less than or equal to MAX_IO_SIZE (128kB), 575 * which should always be true. 576 * and 577 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 578 * there will be no need to send more than one I/O request to 579 * a provider, so there is nothing to optmize. 580 */ 581 if (g_stripe_fast && bp->bio_length <= MAX_IO_SIZE && 582 bp->bio_length >= stripesize * sc->sc_ndisks) { 583 fast = 1; 584 } 585 error = 0; 586 if (fast) 587 error = g_stripe_start_fast(bp, no, offset, length); 588 /* 589 * Do use "economic" when: 590 * 1. "Economic" mode is ON. 591 * or 592 * 2. "Fast" mode failed. It can only failed if there is no memory. 
593 */ 594 if (!fast || error != 0) 595 error = g_stripe_start_economic(bp, no, offset, length); 596 if (error != 0) { 597 if (bp->bio_error == 0) 598 bp->bio_error = error; 599 g_io_deliver(bp, bp->bio_error); 600 } 601} 602 603static void 604g_stripe_check_and_run(struct g_stripe_softc *sc) 605{ 606 off_t mediasize, ms; 607 u_int no, sectorsize = 0; 608 609 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 610 return; 611 612 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 613 sc->sc_name); 614 /* 615 * Find the smallest disk. 616 */ 617 mediasize = sc->sc_disks[0]->provider->mediasize; 618 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 619 mediasize -= sc->sc_disks[0]->provider->sectorsize; 620 mediasize -= mediasize % sc->sc_stripesize; 621 sectorsize = sc->sc_disks[0]->provider->sectorsize; 622 for (no = 1; no < sc->sc_ndisks; no++) { 623 ms = sc->sc_disks[no]->provider->mediasize; 624 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 625 ms -= sc->sc_disks[no]->provider->sectorsize; 626 ms -= ms % sc->sc_stripesize; 627 if (ms < mediasize) 628 mediasize = ms; 629 sectorsize = lcm(sectorsize, 630 sc->sc_disks[no]->provider->sectorsize); 631 } 632 sc->sc_provider->sectorsize = sectorsize; 633 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 634 g_error_provider(sc->sc_provider, 0); 635 636 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_name); 637} 638 639static int 640g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 641{ 642 struct g_provider *pp; 643 u_char *buf; 644 int error; 645 646 g_topology_assert(); 647 648 error = g_access(cp, 1, 0, 0); 649 if (error != 0) 650 return (error); 651 pp = cp->provider; 652 g_topology_unlock(); 653 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 654 &error); 655 g_topology_lock(); 656 g_access(cp, -1, 0, 0); 657 if (buf == NULL) 658 return (error); 659 660 /* Decode metadata. 
*/ 661 stripe_metadata_decode(buf, md); 662 g_free(buf); 663 664 return (0); 665} 666 667/* 668 * Add disk to given device. 669 */ 670static int 671g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 672{ 673 struct g_consumer *cp, *fcp; 674 struct g_geom *gp; 675 int error; 676 677 /* Metadata corrupted? */ 678 if (no >= sc->sc_ndisks) 679 return (EINVAL); 680 681 /* Check if disk is not already attached. */ 682 if (sc->sc_disks[no] != NULL) 683 return (EEXIST); 684 685 gp = sc->sc_geom; 686 fcp = LIST_FIRST(&gp->consumer); 687 688 cp = g_new_consumer(gp); 689 error = g_attach(cp, pp); 690 if (error != 0) { 691 g_destroy_consumer(cp); 692 return (error); 693 } 694 695 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 696 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 697 if (error != 0) { 698 g_detach(cp); 699 g_destroy_consumer(cp); 700 return (error); 701 } 702 } 703 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 704 struct g_stripe_metadata md; 705 706 /* Reread metadata. 
*/ 707 error = g_stripe_read_metadata(cp, &md); 708 if (error != 0) 709 goto fail; 710 711 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 712 strcmp(md.md_name, sc->sc_name) != 0 || 713 md.md_id != sc->sc_id) { 714 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 715 goto fail; 716 } 717 } 718 719 cp->private = sc; 720 cp->index = no; 721 sc->sc_disks[no] = cp; 722 723 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 724 725 g_stripe_check_and_run(sc); 726 727 return (0); 728fail: 729 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 730 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 731 g_detach(cp); 732 g_destroy_consumer(cp); 733 return (error); 734} 735 736static struct g_geom * 737g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 738 u_int type) 739{ 740 struct g_stripe_softc *sc; 741 struct g_geom *gp; 742 u_int no; 743 744 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 745 md->md_id); 746 747 /* Two disks is minimum. */ 748 if (md->md_all < 2) { 749 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 750 return (NULL); 751 } 752#if 0 753 /* Stripe size have to be grater than or equal to sector size. */ 754 if (md->md_stripesize < sectorsize) { 755 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 756 return (NULL); 757 } 758#endif 759 /* Stripe size have to be power of 2. 
*/ 760 if (!powerof2(md->md_stripesize)) { 761 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 762 return (NULL); 763 } 764 765 /* Check for duplicate unit */ 766 LIST_FOREACH(gp, &mp->geom, geom) { 767 sc = gp->softc; 768 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 769 G_STRIPE_DEBUG(0, "Device %s already configured.", 770 sc->sc_name); 771 return (NULL); 772 } 773 } 774 gp = g_new_geomf(mp, "%s", md->md_name); 775 gp->softc = NULL; /* for a moment */ 776 777 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 778 gp->start = g_stripe_start; 779 gp->spoiled = g_stripe_orphan; 780 gp->orphan = g_stripe_orphan; 781 gp->access = g_stripe_access; 782 gp->dumpconf = g_stripe_dumpconf; 783 784 sc->sc_id = md->md_id; 785 sc->sc_stripesize = md->md_stripesize; 786 sc->sc_stripebits = BITCOUNT(sc->sc_stripesize - 1); 787 sc->sc_ndisks = md->md_all; 788 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 789 M_STRIPE, M_WAITOK | M_ZERO); 790 for (no = 0; no < sc->sc_ndisks; no++) 791 sc->sc_disks[no] = NULL; 792 sc->sc_type = type; 793 794 gp->softc = sc; 795 sc->sc_geom = gp; 796 sc->sc_provider = NULL; 797 798 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 799 800 return (gp); 801} 802 803static int 804g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 805{ 806 struct g_provider *pp; 807 struct g_geom *gp; 808 u_int no; 809 810 g_topology_assert(); 811 812 if (sc == NULL) 813 return (ENXIO); 814 815 pp = sc->sc_provider; 816 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 817 if (force) { 818 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 819 "can't be definitely removed.", pp->name); 820 } else { 821 G_STRIPE_DEBUG(1, 822 "Device %s is still open (r%dw%de%d).", pp->name, 823 pp->acr, pp->acw, pp->ace); 824 return (EBUSY); 825 } 826 } 827 828 for (no = 0; no < sc->sc_ndisks; no++) { 829 if (sc->sc_disks[no] != NULL) 830 g_stripe_remove_disk(sc->sc_disks[no]); 831 } 
832 833 gp = sc->sc_geom; 834 gp->softc = NULL; 835 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 836 gp->name)); 837 free(sc->sc_disks, M_STRIPE); 838 free(sc, M_STRIPE); 839 840 pp = LIST_FIRST(&gp->provider); 841 if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) 842 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 843 844 g_wither_geom(gp, ENXIO); 845 846 return (0); 847} 848 849static int 850g_stripe_destroy_geom(struct gctl_req *req __unused, 851 struct g_class *mp __unused, struct g_geom *gp) 852{ 853 struct g_stripe_softc *sc; 854 855 sc = gp->softc; 856 return (g_stripe_destroy(sc, 0)); 857} 858 859static struct g_geom * 860g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 861{ 862 struct g_stripe_metadata md; 863 struct g_stripe_softc *sc; 864 struct g_consumer *cp; 865 struct g_geom *gp; 866 int error; 867 868 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 869 g_topology_assert(); 870 871 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 872 873 gp = g_new_geomf(mp, "stripe:taste"); 874 gp->start = g_stripe_start; 875 gp->access = g_stripe_access; 876 gp->orphan = g_stripe_orphan; 877 cp = g_new_consumer(gp); 878 g_attach(cp, pp); 879 880 error = g_stripe_read_metadata(cp, &md); 881 g_wither_geom(gp, ENXIO); 882 if (error != 0) 883 return (NULL); 884 gp = NULL; 885 886 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 887 return (NULL); 888 if (md.md_version > G_STRIPE_VERSION) { 889 printf("geom_stripe.ko module is too old to handle %s.\n", 890 pp->name); 891 return (NULL); 892 } 893 894 /* 895 * Let's check if device already exists. 
896 */ 897 sc = NULL; 898 LIST_FOREACH(gp, &mp->geom, geom) { 899 sc = gp->softc; 900 if (sc == NULL) 901 continue; 902 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 903 continue; 904 if (strcmp(md.md_name, sc->sc_name) != 0) 905 continue; 906 if (md.md_id != sc->sc_id) 907 continue; 908 break; 909 } 910 if (gp != NULL) { 911 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 912 error = g_stripe_add_disk(sc, pp, md.md_no); 913 if (error != 0) { 914 G_STRIPE_DEBUG(0, 915 "Cannot add disk %s to %s (error=%d).", pp->name, 916 gp->name, error); 917 return (NULL); 918 } 919 } else { 920 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 921 if (gp == NULL) { 922 G_STRIPE_DEBUG(0, "Cannot create device %s.", 923 md.md_name); 924 return (NULL); 925 } 926 sc = gp->softc; 927 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 928 error = g_stripe_add_disk(sc, pp, md.md_no); 929 if (error != 0) { 930 G_STRIPE_DEBUG(0, 931 "Cannot add disk %s to %s (error=%d).", pp->name, 932 gp->name, error); 933 g_stripe_destroy(sc, 1); 934 return (NULL); 935 } 936 } 937 938 return (gp); 939} 940 941static void 942g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 943{ 944 u_int attached, no; 945 struct g_stripe_metadata md; 946 struct g_provider *pp; 947 struct g_stripe_softc *sc; 948 struct g_geom *gp; 949 struct sbuf *sb; 950 intmax_t *stripesize; 951 const char *name; 952 char param[16]; 953 int *nargs; 954 955 g_topology_assert(); 956 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 957 if (nargs == NULL) { 958 gctl_error(req, "No '%s' argument.", "nargs"); 959 return; 960 } 961 if (*nargs <= 2) { 962 gctl_error(req, "Too few arguments."); 963 return; 964 } 965 966 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 967 md.md_version = G_STRIPE_VERSION; 968 name = gctl_get_asciiparam(req, "arg0"); 969 if (name == NULL) { 970 gctl_error(req, "No 'arg%u' argument.", 0); 971 return; 972 } 973 strlcpy(md.md_name, name, 
sizeof(md.md_name)); 974 md.md_id = arc4random(); 975 md.md_no = 0; 976 md.md_all = *nargs - 1; 977 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 978 if (stripesize == NULL) { 979 gctl_error(req, "No '%s' argument.", "stripesize"); 980 return; 981 } 982 md.md_stripesize = *stripesize; 983 984 /* Check all providers are valid */ 985 for (no = 1; no < *nargs; no++) { 986 snprintf(param, sizeof(param), "arg%u", no); 987 name = gctl_get_asciiparam(req, param); 988 if (name == NULL) { 989 gctl_error(req, "No 'arg%u' argument.", no); 990 return; 991 } 992 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 993 name += strlen("/dev/"); 994 pp = g_provider_by_name(name); 995 if (pp == NULL) { 996 G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); 997 gctl_error(req, "Disk %s is invalid.", name); 998 return; 999 } 1000 } 1001 1002 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1003 if (gp == NULL) { 1004 gctl_error(req, "Can't configure %s.", md.md_name); 1005 return; 1006 } 1007 1008 sc = gp->softc; 1009 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 1010 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1011 for (attached = 0, no = 1; no < *nargs; no++) { 1012 snprintf(param, sizeof(param), "arg%u", no); 1013 name = gctl_get_asciiparam(req, param); 1014 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1015 name += strlen("/dev/"); 1016 pp = g_provider_by_name(name); 1017 KASSERT(pp != NULL, ("Provider %s disappear?!", name)); 1018 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1019 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1020 no, pp->name, gp->name); 1021 sbuf_printf(sb, " %s", pp->name); 1022 continue; 1023 } 1024 attached++; 1025 } 1026 sbuf_finish(sb); 1027 if (md.md_all != attached) { 1028 g_stripe_destroy(gp->softc, 1); 1029 gctl_error(req, "%s", sbuf_data(sb)); 1030 } 1031 sbuf_delete(sb); 1032} 1033 1034static struct g_stripe_softc * 1035g_stripe_find_device(struct g_class *mp, const char *name) 1036{ 1037 struct 
g_stripe_softc *sc; 1038 struct g_geom *gp; 1039 1040 LIST_FOREACH(gp, &mp->geom, geom) { 1041 sc = gp->softc; 1042 if (sc == NULL) 1043 continue; 1044 if (strcmp(sc->sc_name, name) == 0) 1045 return (sc); 1046 } 1047 return (NULL); 1048} 1049 1050static void 1051g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1052{ 1053 struct g_stripe_softc *sc; 1054 int *force, *nargs, error; 1055 const char *name; 1056 char param[16]; 1057 u_int i; 1058 1059 g_topology_assert(); 1060 1061 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1062 if (nargs == NULL) { 1063 gctl_error(req, "No '%s' argument.", "nargs"); 1064 return; 1065 } 1066 if (*nargs <= 0) { 1067 gctl_error(req, "Missing device(s)."); 1068 return; 1069 } 1070 force = gctl_get_paraml(req, "force", sizeof(*force)); 1071 if (force == NULL) { 1072 gctl_error(req, "No '%s' argument.", "force"); 1073 return; 1074 } 1075 1076 for (i = 0; i < (u_int)*nargs; i++) { 1077 snprintf(param, sizeof(param), "arg%u", i); 1078 name = gctl_get_asciiparam(req, param); 1079 if (name == NULL) { 1080 gctl_error(req, "No 'arg%u' argument.", i); 1081 return; 1082 } 1083 sc = g_stripe_find_device(mp, name); 1084 if (sc == NULL) { 1085 gctl_error(req, "No such device: %s.", name); 1086 return; 1087 } 1088 error = g_stripe_destroy(sc, *force); 1089 if (error != 0) { 1090 gctl_error(req, "Cannot destroy device %s (error=%d).", 1091 sc->sc_name, error); 1092 return; 1093 } 1094 } 1095} 1096 1097static void 1098g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1099{ 1100 uint32_t *version; 1101 1102 g_topology_assert(); 1103 1104 version = gctl_get_paraml(req, "version", sizeof(*version)); 1105 if (version == NULL) { 1106 gctl_error(req, "No '%s' argument.", "version"); 1107 return; 1108 } 1109 if (*version != G_STRIPE_VERSION) { 1110 gctl_error(req, "Userland and kernel parts are out of sync."); 1111 return; 1112 } 1113 1114 if (strcmp(verb, "create") == 0) { 1115 g_stripe_ctl_create(req, mp); 
1116 return; 1117 } else if (strcmp(verb, "destroy") == 0 || 1118 strcmp(verb, "stop") == 0) { 1119 g_stripe_ctl_destroy(req, mp); 1120 return; 1121 } 1122 1123 gctl_error(req, "Unknown verb."); 1124} 1125 1126static void 1127g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1128 struct g_consumer *cp, struct g_provider *pp) 1129{ 1130 struct g_stripe_softc *sc; 1131 1132 sc = gp->softc; 1133 if (sc == NULL) 1134 return; 1135 if (pp != NULL) { 1136 /* Nothing here. */ 1137 } else if (cp != NULL) { 1138 /* Nothing here. */ 1139 } else { 1140 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1141 sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent, 1142 (u_int)sc->sc_stripesize); 1143 sbuf_printf(sb, "%s<Type>", indent); 1144 switch (sc->sc_type) { 1145 case G_STRIPE_TYPE_AUTOMATIC: 1146 sbuf_printf(sb, "AUTOMATIC"); 1147 break; 1148 case G_STRIPE_TYPE_MANUAL: 1149 sbuf_printf(sb, "MANUAL"); 1150 break; 1151 default: 1152 sbuf_printf(sb, "UNKNOWN"); 1153 break; 1154 } 1155 sbuf_printf(sb, "</Type>\n"); 1156 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1157 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1158 sbuf_printf(sb, "%s<State>", indent); 1159 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1160 sbuf_printf(sb, "UP"); 1161 else 1162 sbuf_printf(sb, "DOWN"); 1163 sbuf_printf(sb, "</State>\n"); 1164 } 1165} 1166 1167DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1168