/* vdev_geom.c — FreeBSD SVN revision 259168 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable = 0; 57TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); 58SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, 59 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 60/* Don't send BIO_DELETE. 
*/ 61static int vdev_geom_bio_delete_disable = 0; 62TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); 63SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, 64 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 65 66static void 67vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 68{ 69 int error; 70 uint16_t rate; 71 72 error = g_getattr("GEOM::rotation_rate", cp, &rate); 73 if (error == 0) 74 vd->vdev_rotation_rate = rate; 75 else 76 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 77} 78 79static void 80vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 81{ 82 vdev_t *vd; 83 84 vd = cp->private; 85 if (vd == NULL) 86 return; 87 88 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 89 vdev_geom_set_rotation_rate(vd, cp); 90 return; 91 } 92} 93 94static void 95vdev_geom_orphan(struct g_consumer *cp) 96{ 97 vdev_t *vd; 98 99 g_topology_assert(); 100 101 vd = cp->private; 102 if (vd == NULL) 103 return; 104 105 /* 106 * Orphan callbacks occur from the GEOM event thread. 107 * Concurrent with this call, new I/O requests may be 108 * working their way through GEOM about to find out 109 * (only once executed by the g_down thread) that we've 110 * been orphaned from our disk provider. These I/Os 111 * must be retired before we can detach our consumer. 112 * This is most easily achieved by acquiring the 113 * SPA ZIO configuration lock as a writer, but doing 114 * so with the GEOM topology lock held would cause 115 * a lock order reversal. Instead, rely on the SPA's 116 * async removal support to invoke a close on this 117 * vdev once it is safe to do so. 
118 */ 119 zfs_post_remove(vd->vdev_spa, vd); 120 vd->vdev_remove_wanted = B_TRUE; 121 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 122} 123 124static struct g_consumer * 125vdev_geom_attach(struct g_provider *pp) 126{ 127 struct g_geom *gp; 128 struct g_consumer *cp; 129 130 g_topology_assert(); 131 132 ZFS_LOG(1, "Attaching to %s.", pp->name); 133 /* Do we have geom already? No? Create one. */ 134 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 135 if (gp->flags & G_GEOM_WITHER) 136 continue; 137 if (strcmp(gp->name, "zfs::vdev") != 0) 138 continue; 139 break; 140 } 141 if (gp == NULL) { 142 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 143 gp->orphan = vdev_geom_orphan; 144 cp = g_new_consumer(gp); 145 if (g_attach(cp, pp) != 0) { 146 g_wither_geom(gp, ENXIO); 147 return (NULL); 148 } 149 if (g_access(cp, 1, 0, 1) != 0) { 150 g_wither_geom(gp, ENXIO); 151 return (NULL); 152 } 153 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 154 } else { 155 /* Check if we are already connected to this provider. 
*/ 156 LIST_FOREACH(cp, &gp->consumer, consumer) { 157 if (cp->provider == pp) { 158 ZFS_LOG(1, "Found consumer for %s.", pp->name); 159 break; 160 } 161 } 162 if (cp == NULL) { 163 cp = g_new_consumer(gp); 164 if (g_attach(cp, pp) != 0) { 165 g_destroy_consumer(cp); 166 return (NULL); 167 } 168 if (g_access(cp, 1, 0, 1) != 0) { 169 g_detach(cp); 170 g_destroy_consumer(cp); 171 return (NULL); 172 } 173 ZFS_LOG(1, "Created consumer for %s.", pp->name); 174 } else { 175 if (g_access(cp, 1, 0, 1) != 0) 176 return (NULL); 177 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 178 } 179 } 180 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 181 return (cp); 182} 183 184static void 185vdev_geom_detach(void *arg, int flag __unused) 186{ 187 struct g_geom *gp; 188 struct g_consumer *cp; 189 190 g_topology_assert(); 191 cp = arg; 192 gp = cp->geom; 193 194 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 195 g_access(cp, -1, 0, -1); 196 /* Destroy consumer on last close. */ 197 if (cp->acr == 0 && cp->ace == 0) { 198 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); 199 if (cp->acw > 0) 200 g_access(cp, 0, -cp->acw, 0); 201 g_detach(cp); 202 g_destroy_consumer(cp); 203 } 204 /* Destroy geom if there are no consumers left. 
*/ 205 if (LIST_EMPTY(&gp->consumer)) { 206 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 207 g_wither_geom(gp, ENXIO); 208 } 209} 210 211static uint64_t 212nvlist_get_guid(nvlist_t *list) 213{ 214 uint64_t value; 215 216 value = 0; 217 nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value); 218 return (value); 219} 220 221static int 222vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 223{ 224 struct bio *bp; 225 u_char *p; 226 off_t off, maxio; 227 int error; 228 229 ASSERT((offset % cp->provider->sectorsize) == 0); 230 ASSERT((size % cp->provider->sectorsize) == 0); 231 232 bp = g_alloc_bio(); 233 off = offset; 234 offset += size; 235 p = data; 236 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 237 error = 0; 238 239 for (; off < offset; off += maxio, p += maxio, size -= maxio) { 240 bzero(bp, sizeof(*bp)); 241 bp->bio_cmd = cmd; 242 bp->bio_done = NULL; 243 bp->bio_offset = off; 244 bp->bio_length = MIN(size, maxio); 245 bp->bio_data = p; 246 g_io_request(bp, cp); 247 error = biowait(bp, "vdev_geom_io"); 248 if (error != 0) 249 break; 250 } 251 252 g_destroy_bio(bp); 253 return (error); 254} 255 256static void 257vdev_geom_taste_orphan(struct g_consumer *cp) 258{ 259 260 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 261 cp->provider->name)); 262} 263 264static int 265vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 266{ 267 struct g_provider *pp; 268 vdev_label_t *label; 269 char *p, *buf; 270 size_t buflen; 271 uint64_t psize; 272 off_t offset, size; 273 uint64_t guid, state, txg; 274 int error, l, len; 275 276 g_topology_assert_not(); 277 278 pp = cp->provider; 279 ZFS_LOG(1, "Reading config from %s...", pp->name); 280 281 psize = pp->mediasize; 282 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 283 284 size = sizeof(*label) + pp->sectorsize - 285 ((sizeof(*label) - 1) % pp->sectorsize) - 1; 286 287 guid = 0; 288 label = kmem_alloc(size, KM_SLEEP); 289 buflen = 
sizeof(label->vl_vdev_phys.vp_nvlist); 290 291 *config = NULL; 292 for (l = 0; l < VDEV_LABELS; l++) { 293 294 offset = vdev_label_offset(psize, l, 0); 295 if ((offset % pp->sectorsize) != 0) 296 continue; 297 298 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 299 continue; 300 buf = label->vl_vdev_phys.vp_nvlist; 301 302 if (nvlist_unpack(buf, buflen, config, 0) != 0) 303 continue; 304 305 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 306 &state) != 0 || state > POOL_STATE_L2CACHE) { 307 nvlist_free(*config); 308 *config = NULL; 309 continue; 310 } 311 312 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 313 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 314 &txg) != 0 || txg == 0)) { 315 nvlist_free(*config); 316 *config = NULL; 317 continue; 318 } 319 320 break; 321 } 322 323 kmem_free(label, size); 324 return (*config == NULL ? ENOENT : 0); 325} 326 327static void 328resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 329{ 330 nvlist_t **new_configs; 331 uint64_t i; 332 333 if (id < *count) 334 return; 335 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 336 KM_SLEEP); 337 for (i = 0; i < *count; i++) 338 new_configs[i] = (*configs)[i]; 339 if (*configs != NULL) 340 kmem_free(*configs, *count * sizeof(void *)); 341 *configs = new_configs; 342 *count = id + 1; 343} 344 345static void 346process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 347 const char *name, uint64_t* known_pool_guid) 348{ 349 nvlist_t *vdev_tree; 350 uint64_t pool_guid; 351 uint64_t vdev_guid, known_guid; 352 uint64_t id, txg, known_txg; 353 char *pname; 354 int i; 355 356 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 357 strcmp(pname, name) != 0) 358 goto ignore; 359 360 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 361 goto ignore; 362 363 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 364 goto ignore; 365 366 if 
(nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 367 goto ignore; 368 369 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 370 goto ignore; 371 372 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 373 374 if (*known_pool_guid != 0) { 375 if (pool_guid != *known_pool_guid) 376 goto ignore; 377 } else 378 *known_pool_guid = pool_guid; 379 380 resize_configs(configs, count, id); 381 382 if ((*configs)[id] != NULL) { 383 VERIFY(nvlist_lookup_uint64((*configs)[id], 384 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 385 if (txg <= known_txg) 386 goto ignore; 387 nvlist_free((*configs)[id]); 388 } 389 390 (*configs)[id] = cfg; 391 return; 392 393ignore: 394 nvlist_free(cfg); 395} 396 397static int 398vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) 399{ 400 int error; 401 402 if (pp->flags & G_PF_WITHER) 403 return (EINVAL); 404 g_attach(cp, pp); 405 error = g_access(cp, 1, 0, 0); 406 if (error == 0) { 407 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) 408 error = EINVAL; 409 else if (pp->mediasize < SPA_MINDEVSIZE) 410 error = EINVAL; 411 if (error != 0) 412 g_access(cp, -1, 0, 0); 413 } 414 if (error != 0) 415 g_detach(cp); 416 return (error); 417} 418 419static void 420vdev_geom_detach_taster(struct g_consumer *cp) 421{ 422 g_access(cp, -1, 0, 0); 423 g_detach(cp); 424} 425 426int 427vdev_geom_read_pool_label(const char *name, 428 nvlist_t ***configs, uint64_t *count) 429{ 430 struct g_class *mp; 431 struct g_geom *gp, *zgp; 432 struct g_provider *pp; 433 struct g_consumer *zcp; 434 nvlist_t *vdev_cfg; 435 uint64_t pool_guid; 436 int error; 437 438 DROP_GIANT(); 439 g_topology_lock(); 440 441 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 442 /* This orphan function should be never called. 
*/ 443 zgp->orphan = vdev_geom_taste_orphan; 444 zcp = g_new_consumer(zgp); 445 446 *configs = NULL; 447 *count = 0; 448 pool_guid = 0; 449 LIST_FOREACH(mp, &g_classes, class) { 450 if (mp == &zfs_vdev_class) 451 continue; 452 LIST_FOREACH(gp, &mp->geom, geom) { 453 if (gp->flags & G_GEOM_WITHER) 454 continue; 455 LIST_FOREACH(pp, &gp->provider, provider) { 456 if (pp->flags & G_PF_WITHER) 457 continue; 458 if (vdev_geom_attach_taster(zcp, pp) != 0) 459 continue; 460 g_topology_unlock(); 461 error = vdev_geom_read_config(zcp, &vdev_cfg); 462 g_topology_lock(); 463 vdev_geom_detach_taster(zcp); 464 if (error) 465 continue; 466 ZFS_LOG(1, "successfully read vdev config"); 467 468 process_vdev_config(configs, count, 469 vdev_cfg, name, &pool_guid); 470 } 471 } 472 } 473 474 g_destroy_consumer(zcp); 475 g_destroy_geom(zgp); 476 g_topology_unlock(); 477 PICKUP_GIANT(); 478 479 return (*count > 0 ? 0 : ENOENT); 480} 481 482static uint64_t 483vdev_geom_read_guid(struct g_consumer *cp) 484{ 485 nvlist_t *config; 486 uint64_t guid; 487 488 g_topology_assert_not(); 489 490 guid = 0; 491 if (vdev_geom_read_config(cp, &config) == 0) { 492 guid = nvlist_get_guid(config); 493 nvlist_free(config); 494 } 495 return (guid); 496} 497 498static struct g_consumer * 499vdev_geom_attach_by_guid(uint64_t guid) 500{ 501 struct g_class *mp; 502 struct g_geom *gp, *zgp; 503 struct g_provider *pp; 504 struct g_consumer *cp, *zcp; 505 uint64_t pguid; 506 507 g_topology_assert(); 508 509 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 510 /* This orphan function should be never called. 
*/ 511 zgp->orphan = vdev_geom_taste_orphan; 512 zcp = g_new_consumer(zgp); 513 514 cp = NULL; 515 LIST_FOREACH(mp, &g_classes, class) { 516 if (mp == &zfs_vdev_class) 517 continue; 518 LIST_FOREACH(gp, &mp->geom, geom) { 519 if (gp->flags & G_GEOM_WITHER) 520 continue; 521 LIST_FOREACH(pp, &gp->provider, provider) { 522 if (vdev_geom_attach_taster(zcp, pp) != 0) 523 continue; 524 g_topology_unlock(); 525 pguid = vdev_geom_read_guid(zcp); 526 g_topology_lock(); 527 vdev_geom_detach_taster(zcp); 528 if (pguid != guid) 529 continue; 530 cp = vdev_geom_attach(pp); 531 if (cp == NULL) { 532 printf("ZFS WARNING: Unable to attach to %s.\n", 533 pp->name); 534 continue; 535 } 536 break; 537 } 538 if (cp != NULL) 539 break; 540 } 541 if (cp != NULL) 542 break; 543 } 544end: 545 g_destroy_consumer(zcp); 546 g_destroy_geom(zgp); 547 return (cp); 548} 549 550static struct g_consumer * 551vdev_geom_open_by_guid(vdev_t *vd) 552{ 553 struct g_consumer *cp; 554 char *buf; 555 size_t len; 556 557 g_topology_assert(); 558 559 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 560 cp = vdev_geom_attach_by_guid(vd->vdev_guid); 561 if (cp != NULL) { 562 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 563 buf = kmem_alloc(len, KM_SLEEP); 564 565 snprintf(buf, len, "/dev/%s", cp->provider->name); 566 spa_strfree(vd->vdev_path); 567 vd->vdev_path = buf; 568 569 ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.", 570 (uintmax_t)vd->vdev_guid, vd->vdev_path); 571 } else { 572 ZFS_LOG(1, "Search by guid [%ju] failed.", 573 (uintmax_t)vd->vdev_guid); 574 } 575 576 return (cp); 577} 578 579static struct g_consumer * 580vdev_geom_open_by_path(vdev_t *vd, int check_guid) 581{ 582 struct g_provider *pp; 583 struct g_consumer *cp; 584 uint64_t guid; 585 586 g_topology_assert(); 587 588 cp = NULL; 589 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 590 if (pp != NULL) { 591 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 592 cp = 
vdev_geom_attach(pp); 593 if (cp != NULL && check_guid && ISP2(pp->sectorsize) && 594 pp->sectorsize <= VDEV_PAD_SIZE) { 595 g_topology_unlock(); 596 guid = vdev_geom_read_guid(cp); 597 g_topology_lock(); 598 if (guid != vd->vdev_guid) { 599 vdev_geom_detach(cp, 0); 600 cp = NULL; 601 ZFS_LOG(1, "guid mismatch for provider %s: " 602 "%ju != %ju.", vd->vdev_path, 603 (uintmax_t)vd->vdev_guid, (uintmax_t)guid); 604 } else { 605 ZFS_LOG(1, "guid match for provider %s.", 606 vd->vdev_path); 607 } 608 } 609 } 610 611 return (cp); 612} 613 614static int 615vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 616 uint64_t *logical_ashift, uint64_t *physical_ashift) 617{ 618 struct g_provider *pp; 619 struct g_consumer *cp; 620 size_t bufsize; 621 int error; 622 623 /* 624 * We must have a pathname, and it must be absolute. 625 */ 626 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 627 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 628 return (EINVAL); 629 } 630 631 vd->vdev_tsd = NULL; 632 633 DROP_GIANT(); 634 g_topology_lock(); 635 error = 0; 636 637 /* 638 * If we're creating or splitting a pool, just find the GEOM provider 639 * by its name and ignore GUID mismatches. 640 */ 641 if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 642 vd->vdev_spa->spa_splitting_newspa == B_TRUE) 643 cp = vdev_geom_open_by_path(vd, 0); 644 else { 645 cp = vdev_geom_open_by_path(vd, 1); 646 if (cp == NULL) { 647 /* 648 * The device at vd->vdev_path doesn't have the 649 * expected guid. The disks might have merely 650 * moved around so try all other GEOM providers 651 * to find one with the right guid. 
652 */ 653 cp = vdev_geom_open_by_guid(vd); 654 } 655 } 656 657 if (cp == NULL) { 658 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 659 error = ENOENT; 660 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 661 !ISP2(cp->provider->sectorsize)) { 662 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 663 vd->vdev_path); 664 vdev_geom_detach(cp, 0); 665 error = EINVAL; 666 cp = NULL; 667 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 668 int i; 669 670 for (i = 0; i < 5; i++) { 671 error = g_access(cp, 0, 1, 0); 672 if (error == 0) 673 break; 674 g_topology_unlock(); 675 tsleep(vd, 0, "vdev", hz / 2); 676 g_topology_lock(); 677 } 678 if (error != 0) { 679 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 680 vd->vdev_path, error); 681 vdev_geom_detach(cp, 0); 682 cp = NULL; 683 } 684 } 685 g_topology_unlock(); 686 PICKUP_GIANT(); 687 if (cp == NULL) { 688 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 689 return (error); 690 } 691 692 cp->private = vd; 693 vd->vdev_tsd = cp; 694 pp = cp->provider; 695 696 /* 697 * Determine the actual size of the device. 698 */ 699 *max_psize = *psize = pp->mediasize; 700 701 /* 702 * Determine the device's minimum transfer size and preferred 703 * transfer size. 704 */ 705 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 706 *physical_ashift = 0; 707 if (pp->stripesize) 708 *physical_ashift = highbit(pp->stripesize) - 1; 709 710 /* 711 * Clear the nowritecache settings, so that on a vdev_reopen() 712 * we will try again. 713 */ 714 vd->vdev_nowritecache = B_FALSE; 715 716 if (vd->vdev_physpath != NULL) 717 spa_strfree(vd->vdev_physpath); 718 bufsize = sizeof("/dev/") + strlen(pp->name); 719 vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); 720 snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); 721 722 /* 723 * Determine the device's rotation rate. 
724 */ 725 vdev_geom_set_rotation_rate(vd, cp); 726 727 return (0); 728} 729 730static void 731vdev_geom_close(vdev_t *vd) 732{ 733 struct g_consumer *cp; 734 735 cp = vd->vdev_tsd; 736 if (cp == NULL) 737 return; 738 vd->vdev_tsd = NULL; 739 vd->vdev_delayed_close = B_FALSE; 740 cp->private = NULL; /* XXX locking */ 741 g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); 742} 743 744static void 745vdev_geom_io_intr(struct bio *bp) 746{ 747 vdev_t *vd; 748 zio_t *zio; 749 750 zio = bp->bio_caller1; 751 vd = zio->io_vd; 752 zio->io_error = bp->bio_error; 753 if (zio->io_error == 0 && bp->bio_resid != 0) 754 zio->io_error = EIO; 755 if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) { 756 /* 757 * If we get ENOTSUP, we know that no future 758 * attempts will ever succeed. In this case we 759 * set a persistent bit so that we don't bother 760 * with the ioctl in the future. 761 */ 762 vd->vdev_nowritecache = B_TRUE; 763 } 764 if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) { 765 /* 766 * If we get ENOTSUP, we know that no future 767 * attempts will ever succeed. In this case we 768 * set a persistent bit so that we don't bother 769 * with the ioctl in the future. 770 */ 771 vd->vdev_notrim = B_TRUE; 772 } 773 if (zio->io_error == EIO && !vd->vdev_remove_wanted) { 774 /* 775 * If provider's error is set we assume it is being 776 * removed. 777 */ 778 if (bp->bio_to->error != 0) { 779 /* 780 * We post the resource as soon as possible, instead of 781 * when the async removal actually happens, because the 782 * DE is using this information to discard previous I/O 783 * errors. 784 */ 785 /* XXX: zfs_post_remove() can sleep. 
*/ 786 zfs_post_remove(zio->io_spa, vd); 787 vd->vdev_remove_wanted = B_TRUE; 788 spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); 789 } else if (!vd->vdev_delayed_close) { 790 vd->vdev_delayed_close = B_TRUE; 791 } 792 } 793 g_destroy_bio(bp); 794 zio_interrupt(zio); 795} 796 797static int 798vdev_geom_io_start(zio_t *zio) 799{ 800 vdev_t *vd; 801 struct g_consumer *cp; 802 struct bio *bp; 803 int error; 804 805 vd = zio->io_vd; 806 807 if (zio->io_type == ZIO_TYPE_IOCTL) { 808 /* XXPOLICY */ 809 if (!vdev_readable(vd)) { 810 zio->io_error = ENXIO; 811 return (ZIO_PIPELINE_CONTINUE); 812 } 813 814 switch (zio->io_cmd) { 815 case DKIOCFLUSHWRITECACHE: 816 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 817 break; 818 if (vd->vdev_nowritecache) { 819 zio->io_error = ENOTSUP; 820 break; 821 } 822 goto sendreq; 823 case DKIOCTRIM: 824 if (vdev_geom_bio_delete_disable) 825 break; 826 if (vd->vdev_notrim) { 827 zio->io_error = ENOTSUP; 828 break; 829 } 830 goto sendreq; 831 default: 832 zio->io_error = ENOTSUP; 833 } 834 835 return (ZIO_PIPELINE_CONTINUE); 836 } 837sendreq: 838 cp = vd->vdev_tsd; 839 if (cp == NULL) { 840 zio->io_error = ENXIO; 841 return (ZIO_PIPELINE_CONTINUE); 842 } 843 bp = g_alloc_bio(); 844 bp->bio_caller1 = zio; 845 switch (zio->io_type) { 846 case ZIO_TYPE_READ: 847 case ZIO_TYPE_WRITE: 848 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? 
BIO_READ : BIO_WRITE; 849 bp->bio_data = zio->io_data; 850 bp->bio_offset = zio->io_offset; 851 bp->bio_length = zio->io_size; 852 break; 853 case ZIO_TYPE_IOCTL: 854 switch (zio->io_cmd) { 855 case DKIOCFLUSHWRITECACHE: 856 bp->bio_cmd = BIO_FLUSH; 857 bp->bio_flags |= BIO_ORDERED; 858 bp->bio_data = NULL; 859 bp->bio_offset = cp->provider->mediasize; 860 bp->bio_length = 0; 861 break; 862 case DKIOCTRIM: 863 bp->bio_cmd = BIO_DELETE; 864 bp->bio_data = NULL; 865 bp->bio_offset = zio->io_offset; 866 bp->bio_length = zio->io_size; 867 break; 868 } 869 break; 870 } 871 bp->bio_done = vdev_geom_io_intr; 872 873 g_io_request(bp, cp); 874 875 return (ZIO_PIPELINE_STOP); 876} 877 878static void 879vdev_geom_io_done(zio_t *zio) 880{ 881} 882 883static void 884vdev_geom_hold(vdev_t *vd) 885{ 886} 887 888static void 889vdev_geom_rele(vdev_t *vd) 890{ 891} 892 893vdev_ops_t vdev_geom_ops = { 894 vdev_geom_open, 895 vdev_geom_close, 896 vdev_default_asize, 897 vdev_geom_io_start, 898 vdev_geom_io_done, 899 NULL, 900 vdev_geom_hold, 901 vdev_geom_rele, 902 VDEV_TYPE_DISK, /* name of this vdev type */ 903 B_TRUE /* leaf vdev */ 904}; 905