/* vdev_geom.c revision 292069 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable; 57SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN, 58 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 59/* Don't send BIO_DELETE. 
*/ 60static int vdev_geom_bio_delete_disable; 61SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN, 62 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 63 64static void 65vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 66{ 67 int error; 68 uint16_t rate; 69 70 error = g_getattr("GEOM::rotation_rate", cp, &rate); 71 if (error == 0) 72 vd->vdev_rotation_rate = rate; 73 else 74 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 75} 76 77static void 78vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 79{ 80 vdev_t *vd; 81 82 vd = cp->private; 83 if (vd == NULL) 84 return; 85 86 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 87 vdev_geom_set_rotation_rate(vd, cp); 88 return; 89 } 90} 91 92static void 93vdev_geom_orphan(struct g_consumer *cp) 94{ 95 vdev_t *vd; 96 97 g_topology_assert(); 98 99 vd = cp->private; 100 if (vd == NULL) 101 return; 102 103 /* 104 * Orphan callbacks occur from the GEOM event thread. 105 * Concurrent with this call, new I/O requests may be 106 * working their way through GEOM about to find out 107 * (only once executed by the g_down thread) that we've 108 * been orphaned from our disk provider. These I/Os 109 * must be retired before we can detach our consumer. 110 * This is most easily achieved by acquiring the 111 * SPA ZIO configuration lock as a writer, but doing 112 * so with the GEOM topology lock held would cause 113 * a lock order reversal. Instead, rely on the SPA's 114 * async removal support to invoke a close on this 115 * vdev once it is safe to do so. 116 */ 117 zfs_post_remove(vd->vdev_spa, vd); 118 vd->vdev_remove_wanted = B_TRUE; 119 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 120} 121 122static struct g_consumer * 123vdev_geom_attach(struct g_provider *pp) 124{ 125 struct g_geom *gp; 126 struct g_consumer *cp; 127 128 g_topology_assert(); 129 130 ZFS_LOG(1, "Attaching to %s.", pp->name); 131 /* Do we have geom already? No? Create one. 
*/ 132 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 133 if (gp->flags & G_GEOM_WITHER) 134 continue; 135 if (strcmp(gp->name, "zfs::vdev") != 0) 136 continue; 137 break; 138 } 139 if (gp == NULL) { 140 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 141 gp->orphan = vdev_geom_orphan; 142 cp = g_new_consumer(gp); 143 if (g_attach(cp, pp) != 0) { 144 g_wither_geom(gp, ENXIO); 145 return (NULL); 146 } 147 if (g_access(cp, 1, 0, 1) != 0) { 148 g_wither_geom(gp, ENXIO); 149 return (NULL); 150 } 151 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 152 } else { 153 /* Check if we are already connected to this provider. */ 154 LIST_FOREACH(cp, &gp->consumer, consumer) { 155 if (cp->provider == pp) { 156 ZFS_LOG(1, "Found consumer for %s.", pp->name); 157 break; 158 } 159 } 160 if (cp == NULL) { 161 cp = g_new_consumer(gp); 162 if (g_attach(cp, pp) != 0) { 163 g_destroy_consumer(cp); 164 return (NULL); 165 } 166 if (g_access(cp, 1, 0, 1) != 0) { 167 g_detach(cp); 168 g_destroy_consumer(cp); 169 return (NULL); 170 } 171 ZFS_LOG(1, "Created consumer for %s.", pp->name); 172 } else { 173 if (g_access(cp, 1, 0, 1) != 0) 174 return (NULL); 175 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 176 } 177 } 178 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 179 return (cp); 180} 181 182static void 183vdev_geom_detach(void *arg, int flag __unused) 184{ 185 struct g_geom *gp; 186 struct g_consumer *cp; 187 188 g_topology_assert(); 189 cp = arg; 190 gp = cp->geom; 191 192 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 193 g_access(cp, -1, 0, -1); 194 /* Destroy consumer on last close. */ 195 if (cp->acr == 0 && cp->ace == 0) { 196 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); 197 if (cp->acw > 0) 198 g_access(cp, 0, -cp->acw, 0); 199 g_detach(cp); 200 g_destroy_consumer(cp); 201 } 202 /* Destroy geom if there are no consumers left. 
*/ 203 if (LIST_EMPTY(&gp->consumer)) { 204 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 205 g_wither_geom(gp, ENXIO); 206 } 207} 208 209static void 210nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) 211{ 212 213 nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); 214 nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); 215} 216 217static int 218vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 219{ 220 struct bio *bp; 221 u_char *p; 222 off_t off, maxio; 223 int error; 224 225 ASSERT((offset % cp->provider->sectorsize) == 0); 226 ASSERT((size % cp->provider->sectorsize) == 0); 227 228 bp = g_alloc_bio(); 229 off = offset; 230 offset += size; 231 p = data; 232 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 233 error = 0; 234 235 for (; off < offset; off += maxio, p += maxio, size -= maxio) { 236 bzero(bp, sizeof(*bp)); 237 bp->bio_cmd = cmd; 238 bp->bio_done = NULL; 239 bp->bio_offset = off; 240 bp->bio_length = MIN(size, maxio); 241 bp->bio_data = p; 242 g_io_request(bp, cp); 243 error = biowait(bp, "vdev_geom_io"); 244 if (error != 0) 245 break; 246 } 247 248 g_destroy_bio(bp); 249 return (error); 250} 251 252static void 253vdev_geom_taste_orphan(struct g_consumer *cp) 254{ 255 256 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 257 cp->provider->name)); 258} 259 260static int 261vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 262{ 263 struct g_provider *pp; 264 vdev_label_t *label; 265 char *p, *buf; 266 size_t buflen; 267 uint64_t psize; 268 off_t offset, size; 269 uint64_t state, txg; 270 int error, l, len; 271 272 g_topology_assert_not(); 273 274 pp = cp->provider; 275 ZFS_LOG(1, "Reading config from %s...", pp->name); 276 277 psize = pp->mediasize; 278 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 279 280 size = sizeof(*label) + pp->sectorsize - 281 ((sizeof(*label) - 1) % pp->sectorsize) - 1; 282 283 label = kmem_alloc(size, KM_SLEEP); 284 buflen = 
sizeof(label->vl_vdev_phys.vp_nvlist); 285 286 *config = NULL; 287 for (l = 0; l < VDEV_LABELS; l++) { 288 289 offset = vdev_label_offset(psize, l, 0); 290 if ((offset % pp->sectorsize) != 0) 291 continue; 292 293 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 294 continue; 295 buf = label->vl_vdev_phys.vp_nvlist; 296 297 if (nvlist_unpack(buf, buflen, config, 0) != 0) 298 continue; 299 300 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 301 &state) != 0 || state > POOL_STATE_L2CACHE) { 302 nvlist_free(*config); 303 *config = NULL; 304 continue; 305 } 306 307 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 308 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 309 &txg) != 0 || txg == 0)) { 310 nvlist_free(*config); 311 *config = NULL; 312 continue; 313 } 314 315 break; 316 } 317 318 kmem_free(label, size); 319 return (*config == NULL ? ENOENT : 0); 320} 321 322static void 323resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 324{ 325 nvlist_t **new_configs; 326 uint64_t i; 327 328 if (id < *count) 329 return; 330 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 331 KM_SLEEP); 332 for (i = 0; i < *count; i++) 333 new_configs[i] = (*configs)[i]; 334 if (*configs != NULL) 335 kmem_free(*configs, *count * sizeof(void *)); 336 *configs = new_configs; 337 *count = id + 1; 338} 339 340static void 341process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 342 const char *name, uint64_t* known_pool_guid) 343{ 344 nvlist_t *vdev_tree; 345 uint64_t pool_guid; 346 uint64_t vdev_guid, known_guid; 347 uint64_t id, txg, known_txg; 348 char *pname; 349 int i; 350 351 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 352 strcmp(pname, name) != 0) 353 goto ignore; 354 355 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 356 goto ignore; 357 358 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 359 goto ignore; 360 361 if 
(nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 362 goto ignore; 363 364 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 365 goto ignore; 366 367 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 368 369 if (*known_pool_guid != 0) { 370 if (pool_guid != *known_pool_guid) 371 goto ignore; 372 } else 373 *known_pool_guid = pool_guid; 374 375 resize_configs(configs, count, id); 376 377 if ((*configs)[id] != NULL) { 378 VERIFY(nvlist_lookup_uint64((*configs)[id], 379 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 380 if (txg <= known_txg) 381 goto ignore; 382 nvlist_free((*configs)[id]); 383 } 384 385 (*configs)[id] = cfg; 386 return; 387 388ignore: 389 nvlist_free(cfg); 390} 391 392static int 393vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) 394{ 395 int error; 396 397 if (pp->flags & G_PF_WITHER) 398 return (EINVAL); 399 g_attach(cp, pp); 400 error = g_access(cp, 1, 0, 0); 401 if (error == 0) { 402 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) 403 error = EINVAL; 404 else if (pp->mediasize < SPA_MINDEVSIZE) 405 error = EINVAL; 406 if (error != 0) 407 g_access(cp, -1, 0, 0); 408 } 409 if (error != 0) 410 g_detach(cp); 411 return (error); 412} 413 414static void 415vdev_geom_detach_taster(struct g_consumer *cp) 416{ 417 g_access(cp, -1, 0, 0); 418 g_detach(cp); 419} 420 421int 422vdev_geom_read_pool_label(const char *name, 423 nvlist_t ***configs, uint64_t *count) 424{ 425 struct g_class *mp; 426 struct g_geom *gp, *zgp; 427 struct g_provider *pp; 428 struct g_consumer *zcp; 429 nvlist_t *vdev_cfg; 430 uint64_t pool_guid; 431 int error; 432 433 DROP_GIANT(); 434 g_topology_lock(); 435 436 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 437 /* This orphan function should be never called. 
*/ 438 zgp->orphan = vdev_geom_taste_orphan; 439 zcp = g_new_consumer(zgp); 440 441 *configs = NULL; 442 *count = 0; 443 pool_guid = 0; 444 LIST_FOREACH(mp, &g_classes, class) { 445 if (mp == &zfs_vdev_class) 446 continue; 447 LIST_FOREACH(gp, &mp->geom, geom) { 448 if (gp->flags & G_GEOM_WITHER) 449 continue; 450 LIST_FOREACH(pp, &gp->provider, provider) { 451 if (pp->flags & G_PF_WITHER) 452 continue; 453 if (vdev_geom_attach_taster(zcp, pp) != 0) 454 continue; 455 g_topology_unlock(); 456 error = vdev_geom_read_config(zcp, &vdev_cfg); 457 g_topology_lock(); 458 vdev_geom_detach_taster(zcp); 459 if (error) 460 continue; 461 ZFS_LOG(1, "successfully read vdev config"); 462 463 process_vdev_config(configs, count, 464 vdev_cfg, name, &pool_guid); 465 } 466 } 467 } 468 469 g_destroy_consumer(zcp); 470 g_destroy_geom(zgp); 471 g_topology_unlock(); 472 PICKUP_GIANT(); 473 474 return (*count > 0 ? 0 : ENOENT); 475} 476 477static void 478vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) 479{ 480 nvlist_t *config; 481 482 g_topology_assert_not(); 483 484 *pguid = 0; 485 *vguid = 0; 486 if (vdev_geom_read_config(cp, &config) == 0) { 487 nvlist_get_guids(config, pguid, vguid); 488 nvlist_free(config); 489 } 490} 491 492static struct g_consumer * 493vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid) 494{ 495 struct g_class *mp; 496 struct g_geom *gp, *zgp; 497 struct g_provider *pp; 498 struct g_consumer *cp, *zcp; 499 uint64_t pguid, vguid; 500 501 g_topology_assert(); 502 503 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 504 /* This orphan function should be never called. 
*/ 505 zgp->orphan = vdev_geom_taste_orphan; 506 zcp = g_new_consumer(zgp); 507 508 cp = NULL; 509 LIST_FOREACH(mp, &g_classes, class) { 510 if (mp == &zfs_vdev_class) 511 continue; 512 LIST_FOREACH(gp, &mp->geom, geom) { 513 if (gp->flags & G_GEOM_WITHER) 514 continue; 515 LIST_FOREACH(pp, &gp->provider, provider) { 516 if (vdev_geom_attach_taster(zcp, pp) != 0) 517 continue; 518 g_topology_unlock(); 519 vdev_geom_read_guids(zcp, &pguid, &vguid); 520 g_topology_lock(); 521 vdev_geom_detach_taster(zcp); 522 if (pguid != pool_guid || vguid != vdev_guid) 523 continue; 524 cp = vdev_geom_attach(pp); 525 if (cp == NULL) { 526 printf("ZFS WARNING: Unable to " 527 "attach to %s.\n", pp->name); 528 continue; 529 } 530 break; 531 } 532 if (cp != NULL) 533 break; 534 } 535 if (cp != NULL) 536 break; 537 } 538end: 539 g_destroy_consumer(zcp); 540 g_destroy_geom(zgp); 541 return (cp); 542} 543 544static struct g_consumer * 545vdev_geom_open_by_guids(vdev_t *vd) 546{ 547 struct g_consumer *cp; 548 char *buf; 549 size_t len; 550 551 g_topology_assert(); 552 553 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 554 cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid); 555 if (cp != NULL) { 556 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 557 buf = kmem_alloc(len, KM_SLEEP); 558 559 snprintf(buf, len, "/dev/%s", cp->provider->name); 560 spa_strfree(vd->vdev_path); 561 vd->vdev_path = buf; 562 563 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 564 (uintmax_t)spa_guid(vd->vdev_spa), 565 (uintmax_t)vd->vdev_guid, vd->vdev_path); 566 } else { 567 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 568 (uintmax_t)spa_guid(vd->vdev_spa), 569 (uintmax_t)vd->vdev_guid); 570 } 571 572 return (cp); 573} 574 575static struct g_consumer * 576vdev_geom_open_by_path(vdev_t *vd, int check_guid) 577{ 578 struct g_provider *pp; 579 struct g_consumer *cp; 580 uint64_t pguid, vguid; 581 582 g_topology_assert(); 583 584 cp = NULL; 585 pp = 
g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 586 if (pp != NULL) { 587 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 588 cp = vdev_geom_attach(pp); 589 if (cp != NULL && check_guid && ISP2(pp->sectorsize) && 590 pp->sectorsize <= VDEV_PAD_SIZE) { 591 g_topology_unlock(); 592 vdev_geom_read_guids(cp, &pguid, &vguid); 593 g_topology_lock(); 594 if (pguid != spa_guid(vd->vdev_spa) || 595 vguid != vd->vdev_guid) { 596 vdev_geom_detach(cp, 0); 597 cp = NULL; 598 ZFS_LOG(1, "guid mismatch for provider %s: " 599 "%ju:%ju != %ju:%ju.", vd->vdev_path, 600 (uintmax_t)spa_guid(vd->vdev_spa), 601 (uintmax_t)vd->vdev_guid, 602 (uintmax_t)pguid, (uintmax_t)vguid); 603 } else { 604 ZFS_LOG(1, "guid match for provider %s.", 605 vd->vdev_path); 606 } 607 } 608 } 609 610 return (cp); 611} 612 613static int 614vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 615 uint64_t *logical_ashift, uint64_t *physical_ashift) 616{ 617 struct g_provider *pp; 618 struct g_consumer *cp; 619 size_t bufsize; 620 int error; 621 622 /* 623 * We must have a pathname, and it must be absolute. 624 */ 625 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 626 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 627 return (EINVAL); 628 } 629 630 vd->vdev_tsd = NULL; 631 632 DROP_GIANT(); 633 g_topology_lock(); 634 error = 0; 635 636 if (vd->vdev_spa->spa_splitting_newspa || 637 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 638 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) { 639 /* 640 * We are dealing with a vdev that hasn't been previously 641 * opened (since boot), and we are not loading an 642 * existing pool configuration. This looks like a 643 * vdev add operation to a new or existing pool. 644 * Assume the user knows what he/she is doing and find 645 * GEOM provider by its name, ignoring GUID mismatches. 
646 * 647 * XXPOLICY: It would be safer to only allow a device 648 * that is unlabeled or labeled but missing 649 * GUID information to be opened in this fashion, 650 * unless we are doing a split, in which case we 651 * should allow any guid. 652 */ 653 cp = vdev_geom_open_by_path(vd, 0); 654 } else { 655 /* 656 * Try using the recorded path for this device, but only 657 * accept it if its label data contains the expected GUIDs. 658 */ 659 cp = vdev_geom_open_by_path(vd, 1); 660 if (cp == NULL) { 661 /* 662 * The device at vd->vdev_path doesn't have the 663 * expected GUIDs. The disks might have merely 664 * moved around so try all other GEOM providers 665 * to find one with the right GUIDs. 666 */ 667 cp = vdev_geom_open_by_guids(vd); 668 } 669 } 670 671 if (cp == NULL) { 672 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 673 error = ENOENT; 674 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 675 !ISP2(cp->provider->sectorsize)) { 676 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 677 vd->vdev_path); 678 vdev_geom_detach(cp, 0); 679 error = EINVAL; 680 cp = NULL; 681 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 682 int i; 683 684 for (i = 0; i < 5; i++) { 685 error = g_access(cp, 0, 1, 0); 686 if (error == 0) 687 break; 688 g_topology_unlock(); 689 tsleep(vd, 0, "vdev", hz / 2); 690 g_topology_lock(); 691 } 692 if (error != 0) { 693 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 694 vd->vdev_path, error); 695 vdev_geom_detach(cp, 0); 696 cp = NULL; 697 } 698 } 699 g_topology_unlock(); 700 PICKUP_GIANT(); 701 if (cp == NULL) { 702 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 703 return (error); 704 } 705 706 cp->private = vd; 707 vd->vdev_tsd = cp; 708 pp = cp->provider; 709 710 /* 711 * Determine the actual size of the device. 712 */ 713 *max_psize = *psize = pp->mediasize; 714 715 /* 716 * Determine the device's minimum transfer size and preferred 717 * transfer size. 
718 */ 719 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 720 *physical_ashift = 0; 721 if (pp->stripesize) 722 *physical_ashift = highbit(pp->stripesize) - 1; 723 724 /* 725 * Clear the nowritecache settings, so that on a vdev_reopen() 726 * we will try again. 727 */ 728 vd->vdev_nowritecache = B_FALSE; 729 730 if (vd->vdev_physpath != NULL) 731 spa_strfree(vd->vdev_physpath); 732 bufsize = sizeof("/dev/") + strlen(pp->name); 733 vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); 734 snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); 735 736 /* 737 * Determine the device's rotation rate. 738 */ 739 vdev_geom_set_rotation_rate(vd, cp); 740 741 return (0); 742} 743 744static void 745vdev_geom_close(vdev_t *vd) 746{ 747 struct g_consumer *cp; 748 749 cp = vd->vdev_tsd; 750 if (cp == NULL) 751 return; 752 vd->vdev_tsd = NULL; 753 vd->vdev_delayed_close = B_FALSE; 754 cp->private = NULL; /* XXX locking */ 755 g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); 756} 757 758static void 759vdev_geom_io_intr(struct bio *bp) 760{ 761 vdev_t *vd; 762 zio_t *zio; 763 764 zio = bp->bio_caller1; 765 vd = zio->io_vd; 766 zio->io_error = bp->bio_error; 767 if (zio->io_error == 0 && bp->bio_resid != 0) 768 zio->io_error = SET_ERROR(EIO); 769 770 switch(zio->io_error) { 771 case ENOTSUP: 772 /* 773 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 774 * that future attempts will never succeed. In this case 775 * we set a persistent flag so that we don't bother with 776 * requests in the future. 777 */ 778 switch(bp->bio_cmd) { 779 case BIO_FLUSH: 780 vd->vdev_nowritecache = B_TRUE; 781 break; 782 case BIO_DELETE: 783 vd->vdev_notrim = B_TRUE; 784 break; 785 } 786 break; 787 case ENXIO: 788 if (!vd->vdev_remove_wanted) { 789 /* 790 * If provider's error is set we assume it is being 791 * removed. 
792 */ 793 if (bp->bio_to->error != 0) { 794 vd->vdev_remove_wanted = B_TRUE; 795 spa_async_request(zio->io_spa, 796 SPA_ASYNC_REMOVE); 797 } else if (!vd->vdev_delayed_close) { 798 vd->vdev_delayed_close = B_TRUE; 799 } 800 } 801 break; 802 } 803 g_destroy_bio(bp); 804 zio_interrupt(zio); 805} 806 807static void 808vdev_geom_io_start(zio_t *zio) 809{ 810 vdev_t *vd; 811 struct g_consumer *cp; 812 struct bio *bp; 813 int error; 814 815 vd = zio->io_vd; 816 817 switch (zio->io_type) { 818 case ZIO_TYPE_IOCTL: 819 /* XXPOLICY */ 820 if (!vdev_readable(vd)) { 821 zio->io_error = SET_ERROR(ENXIO); 822 zio_interrupt(zio); 823 return; 824 } else { 825 switch (zio->io_cmd) { 826 case DKIOCFLUSHWRITECACHE: 827 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 828 break; 829 if (vd->vdev_nowritecache) { 830 zio->io_error = SET_ERROR(ENOTSUP); 831 break; 832 } 833 goto sendreq; 834 default: 835 zio->io_error = SET_ERROR(ENOTSUP); 836 } 837 } 838 839 zio_execute(zio); 840 return; 841 case ZIO_TYPE_FREE: 842 if (vd->vdev_notrim) { 843 zio->io_error = SET_ERROR(ENOTSUP); 844 } else if (!vdev_geom_bio_delete_disable) { 845 goto sendreq; 846 } 847 zio_execute(zio); 848 return; 849 } 850sendreq: 851 ASSERT(zio->io_type == ZIO_TYPE_READ || 852 zio->io_type == ZIO_TYPE_WRITE || 853 zio->io_type == ZIO_TYPE_FREE || 854 zio->io_type == ZIO_TYPE_IOCTL); 855 856 cp = vd->vdev_tsd; 857 if (cp == NULL) { 858 zio->io_error = SET_ERROR(ENXIO); 859 zio_interrupt(zio); 860 return; 861 } 862 bp = g_alloc_bio(); 863 bp->bio_caller1 = zio; 864 switch (zio->io_type) { 865 case ZIO_TYPE_READ: 866 case ZIO_TYPE_WRITE: 867 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? 
BIO_READ : BIO_WRITE; 868 bp->bio_data = zio->io_data; 869 bp->bio_offset = zio->io_offset; 870 bp->bio_length = zio->io_size; 871 break; 872 case ZIO_TYPE_FREE: 873 bp->bio_cmd = BIO_DELETE; 874 bp->bio_data = NULL; 875 bp->bio_offset = zio->io_offset; 876 bp->bio_length = zio->io_size; 877 break; 878 case ZIO_TYPE_IOCTL: 879 bp->bio_cmd = BIO_FLUSH; 880 bp->bio_flags |= BIO_ORDERED; 881 bp->bio_data = NULL; 882 bp->bio_offset = cp->provider->mediasize; 883 bp->bio_length = 0; 884 break; 885 } 886 bp->bio_done = vdev_geom_io_intr; 887 888 g_io_request(bp, cp); 889} 890 891static void 892vdev_geom_io_done(zio_t *zio) 893{ 894} 895 896static void 897vdev_geom_hold(vdev_t *vd) 898{ 899} 900 901static void 902vdev_geom_rele(vdev_t *vd) 903{ 904} 905 906vdev_ops_t vdev_geom_ops = { 907 vdev_geom_open, 908 vdev_geom_close, 909 vdev_default_asize, 910 vdev_geom_io_start, 911 vdev_geom_io_done, 912 NULL, 913 vdev_geom_hold, 914 vdev_geom_rele, 915 VDEV_TYPE_DISK, /* name of this vdev type */ 916 B_TRUE /* leaf vdev */ 917}; 918