1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53 54SYSCTL_DECL(_vfs_zfs_vdev); 55/* Don't send BIO_FLUSH. */ 56static int vdev_geom_bio_flush_disable; 57SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN, 58 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 59/* Don't send BIO_DELETE. */ 60static int vdev_geom_bio_delete_disable; 61SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN, 62 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 63 64/* Declare local functions */ 65static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); 66 67/* 68 * Thread local storage used to indicate when a thread is probing geoms 69 * for their guids. If NULL, this thread is not tasting geoms. If non NULL, 70 * it is looking for a replacement for the vdev_t* that is its value. 71 */ 72uint_t zfs_geom_probe_vdev_key; 73 74static void 75vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 76{ 77 int error; 78 uint16_t rate; 79 80 error = g_getattr("GEOM::rotation_rate", cp, &rate); 81 if (error == 0) 82 vd->vdev_rotation_rate = rate; 83 else 84 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 85} 86 87static void 88vdev_geom_set_physpath(struct g_consumer *cp, boolean_t do_null_update) 89{ 90 boolean_t needs_update = B_FALSE; 91 vdev_t *vd; 92 char *physpath; 93 int error, physpath_len; 94 95 if (g_access(cp, 1, 0, 0) != 0) 96 return; 97 98 vd = cp->private; 99 physpath_len = MAXPATHLEN; 100 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 101 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 102 g_access(cp, -1, 0, 0); 103 if (error == 0) { 104 char *old_physpath; 105 106 /* g_topology lock ensures that vdev has not been closed */ 107 g_topology_assert(); 108 old_physpath = vd->vdev_physpath; 109 vd->vdev_physpath = spa_strdup(physpath); 110 111 if (old_physpath != NULL) { 112 needs_update = (strcmp(old_physpath, 113 vd->vdev_physpath) != 0); 114 spa_strfree(old_physpath); 115 } else 116 needs_update = do_null_update; 117 } 118 g_free(physpath); 119 120 /* 121 * If the physical path changed, update the config. 122 * Only request an update for previously unset physpaths if 123 * requested by the caller. 124 */ 125 if (needs_update) 126 spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); 127 128} 129 130static void 131vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 132{ 133 vdev_t *vd; 134 char *old_physpath; 135 int error; 136 137 vd = cp->private; 138 if (vd == NULL) 139 return; 140 141 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 142 vdev_geom_set_rotation_rate(vd, cp); 143 return; 144 } 145 146 if (strcmp(attr, "GEOM::physpath") == 0) { 147 vdev_geom_set_physpath(cp, /*do_null_update*/B_TRUE); 148 return; 149 } 150} 151 152static void 153vdev_geom_orphan(struct g_consumer *cp) 154{ 155 vdev_t *vd; 156 157 g_topology_assert(); 158 159 vd = cp->private; 160 if (vd == NULL) { 161 /* Vdev close in progress. Ignore the event. */ 162 return; 163 } 164 165 /* 166 * Orphan callbacks occur from the GEOM event thread. 167 * Concurrent with this call, new I/O requests may be 168 * working their way through GEOM about to find out 169 * (only once executed by the g_down thread) that we've 170 * been orphaned from our disk provider. These I/Os 171 * must be retired before we can detach our consumer. 172 * This is most easily achieved by acquiring the 173 * SPA ZIO configuration lock as a writer, but doing 174 * so with the GEOM topology lock held would cause 175 * a lock order reversal. Instead, rely on the SPA's 176 * async removal support to invoke a close on this 177 * vdev once it is safe to do so. 178 */ 179 vd->vdev_remove_wanted = B_TRUE; 180 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 181} 182 183static struct g_consumer * 184vdev_geom_attach(struct g_provider *pp, vdev_t *vd) 185{ 186 struct g_geom *gp; 187 struct g_consumer *cp; 188 int error; 189 190 g_topology_assert(); 191 192 ZFS_LOG(1, "Attaching to %s.", pp->name); 193 194 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { 195 ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n", 196 pp->name, pp->sectorsize); 197 return (NULL); 198 } else if (pp->mediasize < SPA_MINDEVSIZE) { 199 ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n", 200 pp->name, pp->mediasize); 201 return (NULL); 202 } 203 204 /* Do we have geom already? No? Create one. */ 205 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 206 if (gp->flags & G_GEOM_WITHER) 207 continue; 208 if (strcmp(gp->name, "zfs::vdev") != 0) 209 continue; 210 break; 211 } 212 if (gp == NULL) { 213 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 214 gp->orphan = vdev_geom_orphan; 215 gp->attrchanged = vdev_geom_attrchanged; 216 cp = g_new_consumer(gp); 217 error = g_attach(cp, pp); 218 if (error != 0) { 219 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, 220 __LINE__, error); 221 vdev_geom_detach(cp, B_FALSE); 222 return (NULL); 223 } 224 error = g_access(cp, 1, 0, 1); 225 if (error != 0) { 226 ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__, 227 __LINE__, error); 228 vdev_geom_detach(cp, B_FALSE); 229 return (NULL); 230 } 231 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 232 } else { 233 /* Check if we are already connected to this provider. */ 234 LIST_FOREACH(cp, &gp->consumer, consumer) { 235 if (cp->provider == pp) { 236 ZFS_LOG(1, "Found consumer for %s.", pp->name); 237 break; 238 } 239 } 240 if (cp == NULL) { 241 cp = g_new_consumer(gp); 242 error = g_attach(cp, pp); 243 if (error != 0) { 244 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", 245 __func__, __LINE__, error); 246 vdev_geom_detach(cp, B_FALSE); 247 return (NULL); 248 } 249 error = g_access(cp, 1, 0, 1); 250 if (error != 0) { 251 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 252 __func__, __LINE__, error); 253 vdev_geom_detach(cp, B_FALSE); 254 return (NULL); 255 } 256 ZFS_LOG(1, "Created consumer for %s.", pp->name); 257 } else { 258 error = g_access(cp, 1, 0, 1); 259 if (error != 0) { 260 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 261 __func__, __LINE__, error); 262 return (NULL); 263 } 264 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 265 } 266 } 267 268 /* 269 * BUG: cp may already belong to a vdev. This could happen if: 270 * 1) That vdev is a shared spare, or 271 * 2) We are trying to reopen a missing vdev and we are scanning by 272 * guid. In that case, we'll ultimately fail to open this consumer, 273 * but not until after setting the private field. 274 * The solution is to: 275 * 1) Don't set the private field until after the open succeeds, and 276 * 2) Set it to a linked list of vdevs, not just a single vdev 277 */ 278 cp->private = vd; 279 if (vd != NULL) { 280 vd->vdev_tsd = cp; 281 vdev_geom_set_physpath(cp, /*do_null_update*/B_FALSE); 282 } 283 284 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 285 return (cp); 286} 287 288static void 289vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) 290{ 291 struct g_geom *gp; 292 vdev_t *vd; 293 294 g_topology_assert(); 295 296 ZFS_LOG(1, "Detaching from %s.", 297 cp->provider && cp->provider->name ? cp->provider->name : "NULL"); 298 299 vd = cp->private; 300 cp->private = NULL; 301 302 gp = cp->geom; 303 if (open_for_read) 304 g_access(cp, -1, 0, -1); 305 /* Destroy consumer on last close. */ 306 if (cp->acr == 0 && cp->ace == 0) { 307 if (cp->acw > 0) 308 g_access(cp, 0, -cp->acw, 0); 309 if (cp->provider != NULL) { 310 ZFS_LOG(1, "Destroying consumer for %s.", 311 cp->provider->name ? cp->provider->name : "NULL"); 312 g_detach(cp); 313 } 314 g_destroy_consumer(cp); 315 } 316 /* Destroy geom if there are no consumers left. */ 317 if (LIST_EMPTY(&gp->consumer)) { 318 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 319 g_wither_geom(gp, ENXIO); 320 } 321} 322 323static void 324vdev_geom_close_locked(vdev_t *vd) 325{ 326 struct g_consumer *cp; 327 328 g_topology_assert(); 329 330 cp = vd->vdev_tsd; 331 vd->vdev_tsd = NULL; 332 vd->vdev_delayed_close = B_FALSE; 333 if (cp == NULL) 334 return; 335 336 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 337 338 vdev_geom_detach(cp, B_TRUE); 339} 340 341/* 342 * Issue one or more bios to the vdev in parallel 343 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO 344 * operation is described by parallel entries from each array. There may be 345 * more bios actually issued than entries in the array 346 */ 347static void 348vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, 349 off_t *sizes, int *errors, int ncmds) 350{ 351 struct bio **bios; 352 u_char *p; 353 off_t off, maxio, s, end; 354 int i, n_bios, j; 355 size_t bios_size; 356 357 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 358 n_bios = 0; 359 360 /* How many bios are required for all commands ? */ 361 for (i = 0; i < ncmds; i++) 362 n_bios += (sizes[i] + maxio - 1) / maxio; 363 364 /* Allocate memory for the bios */ 365 bios_size = n_bios * sizeof(struct bio*); 366 bios = kmem_zalloc(bios_size, KM_SLEEP); 367 368 /* Prepare and issue all of the bios */ 369 for (i = j = 0; i < ncmds; i++) { 370 off = offsets[i]; 371 p = datas[i]; 372 s = sizes[i]; 373 end = off + s; 374 ASSERT((off % cp->provider->sectorsize) == 0); 375 ASSERT((s % cp->provider->sectorsize) == 0); 376 377 for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { 378 bios[j] = g_alloc_bio(); 379 bios[j]->bio_cmd = cmds[i]; 380 bios[j]->bio_done = NULL; 381 bios[j]->bio_offset = off; 382 bios[j]->bio_length = MIN(s, maxio); 383 bios[j]->bio_data = p; 384 g_io_request(bios[j], cp); 385 } 386 } 387 ASSERT(j == n_bios); 388 389 /* Wait for all of the bios to complete, and clean them up */ 390 for (i = j = 0; i < ncmds; i++) { 391 off = offsets[i]; 392 s = sizes[i]; 393 end = off + s; 394 395 for (; off < end; off += maxio, s -= maxio, j++) { 396 errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i]; 397 g_destroy_bio(bios[j]); 398 } 399 } 400 kmem_free(bios, bios_size); 401} 402 403static int 404vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 405{ 406 struct g_provider *pp; 407 vdev_phys_t *vdev_lists[VDEV_LABELS]; 408 char *p, *buf; 409 size_t buflen; 410 uint64_t psize, state, txg; 411 off_t offsets[VDEV_LABELS]; 412 off_t size; 413 off_t sizes[VDEV_LABELS]; 414 int cmds[VDEV_LABELS]; 415 int errors[VDEV_LABELS]; 416 int l, len; 417 418 g_topology_assert_not(); 419 420 pp = cp->provider; 421 ZFS_LOG(1, "Reading config from %s...", pp->name); 422 423 psize = pp->mediasize; 424 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 425 426 size = sizeof(*vdev_lists[0]) + pp->sectorsize - 427 ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1; 428 429 buflen = sizeof(vdev_lists[0]->vp_nvlist); 430 431 *config = NULL; 432 /* Create all of the IO requests */ 433 for (l = 0; l < VDEV_LABELS; l++) { 434 cmds[l] = BIO_READ; 435 vdev_lists[l] = kmem_alloc(size, KM_SLEEP); 436 offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; 437 sizes[l] = size; 438 errors[l] = 0; 439 ASSERT(offsets[l] % pp->sectorsize == 0); 440 } 441 442 /* Issue the IO requests */ 443 vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors, 444 VDEV_LABELS); 445 446 /* Parse the labels */ 447 for (l = 0; l < VDEV_LABELS; l++) { 448 if (errors[l] != 0) 449 continue; 450 451 buf = vdev_lists[l]->vp_nvlist; 452 453 if (nvlist_unpack(buf, buflen, config, 0) != 0) 454 continue; 455 456 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 457 &state) != 0 || state > POOL_STATE_L2CACHE) { 458 nvlist_free(*config); 459 *config = NULL; 460 continue; 461 } 462 463 if (state != POOL_STATE_SPARE && 464 state != POOL_STATE_L2CACHE && 465 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 466 &txg) != 0 || txg == 0)) { 467 nvlist_free(*config); 468 *config = NULL; 469 continue; 470 } 471 472 break; 473 } 474 475 /* Free the label storage */ 476 for (l = 0; l < VDEV_LABELS; l++) 477 kmem_free(vdev_lists[l], size); 478 479 return (*config == NULL ? ENOENT : 0); 480} 481 482static void 483resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 484{ 485 nvlist_t **new_configs; 486 uint64_t i; 487 488 if (id < *count) 489 return; 490 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 491 KM_SLEEP); 492 for (i = 0; i < *count; i++) 493 new_configs[i] = (*configs)[i]; 494 if (*configs != NULL) 495 kmem_free(*configs, *count * sizeof(void *)); 496 *configs = new_configs; 497 *count = id + 1; 498} 499 500static void 501process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 502 const char *name, uint64_t* known_pool_guid) 503{ 504 nvlist_t *vdev_tree; 505 uint64_t pool_guid; 506 uint64_t vdev_guid, known_guid; 507 uint64_t id, txg, known_txg; 508 char *pname; 509 int i; 510 511 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 512 strcmp(pname, name) != 0) 513 goto ignore; 514 515 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 516 goto ignore; 517 518 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 519 goto ignore; 520 521 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 522 goto ignore; 523 524 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 525 goto ignore; 526 527 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 528 529 if (*known_pool_guid != 0) { 530 if (pool_guid != *known_pool_guid) 531 goto ignore; 532 } else 533 *known_pool_guid = pool_guid; 534 535 resize_configs(configs, count, id); 536 537 if ((*configs)[id] != NULL) { 538 VERIFY(nvlist_lookup_uint64((*configs)[id], 539 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 540 if (txg <= known_txg) 541 goto ignore; 542 nvlist_free((*configs)[id]); 543 } 544 545 (*configs)[id] = cfg; 546 return; 547 548ignore: 549 nvlist_free(cfg); 550} 551 552int 553vdev_geom_read_pool_label(const char *name, 554 nvlist_t ***configs, uint64_t *count) 555{ 556 struct g_class *mp; 557 struct g_geom *gp; 558 struct g_provider *pp; 559 struct g_consumer *zcp; 560 nvlist_t *vdev_cfg; 561 uint64_t pool_guid; 562 int error; 563 564 DROP_GIANT(); 565 g_topology_lock(); 566 567 *configs = NULL; 568 *count = 0; 569 pool_guid = 0; 570 LIST_FOREACH(mp, &g_classes, class) { 571 if (mp == &zfs_vdev_class) 572 continue; 573 LIST_FOREACH(gp, &mp->geom, geom) { 574 if (gp->flags & G_GEOM_WITHER) 575 continue; 576 LIST_FOREACH(pp, &gp->provider, provider) { 577 if (pp->flags & G_PF_WITHER) 578 continue; 579 zcp = vdev_geom_attach(pp, NULL); 580 if (zcp == NULL) 581 continue; 582 g_topology_unlock(); 583 error = vdev_geom_read_config(zcp, &vdev_cfg); 584 g_topology_lock(); 585 vdev_geom_detach(zcp, B_TRUE); 586 if (error) 587 continue; 588 ZFS_LOG(1, "successfully read vdev config"); 589 590 process_vdev_config(configs, count, 591 vdev_cfg, name, &pool_guid); 592 } 593 } 594 } 595 g_topology_unlock(); 596 PICKUP_GIANT(); 597 598 return (*count > 0 ? 0 : ENOENT); 599} 600 601enum match { 602 NO_MATCH, 603 TOP_MATCH, 604 FULL_MATCH 605}; 606 607static enum match 608vdev_attach_ok(vdev_t *vd, struct g_provider *pp) 609{ 610 nvlist_t *config; 611 uint64_t pool_guid, top_guid, vdev_guid; 612 struct g_consumer *cp; 613 614 cp = vdev_geom_attach(pp, NULL); 615 if (cp == NULL) { 616 ZFS_LOG(1, "Unable to attach tasting instance to %s.", 617 pp->name); 618 return (NO_MATCH); 619 } 620 g_topology_unlock(); 621 if (vdev_geom_read_config(cp, &config) != 0) { 622 g_topology_lock(); 623 vdev_geom_detach(cp, B_TRUE); 624 ZFS_LOG(1, "Unable to read config from %s.", pp->name); 625 return (NO_MATCH); 626 } 627 g_topology_lock(); 628 vdev_geom_detach(cp, B_TRUE); 629 630 pool_guid = 0; 631 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); 632 top_guid = 0; 633 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); 634 vdev_guid = 0; 635 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 636 nvlist_free(config); 637 638 /* 639 * Check that the label's pool guid matches the desired guid. 640 * Inactive spares and L2ARCs do not have any pool guid in the label. 641 */ 642 if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { 643 ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", 644 pp->name, 645 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); 646 return (NO_MATCH); 647 } 648 649 /* 650 * Check that the label's vdev guid matches the desired guid. 651 * The second condition handles possible race on vdev detach, when 652 * remaining vdev receives GUID of destroyed top level mirror vdev. 653 */ 654 if (vdev_guid == vd->vdev_guid) { 655 ZFS_LOG(1, "guids match for provider %s.", pp->name); 656 return (FULL_MATCH); 657 } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { 658 ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); 659 return (TOP_MATCH); 660 } 661 ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", 662 pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); 663 return (NO_MATCH); 664} 665 666static struct g_consumer * 667vdev_geom_attach_by_guids(vdev_t *vd) 668{ 669 struct g_class *mp; 670 struct g_geom *gp; 671 struct g_provider *pp; 672 struct g_consumer *cp; 673 enum match m; 674 675 g_topology_assert(); 676 677 cp = NULL; 678 LIST_FOREACH(mp, &g_classes, class) { 679 if (mp == &zfs_vdev_class) 680 continue; 681 LIST_FOREACH(gp, &mp->geom, geom) { 682 if (gp->flags & G_GEOM_WITHER) 683 continue; 684 LIST_FOREACH(pp, &gp->provider, provider) { 685 m = vdev_attach_ok(vd, pp); 686 if (m == NO_MATCH) 687 continue; 688 if (cp != NULL) { 689 if (m == FULL_MATCH) 690 vdev_geom_detach(cp, B_TRUE); 691 else 692 continue; 693 } 694 cp = vdev_geom_attach(pp, vd); 695 if (cp == NULL) { 696 printf("ZFS WARNING: Unable to " 697 "attach to %s.\n", pp->name); 698 continue; 699 } 700 if (m == FULL_MATCH) 701 return (cp); 702 } 703 } 704 } 705 return (cp); 706} 707 708static struct g_consumer * 709vdev_geom_open_by_guids(vdev_t *vd) 710{ 711 struct g_consumer *cp; 712 char *buf; 713 size_t len; 714 715 g_topology_assert(); 716 717 ZFS_LOG(1, "Searching by guids [%ju:%ju].", 718 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 719 cp = vdev_geom_attach_by_guids(vd); 720 if (cp != NULL) { 721 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 722 buf = kmem_alloc(len, KM_SLEEP); 723 724 snprintf(buf, len, "/dev/%s", cp->provider->name); 725 spa_strfree(vd->vdev_path); 726 vd->vdev_path = buf; 727 728 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 729 (uintmax_t)spa_guid(vd->vdev_spa), 730 (uintmax_t)vd->vdev_guid, vd->vdev_path); 731 } else { 732 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 733 (uintmax_t)spa_guid(vd->vdev_spa), 734 (uintmax_t)vd->vdev_guid); 735 } 736 737 return (cp); 738} 739 740static struct g_consumer * 741vdev_geom_open_by_path(vdev_t *vd, int check_guid) 742{ 743 struct g_provider *pp; 744 struct g_consumer *cp; 745 746 g_topology_assert(); 747 748 cp = NULL; 749 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 750 if (pp != NULL) { 751 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 752 if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) 753 cp = vdev_geom_attach(pp, vd); 754 } 755 756 return (cp); 757} 758 759static int 760vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 761 uint64_t *logical_ashift, uint64_t *physical_ashift) 762{ 763 struct g_provider *pp; 764 struct g_consumer *cp; 765 size_t bufsize; 766 int error; 767 768 /* Set the TLS to indicate downstack that we should not access zvols*/ 769 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 770 771 /* 772 * We must have a pathname, and it must be absolute. 773 */ 774 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 775 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 776 return (EINVAL); 777 } 778 779 /* 780 * Reopen the device if it's not currently open. Otherwise, 781 * just update the physical size of the device. 782 */ 783 if ((cp = vd->vdev_tsd) != NULL) { 784 ASSERT(vd->vdev_reopening); 785 goto skip_open; 786 } 787 788 DROP_GIANT(); 789 g_topology_lock(); 790 error = 0; 791 792 if (vd->vdev_spa->spa_splitting_newspa || 793 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 794 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 795 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 796 /* 797 * We are dealing with a vdev that hasn't been previously 798 * opened (since boot), and we are not loading an 799 * existing pool configuration. This looks like a 800 * vdev add operation to a new or existing pool. 801 * Assume the user knows what he/she is doing and find 802 * GEOM provider by its name, ignoring GUID mismatches. 803 * 804 * XXPOLICY: It would be safer to only allow a device 805 * that is unlabeled or labeled but missing 806 * GUID information to be opened in this fashion, 807 * unless we are doing a split, in which case we 808 * should allow any guid. 809 */ 810 cp = vdev_geom_open_by_path(vd, 0); 811 } else { 812 /* 813 * Try using the recorded path for this device, but only 814 * accept it if its label data contains the expected GUIDs. 815 */ 816 cp = vdev_geom_open_by_path(vd, 1); 817 if (cp == NULL) { 818 /* 819 * The device at vd->vdev_path doesn't have the 820 * expected GUIDs. The disks might have merely 821 * moved around so try all other GEOM providers 822 * to find one with the right GUIDs. 823 */ 824 cp = vdev_geom_open_by_guids(vd); 825 } 826 } 827 828 /* Clear the TLS now that tasting is done */ 829 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); 830 831 if (cp == NULL) { 832 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 833 error = ENOENT; 834 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 835 !ISP2(cp->provider->sectorsize)) { 836 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 837 vd->vdev_path); 838 839 vdev_geom_close_locked(vd); 840 error = EINVAL; 841 cp = NULL; 842 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 843 int i; 844 845 for (i = 0; i < 5; i++) { 846 error = g_access(cp, 0, 1, 0); 847 if (error == 0) 848 break; 849 g_topology_unlock(); 850 tsleep(vd, 0, "vdev", hz / 2); 851 g_topology_lock(); 852 } 853 if (error != 0) { 854 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 855 vd->vdev_path, error); 856 vdev_geom_close_locked(vd); 857 cp = NULL; 858 } 859 } 860 861 /* Fetch initial physical path information for this device. */ 862 if (cp != NULL) 863 vdev_geom_attrchanged(cp, "GEOM::physpath"); 864 865 g_topology_unlock(); 866 PICKUP_GIANT(); 867 if (cp == NULL) { 868 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 869 return (error); 870 } 871skip_open: 872 pp = cp->provider; 873 874 /* 875 * Determine the actual size of the device. 876 */ 877 *max_psize = *psize = pp->mediasize; 878 879 /* 880 * Determine the device's minimum transfer size and preferred 881 * transfer size. 882 */ 883 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 884 *physical_ashift = 0; 885 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 886 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 887 *physical_ashift = highbit(pp->stripesize) - 1; 888 889 /* 890 * Clear the nowritecache settings, so that on a vdev_reopen() 891 * we will try again. 892 */ 893 vd->vdev_nowritecache = B_FALSE; 894 895 /* 896 * Determine the device's rotation rate. 897 */ 898 vdev_geom_set_rotation_rate(vd, cp); 899 900 return (0); 901} 902 903static void 904vdev_geom_close(vdev_t *vd) 905{ 906 907 if (vd->vdev_reopening) 908 return; 909 910 DROP_GIANT(); 911 g_topology_lock(); 912 vdev_geom_close_locked(vd); 913 g_topology_unlock(); 914 PICKUP_GIANT(); 915} 916 917static void 918vdev_geom_io_intr(struct bio *bp) 919{ 920 vdev_t *vd; 921 zio_t *zio; 922 923 zio = bp->bio_caller1; 924 vd = zio->io_vd; 925 zio->io_error = bp->bio_error; 926 if (zio->io_error == 0 && bp->bio_resid != 0) 927 zio->io_error = SET_ERROR(EIO); 928 929 switch(zio->io_error) { 930 case ENOTSUP: 931 /* 932 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 933 * that future attempts will never succeed. In this case 934 * we set a persistent flag so that we don't bother with 935 * requests in the future. 936 */ 937 switch(bp->bio_cmd) { 938 case BIO_FLUSH: 939 vd->vdev_nowritecache = B_TRUE; 940 break; 941 case BIO_DELETE: 942 vd->vdev_notrim = B_TRUE; 943 break; 944 } 945 break; 946 case ENXIO: 947 if (!vd->vdev_remove_wanted) { 948 /* 949 * If provider's error is set we assume it is being 950 * removed. 951 */ 952 if (bp->bio_to->error != 0) { 953 vd->vdev_remove_wanted = B_TRUE; 954 spa_async_request(zio->io_spa, 955 SPA_ASYNC_REMOVE); 956 } else if (!vd->vdev_delayed_close) { 957 vd->vdev_delayed_close = B_TRUE; 958 } 959 } 960 break; 961 } 962 g_destroy_bio(bp); 963 zio_delay_interrupt(zio); 964} 965 966static void 967vdev_geom_io_start(zio_t *zio) 968{ 969 vdev_t *vd; 970 struct g_consumer *cp; 971 struct bio *bp; 972 int error; 973 974 vd = zio->io_vd; 975 976 switch (zio->io_type) { 977 case ZIO_TYPE_IOCTL: 978 /* XXPOLICY */ 979 if (!vdev_readable(vd)) { 980 zio->io_error = SET_ERROR(ENXIO); 981 zio_interrupt(zio); 982 return; 983 } else { 984 switch (zio->io_cmd) { 985 case DKIOCFLUSHWRITECACHE: 986 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 987 break; 988 if (vd->vdev_nowritecache) { 989 zio->io_error = SET_ERROR(ENOTSUP); 990 break; 991 } 992 goto sendreq; 993 default: 994 zio->io_error = SET_ERROR(ENOTSUP); 995 } 996 } 997 998 zio_execute(zio); 999 return; 1000 case ZIO_TYPE_FREE: 1001 if (vd->vdev_notrim) { 1002 zio->io_error = SET_ERROR(ENOTSUP); 1003 } else if (!vdev_geom_bio_delete_disable) { 1004 goto sendreq; 1005 } 1006 zio_execute(zio); 1007 return; 1008 } 1009sendreq: 1010 ASSERT(zio->io_type == ZIO_TYPE_READ || 1011 zio->io_type == ZIO_TYPE_WRITE || 1012 zio->io_type == ZIO_TYPE_FREE || 1013 zio->io_type == ZIO_TYPE_IOCTL); 1014 1015 cp = vd->vdev_tsd; 1016 if (cp == NULL) { 1017 zio->io_error = SET_ERROR(ENXIO); 1018 zio_interrupt(zio); 1019 return; 1020 } 1021 bp = g_alloc_bio(); 1022 bp->bio_caller1 = zio; 1023 switch (zio->io_type) { 1024 case ZIO_TYPE_READ: 1025 case ZIO_TYPE_WRITE: 1026 zio->io_target_timestamp = zio_handle_io_delay(zio); 1027 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 1028 bp->bio_data = zio->io_data; 1029 bp->bio_offset = zio->io_offset; 1030 bp->bio_length = zio->io_size; 1031 break; 1032 case ZIO_TYPE_FREE: 1033 bp->bio_cmd = BIO_DELETE; 1034 bp->bio_data = NULL; 1035 bp->bio_offset = zio->io_offset; 1036 bp->bio_length = zio->io_size; 1037 break; 1038 case ZIO_TYPE_IOCTL: 1039 bp->bio_cmd = BIO_FLUSH; 1040 bp->bio_flags |= BIO_ORDERED; 1041 bp->bio_data = NULL; 1042 bp->bio_offset = cp->provider->mediasize; 1043 bp->bio_length = 0; 1044 break; 1045 } 1046 bp->bio_done = vdev_geom_io_intr; 1047 1048 g_io_request(bp, cp); 1049} 1050 1051static void 1052vdev_geom_io_done(zio_t *zio) 1053{ 1054} 1055 1056static void 1057vdev_geom_hold(vdev_t *vd) 1058{ 1059} 1060 1061static void 1062vdev_geom_rele(vdev_t *vd) 1063{ 1064} 1065 1066vdev_ops_t vdev_geom_ops = { 1067 vdev_geom_open, 1068 vdev_geom_close, 1069 vdev_default_asize, 1070 vdev_geom_io_start, 1071 vdev_geom_io_done, 1072 NULL, 1073 vdev_geom_hold, 1074 vdev_geom_rele, 1075 VDEV_TYPE_DISK, /* name of this vdev type */ 1076 B_TRUE /* leaf vdev */ 1077}; 1078