1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 * 25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/param.h> 30#include <sys/kernel.h> 31#include <sys/bio.h> 32#include <sys/disk.h> 33#include <sys/spa.h> 34#include <sys/spa_impl.h> 35#include <sys/vdev_impl.h> 36#include <sys/fs/zfs.h> 37#include <sys/zio.h> 38#include <geom/geom.h> 39#include <geom/geom_int.h> 40 41/* 42 * Virtual device vector for GEOM. 43 */ 44 45static g_attrchanged_t vdev_geom_attrchanged; 46struct g_class zfs_vdev_class = { 47 .name = "ZFS::VDEV", 48 .version = G_VERSION, 49 .attrchanged = vdev_geom_attrchanged, 50}; 51 52struct consumer_vdev_elem { 53 SLIST_ENTRY(consumer_vdev_elem) elems; 54 vdev_t *vd; 55}; 56 57SLIST_HEAD(consumer_priv_t, consumer_vdev_elem); 58_Static_assert(sizeof(((struct g_consumer*)NULL)->private) 59 == sizeof(struct consumer_priv_t*), 60 "consumer_priv_t* can't be stored in g_consumer.private"); 61 62DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 63 64SYSCTL_DECL(_vfs_zfs_vdev); 65/* Don't send BIO_FLUSH. */ 66static int vdev_geom_bio_flush_disable; 67SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN, 68 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 69/* Don't send BIO_DELETE. */ 70static int vdev_geom_bio_delete_disable; 71SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN, 72 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 73 74/* Declare local functions */ 75static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); 76 77/* 78 * Thread local storage used to indicate when a thread is probing geoms 79 * for their guids. If NULL, this thread is not tasting geoms. If non NULL, 80 * it is looking for a replacement for the vdev_t* that is its value. 81 */ 82uint_t zfs_geom_probe_vdev_key; 83 84static void 85vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 86{ 87 int error; 88 uint16_t rate; 89 90 error = g_getattr("GEOM::rotation_rate", cp, &rate); 91 if (error == 0 && rate == 1) 92 vd->vdev_nonrot = B_TRUE; 93 else 94 vd->vdev_nonrot = B_FALSE; 95} 96 97static void 98vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp, 99 boolean_t do_null_update) 100{ 101 boolean_t needs_update = B_FALSE; 102 char *physpath; 103 int error, physpath_len; 104 105 physpath_len = MAXPATHLEN; 106 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); 107 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); 108 if (error == 0) { 109 char *old_physpath; 110 111 /* g_topology lock ensures that vdev has not been closed */ 112 g_topology_assert(); 113 old_physpath = vd->vdev_physpath; 114 vd->vdev_physpath = spa_strdup(physpath); 115 116 if (old_physpath != NULL) { 117 needs_update = (strcmp(old_physpath, 118 vd->vdev_physpath) != 0); 119 spa_strfree(old_physpath); 120 } else 121 needs_update = do_null_update; 122 } 123 g_free(physpath); 124 125 /* 126 * If the physical path changed, update the config. 127 * Only request an update for previously unset physpaths if 128 * requested by the caller. 129 */ 130 if (needs_update) 131 spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); 132 133} 134 135static void 136vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 137{ 138 char *old_physpath; 139 struct consumer_priv_t *priv; 140 struct consumer_vdev_elem *elem; 141 int error; 142 143 priv = (struct consumer_priv_t*)&cp->private; 144 if (SLIST_EMPTY(priv)) 145 return; 146 147 SLIST_FOREACH(elem, priv, elems) { 148 vdev_t *vd = elem->vd; 149 if (strcmp(attr, "GEOM::rotation_rate") == 0) { 150 vdev_geom_set_rotation_rate(vd, cp); 151 return; 152 } 153 if (strcmp(attr, "GEOM::physpath") == 0) { 154 vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE); 155 return; 156 } 157 } 158} 159 160static void 161vdev_geom_resize(struct g_consumer *cp) 162{ 163 struct consumer_priv_t *priv; 164 struct consumer_vdev_elem *elem; 165 spa_t *spa; 166 vdev_t *vd; 167 168 priv = (struct consumer_priv_t *)&cp->private; 169 if (SLIST_EMPTY(priv)) 170 return; 171 172 SLIST_FOREACH(elem, priv, elems) { 173 vd = elem->vd; 174 if (vd->vdev_state != VDEV_STATE_HEALTHY) 175 continue; 176 spa = vd->vdev_spa; 177 if (!spa->spa_autoexpand) 178 continue; 179 vdev_online(spa, vd->vdev_guid, ZFS_ONLINE_EXPAND, NULL); 180 } 181} 182 183static void 184vdev_geom_orphan(struct g_consumer *cp) 185{ 186 struct consumer_priv_t *priv; 187 struct consumer_vdev_elem *elem; 188 189 g_topology_assert(); 190 191 priv = (struct consumer_priv_t*)&cp->private; 192 if (SLIST_EMPTY(priv)) 193 /* Vdev close in progress. Ignore the event. */ 194 return; 195 196 /* 197 * Orphan callbacks occur from the GEOM event thread. 198 * Concurrent with this call, new I/O requests may be 199 * working their way through GEOM about to find out 200 * (only once executed by the g_down thread) that we've 201 * been orphaned from our disk provider. These I/Os 202 * must be retired before we can detach our consumer. 203 * This is most easily achieved by acquiring the 204 * SPA ZIO configuration lock as a writer, but doing 205 * so with the GEOM topology lock held would cause 206 * a lock order reversal. Instead, rely on the SPA's 207 * async removal support to invoke a close on this 208 * vdev once it is safe to do so. 209 */ 210 SLIST_FOREACH(elem, priv, elems) { 211 vdev_t *vd = elem->vd; 212 213 vd->vdev_remove_wanted = B_TRUE; 214 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 215 } 216} 217 218static struct g_consumer * 219vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity) 220{ 221 struct g_geom *gp; 222 struct g_consumer *cp; 223 int error; 224 225 g_topology_assert(); 226 227 ZFS_LOG(1, "Attaching to %s.", pp->name); 228 229 if (sanity) { 230 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { 231 ZFS_LOG(1, "Failing attach of %s. " 232 "Incompatible sectorsize %d\n", 233 pp->name, pp->sectorsize); 234 return (NULL); 235 } else if (pp->mediasize < SPA_MINDEVSIZE) { 236 ZFS_LOG(1, "Failing attach of %s. " 237 "Incompatible mediasize %ju\n", 238 pp->name, pp->mediasize); 239 return (NULL); 240 } 241 } 242 243 /* Do we have geom already? No? Create one. */ 244 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 245 if (gp->flags & G_GEOM_WITHER) 246 continue; 247 if (strcmp(gp->name, "zfs::vdev") != 0) 248 continue; 249 break; 250 } 251 if (gp == NULL) { 252 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 253 gp->orphan = vdev_geom_orphan; 254 gp->attrchanged = vdev_geom_attrchanged; 255 gp->resize = vdev_geom_resize; 256 cp = g_new_consumer(gp); 257 error = g_attach(cp, pp); 258 if (error != 0) { 259 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, 260 __LINE__, error); 261 vdev_geom_detach(cp, B_FALSE); 262 return (NULL); 263 } 264 error = g_access(cp, 1, 0, 1); 265 if (error != 0) { 266 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__, 267 __LINE__, error); 268 vdev_geom_detach(cp, B_FALSE); 269 return (NULL); 270 } 271 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 272 } else { 273 /* Check if we are already connected to this provider. */ 274 LIST_FOREACH(cp, &gp->consumer, consumer) { 275 if (cp->provider == pp) { 276 ZFS_LOG(1, "Found consumer for %s.", pp->name); 277 break; 278 } 279 } 280 if (cp == NULL) { 281 cp = g_new_consumer(gp); 282 error = g_attach(cp, pp); 283 if (error != 0) { 284 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", 285 __func__, __LINE__, error); 286 vdev_geom_detach(cp, B_FALSE); 287 return (NULL); 288 } 289 error = g_access(cp, 1, 0, 1); 290 if (error != 0) { 291 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 292 __func__, __LINE__, error); 293 vdev_geom_detach(cp, B_FALSE); 294 return (NULL); 295 } 296 ZFS_LOG(1, "Created consumer for %s.", pp->name); 297 } else { 298 error = g_access(cp, 1, 0, 1); 299 if (error != 0) { 300 ZFS_LOG(1, "%s(%d): g_access failed: %d\n", 301 __func__, __LINE__, error); 302 return (NULL); 303 } 304 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 305 } 306 } 307 308 if (vd != NULL) 309 vd->vdev_tsd = cp; 310 311 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 312 return (cp); 313} 314 315static void 316vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) 317{ 318 struct g_geom *gp; 319 320 g_topology_assert(); 321 322 ZFS_LOG(1, "Detaching from %s.", 323 cp->provider && cp->provider->name ? cp->provider->name : "NULL"); 324 325 gp = cp->geom; 326 if (open_for_read) 327 g_access(cp, -1, 0, -1); 328 /* Destroy consumer on last close. */ 329 if (cp->acr == 0 && cp->ace == 0) { 330 if (cp->acw > 0) 331 g_access(cp, 0, -cp->acw, 0); 332 if (cp->provider != NULL) { 333 ZFS_LOG(1, "Destroying consumer for %s.", 334 cp->provider->name ? cp->provider->name : "NULL"); 335 g_detach(cp); 336 } 337 g_destroy_consumer(cp); 338 } 339 /* Destroy geom if there are no consumers left. */ 340 if (LIST_EMPTY(&gp->consumer)) { 341 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 342 g_wither_geom(gp, ENXIO); 343 } 344} 345 346static void 347vdev_geom_close_locked(vdev_t *vd) 348{ 349 struct g_consumer *cp; 350 struct consumer_priv_t *priv; 351 struct consumer_vdev_elem *elem, *elem_temp; 352 353 g_topology_assert(); 354 355 cp = vd->vdev_tsd; 356 vd->vdev_delayed_close = B_FALSE; 357 if (cp == NULL) 358 return; 359 360 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 361 KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__)); 362 priv = (struct consumer_priv_t*)&cp->private; 363 vd->vdev_tsd = NULL; 364 SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) { 365 if (elem->vd == vd) { 366 SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems); 367 g_free(elem); 368 } 369 } 370 371 vdev_geom_detach(cp, B_TRUE); 372} 373 374/* 375 * Issue one or more bios to the vdev in parallel 376 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO 377 * operation is described by parallel entries from each array. There may be 378 * more bios actually issued than entries in the array 379 */ 380static void 381vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, 382 off_t *sizes, int *errors, int ncmds) 383{ 384 struct bio **bios; 385 u_char *p; 386 off_t off, maxio, s, end; 387 int i, n_bios, j; 388 size_t bios_size; 389 390 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 391 n_bios = 0; 392 393 /* How many bios are required for all commands ? */ 394 for (i = 0; i < ncmds; i++) 395 n_bios += (sizes[i] + maxio - 1) / maxio; 396 397 /* Allocate memory for the bios */ 398 bios_size = n_bios * sizeof(struct bio*); 399 bios = kmem_zalloc(bios_size, KM_SLEEP); 400 401 /* Prepare and issue all of the bios */ 402 for (i = j = 0; i < ncmds; i++) { 403 off = offsets[i]; 404 p = datas[i]; 405 s = sizes[i]; 406 end = off + s; 407 ASSERT((off % cp->provider->sectorsize) == 0); 408 ASSERT((s % cp->provider->sectorsize) == 0); 409 410 for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { 411 bios[j] = g_alloc_bio(); 412 bios[j]->bio_cmd = cmds[i]; 413 bios[j]->bio_done = NULL; 414 bios[j]->bio_offset = off; 415 bios[j]->bio_length = MIN(s, maxio); 416 bios[j]->bio_data = p; 417 g_io_request(bios[j], cp); 418 } 419 } 420 ASSERT(j == n_bios); 421 422 /* Wait for all of the bios to complete, and clean them up */ 423 for (i = j = 0; i < ncmds; i++) { 424 off = offsets[i]; 425 s = sizes[i]; 426 end = off + s; 427 428 for (; off < end; off += maxio, s -= maxio, j++) { 429 errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i]; 430 g_destroy_bio(bios[j]); 431 } 432 } 433 kmem_free(bios, bios_size); 434} 435 436/* 437 * Read the vdev config from a device. Return the number of valid labels that 438 * were found. The vdev config will be returned in config if and only if at 439 * least one valid label was found. 440 */ 441static int 442vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp) 443{ 444 struct g_provider *pp; 445 nvlist_t *config; 446 vdev_phys_t *vdev_lists[VDEV_LABELS]; 447 char *buf; 448 size_t buflen; 449 uint64_t psize, state, txg; 450 off_t offsets[VDEV_LABELS]; 451 off_t size; 452 off_t sizes[VDEV_LABELS]; 453 int cmds[VDEV_LABELS]; 454 int errors[VDEV_LABELS]; 455 int l, nlabels; 456 457 g_topology_assert_not(); 458 459 pp = cp->provider; 460 ZFS_LOG(1, "Reading config from %s...", pp->name); 461 462 psize = pp->mediasize; 463 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 464 465 size = sizeof(*vdev_lists[0]) + pp->sectorsize - 466 ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1; 467 468 buflen = sizeof(vdev_lists[0]->vp_nvlist); 469 470 /* Create all of the IO requests */ 471 for (l = 0; l < VDEV_LABELS; l++) { 472 cmds[l] = BIO_READ; 473 vdev_lists[l] = kmem_alloc(size, KM_SLEEP); 474 offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; 475 sizes[l] = size; 476 errors[l] = 0; 477 ASSERT(offsets[l] % pp->sectorsize == 0); 478 } 479 480 /* Issue the IO requests */ 481 vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors, 482 VDEV_LABELS); 483 484 /* Parse the labels */ 485 config = *configp = NULL; 486 nlabels = 0; 487 for (l = 0; l < VDEV_LABELS; l++) { 488 if (errors[l] != 0) 489 continue; 490 491 buf = vdev_lists[l]->vp_nvlist; 492 493 if (nvlist_unpack(buf, buflen, &config, 0) != 0) 494 continue; 495 496 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 497 &state) != 0 || state > POOL_STATE_L2CACHE) { 498 nvlist_free(config); 499 continue; 500 } 501 502 if (state != POOL_STATE_SPARE && 503 state != POOL_STATE_L2CACHE && 504 (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 505 &txg) != 0 || txg == 0)) { 506 nvlist_free(config); 507 continue; 508 } 509 510 if (*configp != NULL) 511 nvlist_free(*configp); 512 *configp = config; 513 514 nlabels++; 515 } 516 517 /* Free the label storage */ 518 for (l = 0; l < VDEV_LABELS; l++) 519 kmem_free(vdev_lists[l], size); 520 521 return (nlabels); 522} 523 524static void 525resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 526{ 527 nvlist_t **new_configs; 528 uint64_t i; 529 530 if (id < *count) 531 return; 532 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 533 KM_SLEEP); 534 for (i = 0; i < *count; i++) 535 new_configs[i] = (*configs)[i]; 536 if (*configs != NULL) 537 kmem_free(*configs, *count * sizeof(void *)); 538 *configs = new_configs; 539 *count = id + 1; 540} 541 542static void 543process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 544 const char *name, uint64_t* known_pool_guid) 545{ 546 nvlist_t *vdev_tree; 547 uint64_t pool_guid; 548 uint64_t vdev_guid, known_guid; 549 uint64_t id, txg, known_txg; 550 char *pname; 551 int i; 552 553 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 554 strcmp(pname, name) != 0) 555 goto ignore; 556 557 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) 558 goto ignore; 559 560 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 561 goto ignore; 562 563 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 564 goto ignore; 565 566 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 567 goto ignore; 568 569 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 570 571 if (*known_pool_guid != 0) { 572 if (pool_guid != *known_pool_guid) 573 goto ignore; 574 } else 575 *known_pool_guid = pool_guid; 576 577 resize_configs(configs, count, id); 578 579 if ((*configs)[id] != NULL) { 580 VERIFY(nvlist_lookup_uint64((*configs)[id], 581 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 582 if (txg <= known_txg) 583 goto ignore; 584 nvlist_free((*configs)[id]); 585 } 586 587 (*configs)[id] = cfg; 588 return; 589 590ignore: 591 nvlist_free(cfg); 592} 593 594int 595vdev_geom_read_pool_label(const char *name, 596 nvlist_t ***configs, uint64_t *count) 597{ 598 struct g_class *mp; 599 struct g_geom *gp; 600 struct g_provider *pp; 601 struct g_consumer *zcp; 602 nvlist_t *vdev_cfg; 603 uint64_t pool_guid; 604 int error, nlabels; 605 606 DROP_GIANT(); 607 g_topology_lock(); 608 609 *configs = NULL; 610 *count = 0; 611 pool_guid = 0; 612 LIST_FOREACH(mp, &g_classes, class) { 613 if (mp == &zfs_vdev_class) 614 continue; 615 LIST_FOREACH(gp, &mp->geom, geom) { 616 if (gp->flags & G_GEOM_WITHER) 617 continue; 618 LIST_FOREACH(pp, &gp->provider, provider) { 619 if (pp->flags & G_PF_WITHER) 620 continue; 621 zcp = vdev_geom_attach(pp, NULL, B_TRUE); 622 if (zcp == NULL) 623 continue; 624 g_topology_unlock(); 625 nlabels = vdev_geom_read_config(zcp, &vdev_cfg); 626 g_topology_lock(); 627 vdev_geom_detach(zcp, B_TRUE); 628 if (nlabels == 0) 629 continue; 630 ZFS_LOG(1, "successfully read vdev config"); 631 632 process_vdev_config(configs, count, 633 vdev_cfg, name, &pool_guid); 634 } 635 } 636 } 637 g_topology_unlock(); 638 PICKUP_GIANT(); 639 640 return (*count > 0 ? 0 : ENOENT); 641} 642 643enum match { 644 NO_MATCH = 0, /* No matching labels found */ 645 TOPGUID_MATCH = 1, /* Labels match top guid, not vdev guid*/ 646 ZERO_MATCH = 1, /* Should never be returned */ 647 ONE_MATCH = 2, /* 1 label matching the vdev_guid */ 648 TWO_MATCH = 3, /* 2 label matching the vdev_guid */ 649 THREE_MATCH = 4, /* 3 label matching the vdev_guid */ 650 FULL_MATCH = 5 /* all labels match the vdev_guid */ 651}; 652 653static enum match 654vdev_attach_ok(vdev_t *vd, struct g_provider *pp) 655{ 656 nvlist_t *config; 657 uint64_t pool_guid, top_guid, vdev_guid; 658 struct g_consumer *cp; 659 int nlabels; 660 661 cp = vdev_geom_attach(pp, NULL, B_TRUE); 662 if (cp == NULL) { 663 ZFS_LOG(1, "Unable to attach tasting instance to %s.", 664 pp->name); 665 return (NO_MATCH); 666 } 667 g_topology_unlock(); 668 nlabels = vdev_geom_read_config(cp, &config); 669 g_topology_lock(); 670 vdev_geom_detach(cp, B_TRUE); 671 if (nlabels == 0) { 672 ZFS_LOG(1, "Unable to read config from %s.", pp->name); 673 return (NO_MATCH); 674 } 675 676 pool_guid = 0; 677 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); 678 top_guid = 0; 679 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); 680 vdev_guid = 0; 681 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); 682 nvlist_free(config); 683 684 /* 685 * Check that the label's pool guid matches the desired guid. 686 * Inactive spares and L2ARCs do not have any pool guid in the label. 687 */ 688 if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { 689 ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", 690 pp->name, 691 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); 692 return (NO_MATCH); 693 } 694 695 /* 696 * Check that the label's vdev guid matches the desired guid. 697 * The second condition handles possible race on vdev detach, when 698 * remaining vdev receives GUID of destroyed top level mirror vdev. 699 */ 700 if (vdev_guid == vd->vdev_guid) { 701 ZFS_LOG(1, "guids match for provider %s.", pp->name); 702 return (ZERO_MATCH + nlabels); 703 } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { 704 ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); 705 return (TOPGUID_MATCH); 706 } 707 ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", 708 pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); 709 return (NO_MATCH); 710} 711 712static struct g_consumer * 713vdev_geom_attach_by_guids(vdev_t *vd) 714{ 715 struct g_class *mp; 716 struct g_geom *gp; 717 struct g_provider *pp, *best_pp; 718 struct g_consumer *cp; 719 const char *vdpath; 720 enum match match, best_match; 721 722 g_topology_assert(); 723 724 vdpath = vd->vdev_path + sizeof("/dev/") - 1; 725 cp = NULL; 726 best_pp = NULL; 727 best_match = NO_MATCH; 728 LIST_FOREACH(mp, &g_classes, class) { 729 if (mp == &zfs_vdev_class) 730 continue; 731 LIST_FOREACH(gp, &mp->geom, geom) { 732 if (gp->flags & G_GEOM_WITHER) 733 continue; 734 LIST_FOREACH(pp, &gp->provider, provider) { 735 match = vdev_attach_ok(vd, pp); 736 if (match > best_match) { 737 best_match = match; 738 best_pp = pp; 739 } else if (match == best_match) { 740 if (strcmp(pp->name, vdpath) == 0) { 741 best_pp = pp; 742 } 743 } 744 if (match == FULL_MATCH) 745 goto out; 746 } 747 } 748 } 749 750out: 751 if (best_pp) { 752 cp = vdev_geom_attach(best_pp, vd, B_TRUE); 753 if (cp == NULL) { 754 printf("ZFS WARNING: Unable to attach to %s.\n", 755 best_pp->name); 756 } 757 } 758 return (cp); 759} 760 761static struct g_consumer * 762vdev_geom_open_by_guids(vdev_t *vd) 763{ 764 struct g_consumer *cp; 765 char *buf; 766 size_t len; 767 768 g_topology_assert(); 769 770 ZFS_LOG(1, "Searching by guids [%ju:%ju].", 771 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); 772 cp = vdev_geom_attach_by_guids(vd); 773 if (cp != NULL) { 774 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 775 buf = kmem_alloc(len, KM_SLEEP); 776 777 snprintf(buf, len, "/dev/%s", cp->provider->name); 778 spa_strfree(vd->vdev_path); 779 vd->vdev_path = buf; 780 781 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 782 (uintmax_t)spa_guid(vd->vdev_spa), 783 (uintmax_t)vd->vdev_guid, cp->provider->name); 784 } else { 785 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 786 (uintmax_t)spa_guid(vd->vdev_spa), 787 (uintmax_t)vd->vdev_guid); 788 } 789 790 return (cp); 791} 792 793static struct g_consumer * 794vdev_geom_open_by_path(vdev_t *vd, int check_guid) 795{ 796 struct g_provider *pp; 797 struct g_consumer *cp; 798 799 g_topology_assert(); 800 801 cp = NULL; 802 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 803 if (pp != NULL) { 804 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 805 if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) 806 cp = vdev_geom_attach(pp, vd, B_FALSE); 807 } 808 809 return (cp); 810} 811 812static int 813vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 814 uint64_t *logical_ashift, uint64_t *physical_ashift) 815{ 816 struct g_provider *pp; 817 struct g_consumer *cp; 818 size_t bufsize; 819 int error; 820 821 /* Set the TLS to indicate downstack that we should not access zvols*/ 822 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); 823 824 /* 825 * We must have a pathname, and it must be absolute. 826 */ 827 if (vd->vdev_path == NULL || strncmp(vd->vdev_path, "/dev/", 5) != 0) { 828 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 829 return (EINVAL); 830 } 831 832 /* 833 * Reopen the device if it's not currently open. Otherwise, 834 * just update the physical size of the device. 835 */ 836 if ((cp = vd->vdev_tsd) != NULL) { 837 ASSERT(vd->vdev_reopening); 838 goto skip_open; 839 } 840 841 DROP_GIANT(); 842 g_topology_lock(); 843 error = 0; 844 845 if (vd->vdev_spa->spa_splitting_newspa || 846 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 847 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || 848 vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) { 849 /* 850 * We are dealing with a vdev that hasn't been previously 851 * opened (since boot), and we are not loading an 852 * existing pool configuration. This looks like a 853 * vdev add operation to a new or existing pool. 854 * Assume the user knows what he/she is doing and find 855 * GEOM provider by its name, ignoring GUID mismatches. 856 * 857 * XXPOLICY: It would be safer to only allow a device 858 * that is unlabeled or labeled but missing 859 * GUID information to be opened in this fashion, 860 * unless we are doing a split, in which case we 861 * should allow any guid. 862 */ 863 cp = vdev_geom_open_by_path(vd, 0); 864 } else { 865 /* 866 * Try using the recorded path for this device, but only 867 * accept it if its label data contains the expected GUIDs. 868 */ 869 cp = vdev_geom_open_by_path(vd, 1); 870 if (cp == NULL) { 871 /* 872 * The device at vd->vdev_path doesn't have the 873 * expected GUIDs. The disks might have merely 874 * moved around so try all other GEOM providers 875 * to find one with the right GUIDs. 876 */ 877 cp = vdev_geom_open_by_guids(vd); 878 } 879 } 880 881 /* Clear the TLS now that tasting is done */ 882 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); 883 884 if (cp == NULL) { 885 ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path); 886 error = ENOENT; 887 } else { 888 struct consumer_priv_t *priv; 889 struct consumer_vdev_elem *elem; 890 int spamode; 891 892 priv = (struct consumer_priv_t*)&cp->private; 893 if (cp->private == NULL) 894 SLIST_INIT(priv); 895 elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO); 896 elem->vd = vd; 897 SLIST_INSERT_HEAD(priv, elem, elems); 898 899 spamode = spa_mode(vd->vdev_spa); 900 if (cp->provider->sectorsize > VDEV_PAD_SIZE || 901 !ISP2(cp->provider->sectorsize)) { 902 ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 903 cp->provider->name); 904 905 vdev_geom_close_locked(vd); 906 error = EINVAL; 907 cp = NULL; 908 } else if (cp->acw == 0 && (spamode & FWRITE) != 0) { 909 int i; 910 911 for (i = 0; i < 5; i++) { 912 error = g_access(cp, 0, 1, 0); 913 if (error == 0) 914 break; 915 g_topology_unlock(); 916 tsleep(vd, 0, "vdev", hz / 2); 917 g_topology_lock(); 918 } 919 if (error != 0) { 920 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 921 cp->provider->name, error); 922 vdev_geom_close_locked(vd); 923 cp = NULL; 924 } 925 } 926 } 927 928 /* Fetch initial physical path information for this device. */ 929 if (cp != NULL) { 930 vdev_geom_attrchanged(cp, "GEOM::physpath"); 931 932 /* Set other GEOM characteristics */ 933 vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE); 934 vdev_geom_set_rotation_rate(vd, cp); 935 } 936 937 g_topology_unlock(); 938 PICKUP_GIANT(); 939 if (cp == NULL) { 940 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 941 vdev_dbgmsg(vd, "vdev_geom_open: failed to open [error=%d]", 942 error); 943 return (error); 944 } 945skip_open: 946 pp = cp->provider; 947 948 /* 949 * Determine the actual size of the device. 950 */ 951 *max_psize = *psize = pp->mediasize; 952 953 /* 954 * Determine the device's minimum transfer size and preferred 955 * transfer size. 956 */ 957 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 958 *physical_ashift = 0; 959 if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) && 960 pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0) 961 *physical_ashift = highbit(pp->stripesize) - 1; 962 963 /* 964 * Clear the nowritecache settings, so that on a vdev_reopen() 965 * we will try again. 966 */ 967 vd->vdev_nowritecache = B_FALSE; 968 969 return (0); 970} 971 972static void 973vdev_geom_close(vdev_t *vd) 974{ 975 struct g_consumer *cp; 976 977 cp = vd->vdev_tsd; 978 979 DROP_GIANT(); 980 g_topology_lock(); 981 982 if (!vd->vdev_reopening || 983 (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 || 984 (cp->provider != NULL && cp->provider->error != 0)))) 985 vdev_geom_close_locked(vd); 986 987 g_topology_unlock(); 988 PICKUP_GIANT(); 989} 990 991static void 992vdev_geom_io_intr(struct bio *bp) 993{ 994 vdev_t *vd; 995 zio_t *zio; 996 997 zio = bp->bio_caller1; 998 vd = zio->io_vd; 999 zio->io_error = bp->bio_error; 1000 if (zio->io_error == 0 && bp->bio_resid != 0) 1001 zio->io_error = SET_ERROR(EIO); 1002 1003 switch(zio->io_error) { 1004 case ENOTSUP: 1005 /* 1006 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 1007 * that future attempts will never succeed. In this case 1008 * we set a persistent flag so that we don't bother with 1009 * requests in the future. 1010 */ 1011 switch(bp->bio_cmd) { 1012 case BIO_FLUSH: 1013 vd->vdev_nowritecache = B_TRUE; 1014 break; 1015 case BIO_DELETE: 1016 vd->vdev_notrim = B_TRUE; 1017 break; 1018 } 1019 break; 1020 case ENXIO: 1021 if (!vd->vdev_remove_wanted) { 1022 /* 1023 * If provider's error is set we assume it is being 1024 * removed. 1025 */ 1026 if (bp->bio_to->error != 0) { 1027 vd->vdev_remove_wanted = B_TRUE; 1028 spa_async_request(zio->io_spa, 1029 SPA_ASYNC_REMOVE); 1030 } else if (!vd->vdev_delayed_close) { 1031 vd->vdev_delayed_close = B_TRUE; 1032 } 1033 } 1034 break; 1035 } 1036 1037 /* 1038 * We have to split bio freeing into two parts, because the ABD code 1039 * cannot be called in this context and vdev_op_io_done is not called 1040 * for ZIO_TYPE_IOCTL zio-s. 1041 */ 1042 if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) { 1043 g_destroy_bio(bp); 1044 zio->io_bio = NULL; 1045 } 1046 zio_delay_interrupt(zio); 1047} 1048 1049static void 1050vdev_geom_io_start(zio_t *zio) 1051{ 1052 vdev_t *vd; 1053 struct g_consumer *cp; 1054 struct bio *bp; 1055 int error; 1056 1057 vd = zio->io_vd; 1058 1059 switch (zio->io_type) { 1060 case ZIO_TYPE_IOCTL: 1061 /* XXPOLICY */ 1062 if (!vdev_readable(vd)) { 1063 zio->io_error = SET_ERROR(ENXIO); 1064 zio_interrupt(zio); 1065 return; 1066 } else { 1067 switch (zio->io_cmd) { 1068 case DKIOCFLUSHWRITECACHE: 1069 if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 1070 break; 1071 if (vd->vdev_nowritecache) { 1072 zio->io_error = SET_ERROR(ENOTSUP); 1073 break; 1074 } 1075 goto sendreq; 1076 default: 1077 zio->io_error = SET_ERROR(ENOTSUP); 1078 } 1079 } 1080 1081 zio_execute(zio); 1082 return; 1083 case ZIO_TYPE_FREE: 1084 if (vd->vdev_notrim) { 1085 zio->io_error = SET_ERROR(ENOTSUP); 1086 } else if (!vdev_geom_bio_delete_disable) { 1087 goto sendreq; 1088 } 1089 zio_execute(zio); 1090 return; 1091 } 1092sendreq: 1093 ASSERT(zio->io_type == ZIO_TYPE_READ || 1094 zio->io_type == ZIO_TYPE_WRITE || 1095 zio->io_type == ZIO_TYPE_FREE || 1096 zio->io_type == ZIO_TYPE_IOCTL); 1097 1098 cp = vd->vdev_tsd; 1099 if (cp == NULL) { 1100 zio->io_error = SET_ERROR(ENXIO); 1101 zio_interrupt(zio); 1102 return; 1103 } 1104 bp = g_alloc_bio(); 1105 bp->bio_caller1 = zio; 1106 switch (zio->io_type) { 1107 case ZIO_TYPE_READ: 1108 case ZIO_TYPE_WRITE: 1109 zio->io_target_timestamp = zio_handle_io_delay(zio); 1110 bp->bio_offset = zio->io_offset; 1111 bp->bio_length = zio->io_size; 1112 if (zio->io_type == ZIO_TYPE_READ) { 1113 bp->bio_cmd = BIO_READ; 1114 bp->bio_data = 1115 abd_borrow_buf(zio->io_abd, zio->io_size); 1116 } else { 1117 bp->bio_cmd = BIO_WRITE; 1118 bp->bio_data = 1119 abd_borrow_buf_copy(zio->io_abd, zio->io_size); 1120 } 1121 break; 1122 case ZIO_TYPE_FREE: 1123 bp->bio_cmd = BIO_DELETE; 1124 bp->bio_data = NULL; 1125 bp->bio_offset = zio->io_offset; 1126 bp->bio_length = zio->io_size; 1127 break; 1128 case ZIO_TYPE_IOCTL: 1129 bp->bio_cmd = BIO_FLUSH; 1130 bp->bio_data = NULL; 1131 bp->bio_offset = cp->provider->mediasize; 1132 bp->bio_length = 0; 1133 break; 1134 } 1135 bp->bio_done = vdev_geom_io_intr; 1136 zio->io_bio = bp; 1137 1138 g_io_request(bp, cp); 1139} 1140 1141static void 1142vdev_geom_io_done(zio_t *zio) 1143{ 1144 struct bio *bp = zio->io_bio; 1145 1146 if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) { 1147 ASSERT(bp == NULL); 1148 return; 1149 } 1150 1151 if (bp == NULL) { 1152 ASSERT3S(zio->io_error, ==, ENXIO); 1153 return; 1154 } 1155 1156 if (zio->io_type == ZIO_TYPE_READ) 1157 abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size); 1158 else 1159 abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size); 1160 1161 g_destroy_bio(bp); 1162 zio->io_bio = NULL; 1163} 1164 1165static void 1166vdev_geom_hold(vdev_t *vd) 1167{ 1168} 1169 1170static void 1171vdev_geom_rele(vdev_t *vd) 1172{ 1173} 1174 1175vdev_ops_t vdev_geom_ops = { 1176 vdev_geom_open, 1177 vdev_geom_close, 1178 vdev_default_asize, 1179 vdev_geom_io_start, 1180 vdev_geom_io_done, 1181 NULL, 1182 NULL, 1183 vdev_geom_hold, 1184 vdev_geom_rele, 1185 NULL, 1186 vdev_default_xlate, 1187 VDEV_TYPE_DISK, /* name of this vdev type */ 1188 B_TRUE /* leaf vdev */ 1189}; 1190