/* vdev_geom.c, revision 208142 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23 * All rights reserved. 24 */ 25 26#include <sys/zfs_context.h> 27#include <sys/param.h> 28#include <sys/kernel.h> 29#include <sys/bio.h> 30#include <sys/disk.h> 31#include <sys/spa.h> 32#include <sys/spa_impl.h> 33#include <sys/vdev_impl.h> 34#include <sys/fs/zfs.h> 35#include <sys/zio.h> 36#include <geom/geom.h> 37#include <geom/geom_int.h> 38 39/* 40 * Virtual device vector for GEOM. 41 */ 42 43struct g_class zfs_vdev_class = { 44 .name = "ZFS::VDEV", 45 .version = G_VERSION, 46}; 47 48DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 49 50static void 51vdev_geom_orphan(struct g_consumer *cp) 52{ 53 struct g_geom *gp; 54 vdev_t *vd; 55 int error; 56 57 g_topology_assert(); 58 59 vd = cp->private; 60 gp = cp->geom; 61 error = cp->provider->error; 62 63 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 64 if (cp->acr + cp->acw + cp->ace > 0) 65 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 66 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); 67 g_detach(cp); 68 g_destroy_consumer(cp); 69 /* Destroy geom if there are no consumers left. 
*/ 70 if (LIST_EMPTY(&gp->consumer)) { 71 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 72 g_wither_geom(gp, error); 73 } 74 vd->vdev_tsd = NULL; 75 vd->vdev_remove_wanted = B_TRUE; 76 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 77} 78 79static struct g_consumer * 80vdev_geom_attach(struct g_provider *pp) 81{ 82 struct g_geom *gp; 83 struct g_consumer *cp; 84 85 g_topology_assert(); 86 87 ZFS_LOG(1, "Attaching to %s.", pp->name); 88 /* Do we have geom already? No? Create one. */ 89 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 90 if (gp->flags & G_GEOM_WITHER) 91 continue; 92 if (strcmp(gp->name, "zfs::vdev") != 0) 93 continue; 94 break; 95 } 96 if (gp == NULL) { 97 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 98 gp->orphan = vdev_geom_orphan; 99 cp = g_new_consumer(gp); 100 if (g_attach(cp, pp) != 0) { 101 g_wither_geom(gp, ENXIO); 102 return (NULL); 103 } 104 if (g_access(cp, 1, 0, 1) != 0) { 105 g_wither_geom(gp, ENXIO); 106 return (NULL); 107 } 108 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 109 } else { 110 /* Check if we are already connected to this provider. 
*/ 111 LIST_FOREACH(cp, &gp->consumer, consumer) { 112 if (cp->provider == pp) { 113 ZFS_LOG(1, "Found consumer for %s.", pp->name); 114 break; 115 } 116 } 117 if (cp == NULL) { 118 cp = g_new_consumer(gp); 119 if (g_attach(cp, pp) != 0) { 120 g_destroy_consumer(cp); 121 return (NULL); 122 } 123 if (g_access(cp, 1, 0, 1) != 0) { 124 g_detach(cp); 125 g_destroy_consumer(cp); 126 return (NULL); 127 } 128 ZFS_LOG(1, "Created consumer for %s.", pp->name); 129 } else { 130 if (g_access(cp, 1, 0, 1) != 0) 131 return (NULL); 132 ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 133 } 134 } 135 return (cp); 136} 137 138static void 139vdev_geom_detach(void *arg, int flag __unused) 140{ 141 struct g_geom *gp; 142 struct g_consumer *cp; 143 144 g_topology_assert(); 145 cp = arg; 146 gp = cp->geom; 147 148 ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 149 g_access(cp, -1, 0, -1); 150 /* Destroy consumer on last close. */ 151 if (cp->acr == 0 && cp->ace == 0) { 152 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); 153 if (cp->acw > 0) 154 g_access(cp, 0, -cp->acw, 0); 155 g_detach(cp); 156 g_destroy_consumer(cp); 157 } 158 /* Destroy geom if there are no consumers left. 
*/ 159 if (LIST_EMPTY(&gp->consumer)) { 160 ZFS_LOG(1, "Destroyed geom %s.", gp->name); 161 g_wither_geom(gp, ENXIO); 162 } 163} 164 165static uint64_t 166nvlist_get_guid(nvlist_t *list) 167{ 168 nvpair_t *elem = NULL; 169 uint64_t value; 170 171 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { 172 if (nvpair_type(elem) == DATA_TYPE_UINT64 && 173 strcmp(nvpair_name(elem), "guid") == 0) { 174 VERIFY(nvpair_value_uint64(elem, &value) == 0); 175 return (value); 176 } 177 } 178 return (0); 179} 180 181static int 182vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 183{ 184 struct bio *bp; 185 u_char *p; 186 off_t off; 187 int error; 188 189 ASSERT((offset % cp->provider->sectorsize) == 0); 190 ASSERT((size % cp->provider->sectorsize) == 0); 191 192 bp = g_alloc_bio(); 193 off = offset; 194 offset += size; 195 p = data; 196 error = 0; 197 198 for (; off < offset; off += MAXPHYS, p += MAXPHYS, size -= MAXPHYS) { 199 bzero(bp, sizeof(*bp)); 200 bp->bio_cmd = cmd; 201 bp->bio_done = NULL; 202 bp->bio_offset = off; 203 bp->bio_length = MIN(size, MAXPHYS); 204 bp->bio_data = p; 205 g_io_request(bp, cp); 206 error = biowait(bp, "vdev_geom_io"); 207 if (error != 0) 208 break; 209 } 210 211 g_destroy_bio(bp); 212 return (error); 213} 214 215static uint64_t 216vdev_geom_read_guid(struct g_consumer *cp) 217{ 218 struct g_provider *pp; 219 vdev_label_t *label; 220 char *p, *buf; 221 size_t buflen; 222 uint64_t psize; 223 off_t offset, size; 224 uint64_t guid; 225 int error, l, len, iszvol; 226 227 g_topology_assert_not(); 228 229 pp = cp->provider; 230 ZFS_LOG(1, "Reading guid from %s...", pp->name); 231 if (g_getattr("ZFS::iszvol", cp, &iszvol) == 0 && iszvol) { 232 ZFS_LOG(1, "Skipping ZVOL-based provider %s.", pp->name); 233 return (0); 234 } 235 236 psize = pp->mediasize; 237 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 238 239 size = sizeof(*label) + pp->sectorsize - 240 ((sizeof(*label) - 1) % pp->sectorsize) - 1; 241 242 
guid = 0; 243 label = kmem_alloc(size, KM_SLEEP); 244 buflen = sizeof(label->vl_vdev_phys.vp_nvlist); 245 246 for (l = 0; l < VDEV_LABELS; l++) { 247 nvlist_t *config = NULL; 248 249 offset = vdev_label_offset(psize, l, 0); 250 if ((offset % pp->sectorsize) != 0) 251 continue; 252 253 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 254 continue; 255 buf = label->vl_vdev_phys.vp_nvlist; 256 257 if (nvlist_unpack(buf, buflen, &config, 0) != 0) 258 continue; 259 260 guid = nvlist_get_guid(config); 261 nvlist_free(config); 262 if (guid != 0) 263 break; 264 } 265 266 kmem_free(label, size); 267 if (guid != 0) 268 ZFS_LOG(1, "guid for %s is %ju", pp->name, (uintmax_t)guid); 269 return (guid); 270} 271 272struct vdev_geom_find { 273 uint64_t guid; 274 struct g_consumer *cp; 275}; 276 277static void 278vdev_geom_taste_orphan(struct g_consumer *cp) 279{ 280 281 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 282 cp->provider->name)); 283} 284 285static void 286vdev_geom_attach_by_guid_event(void *arg, int flags __unused) 287{ 288 struct vdev_geom_find *ap; 289 struct g_class *mp; 290 struct g_geom *gp, *zgp; 291 struct g_provider *pp; 292 struct g_consumer *zcp; 293 uint64_t guid; 294 295 g_topology_assert(); 296 297 ap = arg; 298 299 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 300 /* This orphan function should be never called. 
*/ 301 zgp->orphan = vdev_geom_taste_orphan; 302 zcp = g_new_consumer(zgp); 303 304 LIST_FOREACH(mp, &g_classes, class) { 305 if (mp == &zfs_vdev_class) 306 continue; 307 LIST_FOREACH(gp, &mp->geom, geom) { 308 if (gp->flags & G_GEOM_WITHER) 309 continue; 310 LIST_FOREACH(pp, &gp->provider, provider) { 311 if (pp->flags & G_PF_WITHER) 312 continue; 313 g_attach(zcp, pp); 314 if (g_access(zcp, 1, 0, 0) != 0) { 315 g_detach(zcp); 316 continue; 317 } 318 g_topology_unlock(); 319 guid = vdev_geom_read_guid(zcp); 320 g_topology_lock(); 321 g_access(zcp, -1, 0, 0); 322 g_detach(zcp); 323 if (guid != ap->guid) 324 continue; 325 ap->cp = vdev_geom_attach(pp); 326 if (ap->cp == NULL) { 327 printf("ZFS WARNING: Unable to attach to %s.\n", 328 pp->name); 329 continue; 330 } 331 goto end; 332 } 333 } 334 } 335 ap->cp = NULL; 336end: 337 g_destroy_consumer(zcp); 338 g_destroy_geom(zgp); 339} 340 341static struct g_consumer * 342vdev_geom_attach_by_guid(uint64_t guid) 343{ 344 struct vdev_geom_find *ap; 345 struct g_consumer *cp; 346 347 ap = kmem_zalloc(sizeof(*ap), KM_SLEEP); 348 ap->guid = guid; 349 g_waitfor_event(vdev_geom_attach_by_guid_event, ap, M_WAITOK, NULL); 350 cp = ap->cp; 351 kmem_free(ap, sizeof(*ap)); 352 return (cp); 353} 354 355static struct g_consumer * 356vdev_geom_open_by_guid(vdev_t *vd) 357{ 358 struct g_consumer *cp; 359 char *buf; 360 size_t len; 361 362 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 363 cp = vdev_geom_attach_by_guid(vd->vdev_guid); 364 if (cp != NULL) { 365 len = strlen(cp->provider->name) + strlen("/dev/") + 1; 366 buf = kmem_alloc(len, KM_SLEEP); 367 368 snprintf(buf, len, "/dev/%s", cp->provider->name); 369 spa_strfree(vd->vdev_path); 370 vd->vdev_path = buf; 371 372 ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.", 373 (uintmax_t)vd->vdev_guid, vd->vdev_path); 374 } else { 375 ZFS_LOG(1, "Search by guid [%ju] failed.", 376 (uintmax_t)vd->vdev_guid); 377 } 378 379 return (cp); 380} 381 382static struct 
g_consumer * 383vdev_geom_open_by_path(vdev_t *vd, int check_guid) 384{ 385 struct g_provider *pp; 386 struct g_consumer *cp; 387 uint64_t guid; 388 389 cp = NULL; 390 g_topology_lock(); 391 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 392 if (pp != NULL) { 393 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 394 cp = vdev_geom_attach(pp); 395 if (cp != NULL && check_guid) { 396 g_topology_unlock(); 397 guid = vdev_geom_read_guid(cp); 398 g_topology_lock(); 399 if (guid != vd->vdev_guid) { 400 vdev_geom_detach(cp, 0); 401 cp = NULL; 402 ZFS_LOG(1, "guid mismatch for provider %s: " 403 "%ju != %ju.", vd->vdev_path, 404 (uintmax_t)vd->vdev_guid, (uintmax_t)guid); 405 } else { 406 ZFS_LOG(1, "guid match for provider %s.", 407 vd->vdev_path); 408 } 409 } 410 } 411 g_topology_unlock(); 412 413 return (cp); 414} 415 416static int 417vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) 418{ 419 struct g_provider *pp; 420 struct g_consumer *cp; 421 int error, owned; 422 423 /* 424 * We must have a pathname, and it must be absolute. 425 */ 426 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 427 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 428 return (EINVAL); 429 } 430 431 vd->vdev_tsd = NULL; 432 433 if ((owned = mtx_owned(&Giant))) 434 mtx_unlock(&Giant); 435 error = 0; 436 437 /* 438 * If we're creating pool, just find GEOM provider by its name 439 * and ignore GUID mismatches. 440 */ 441 if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE) 442 cp = vdev_geom_open_by_path(vd, 0); 443 else { 444 cp = vdev_geom_open_by_path(vd, 1); 445 if (cp == NULL) { 446 /* 447 * The device at vd->vdev_path doesn't have the 448 * expected guid. The disks might have merely 449 * moved around so try all other GEOM providers 450 * to find one with the right guid. 
451 */ 452 cp = vdev_geom_open_by_guid(vd); 453 } 454 } 455 456 if (cp == NULL) { 457 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 458 error = ENOENT; 459 } else if (cp->acw == 0 && (spa_mode & FWRITE) != 0) { 460 int i; 461 462 g_topology_lock(); 463 for (i = 0; i < 5; i++) { 464 error = g_access(cp, 0, 1, 0); 465 if (error == 0) 466 break; 467 g_topology_unlock(); 468 tsleep(vd, 0, "vdev", hz / 2); 469 g_topology_lock(); 470 } 471 if (error != 0) { 472 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 473 vd->vdev_path, error); 474 vdev_geom_detach(cp, 0); 475 cp = NULL; 476 } 477 g_topology_unlock(); 478 } 479 if (owned) 480 mtx_lock(&Giant); 481 if (cp == NULL) { 482 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 483 return (error); 484 } 485 486 cp->private = vd; 487 vd->vdev_tsd = cp; 488 pp = cp->provider; 489 490 /* 491 * Determine the actual size of the device. 492 */ 493 *psize = pp->mediasize; 494 495 /* 496 * Determine the device's minimum transfer size. 497 */ 498 *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 499 500 /* 501 * Clear the nowritecache bit, so that on a vdev_reopen() we will 502 * try again. 503 */ 504 vd->vdev_nowritecache = B_FALSE; 505 506 return (0); 507} 508 509static void 510vdev_geom_close(vdev_t *vd) 511{ 512 struct g_consumer *cp; 513 514 cp = vd->vdev_tsd; 515 if (cp == NULL) 516 return; 517 vd->vdev_tsd = NULL; 518 g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); 519} 520 521static void 522vdev_geom_io_intr(struct bio *bp) 523{ 524 zio_t *zio; 525 526 zio = bp->bio_caller1; 527 zio->io_error = bp->bio_error; 528 if (zio->io_error == 0 && bp->bio_resid != 0) 529 zio->io_error = EIO; 530 if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) { 531 vdev_t *vd; 532 533 /* 534 * If we get ENOTSUP, we know that no future 535 * attempts will ever succeed. In this case we 536 * set a persistent bit so that we don't bother 537 * with the ioctl in the future. 
538 */ 539 vd = zio->io_vd; 540 vd->vdev_nowritecache = B_TRUE; 541 } 542 g_destroy_bio(bp); 543 zio_interrupt(zio); 544} 545 546static int 547vdev_geom_io_start(zio_t *zio) 548{ 549 vdev_t *vd; 550 struct g_consumer *cp; 551 struct bio *bp; 552 int error; 553 554 vd = zio->io_vd; 555 556 if (zio->io_type == ZIO_TYPE_IOCTL) { 557 /* XXPOLICY */ 558 if (!vdev_readable(vd)) { 559 zio->io_error = ENXIO; 560 return (ZIO_PIPELINE_CONTINUE); 561 } 562 563 switch (zio->io_cmd) { 564 565 case DKIOCFLUSHWRITECACHE: 566 567 if (zfs_nocacheflush) 568 break; 569 570 if (vd->vdev_nowritecache) { 571 zio->io_error = ENOTSUP; 572 break; 573 } 574 575 goto sendreq; 576 default: 577 zio->io_error = ENOTSUP; 578 } 579 580 return (ZIO_PIPELINE_CONTINUE); 581 } 582sendreq: 583 cp = vd->vdev_tsd; 584 if (cp == NULL) { 585 zio->io_error = ENXIO; 586 return (ZIO_PIPELINE_CONTINUE); 587 } 588 bp = g_alloc_bio(); 589 bp->bio_caller1 = zio; 590 switch (zio->io_type) { 591 case ZIO_TYPE_READ: 592 case ZIO_TYPE_WRITE: 593 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 594 bp->bio_data = zio->io_data; 595 bp->bio_offset = zio->io_offset; 596 bp->bio_length = zio->io_size; 597 break; 598 case ZIO_TYPE_IOCTL: 599 bp->bio_cmd = BIO_FLUSH; 600 bp->bio_data = NULL; 601 bp->bio_offset = cp->provider->mediasize; 602 bp->bio_length = 0; 603 break; 604 } 605 bp->bio_done = vdev_geom_io_intr; 606 607 g_io_request(bp, cp); 608 609 return (ZIO_PIPELINE_STOP); 610} 611 612static void 613vdev_geom_io_done(zio_t *zio) 614{ 615} 616 617vdev_ops_t vdev_geom_ops = { 618 vdev_geom_open, 619 vdev_geom_close, 620 vdev_default_asize, 621 vdev_geom_io_start, 622 vdev_geom_io_done, 623 NULL, 624 VDEV_TYPE_DISK, /* name of this vdev type */ 625 B_TRUE /* leaf vdev */ 626}; 627