/* vdev_geom.c revision 292066 */
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22168404Spjd * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 23168404Spjd * All rights reserved. 24236155Smm * 25236155Smm * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org> 26168404Spjd */ 27168404Spjd 28168404Spjd#include <sys/zfs_context.h> 29168404Spjd#include <sys/param.h> 30168404Spjd#include <sys/kernel.h> 31168404Spjd#include <sys/bio.h> 32169303Spjd#include <sys/disk.h> 33168404Spjd#include <sys/spa.h> 34205346Spjd#include <sys/spa_impl.h> 35168404Spjd#include <sys/vdev_impl.h> 36168404Spjd#include <sys/fs/zfs.h> 37168404Spjd#include <sys/zio.h> 38168404Spjd#include <geom/geom.h> 39169303Spjd#include <geom/geom_int.h> 40168404Spjd 41168404Spjd/* 42168404Spjd * Virtual device vector for GEOM. 
43168404Spjd */ 44168404Spjd 45256956Ssmhstatic g_attrchanged_t vdev_geom_attrchanged; 46168404Spjdstruct g_class zfs_vdev_class = { 47168404Spjd .name = "ZFS::VDEV", 48168404Spjd .version = G_VERSION, 49256956Ssmh .attrchanged = vdev_geom_attrchanged, 50168404Spjd}; 51168404Spjd 52168404SpjdDECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); 53168404Spjd 54240868SpjdSYSCTL_DECL(_vfs_zfs_vdev); 55240868Spjd/* Don't send BIO_FLUSH. */ 56267992Shselaskystatic int vdev_geom_bio_flush_disable; 57267992ShselaskySYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN, 58219089Spjd &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); 59240868Spjd/* Don't send BIO_DELETE. */ 60267992Shselaskystatic int vdev_geom_bio_delete_disable; 61267992ShselaskySYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN, 62240868Spjd &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); 63219089Spjd 64168404Spjdstatic void 65256956Ssmhvdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) 66256956Ssmh{ 67256956Ssmh int error; 68256956Ssmh uint16_t rate; 69256956Ssmh 70256956Ssmh error = g_getattr("GEOM::rotation_rate", cp, &rate); 71256956Ssmh if (error == 0) 72256956Ssmh vd->vdev_rotation_rate = rate; 73256956Ssmh else 74256956Ssmh vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; 75256956Ssmh} 76256956Ssmh 77256956Ssmhstatic void 78256956Ssmhvdev_geom_attrchanged(struct g_consumer *cp, const char *attr) 79256956Ssmh{ 80256956Ssmh vdev_t *vd; 81256956Ssmh 82256956Ssmh vd = cp->private; 83256956Ssmh if (vd == NULL) 84256956Ssmh return; 85256956Ssmh 86256956Ssmh if (strcmp(attr, "GEOM::rotation_rate") == 0) { 87256956Ssmh vdev_geom_set_rotation_rate(vd, cp); 88256956Ssmh return; 89256956Ssmh } 90256956Ssmh} 91256956Ssmh 92256956Ssmhstatic void 93168404Spjdvdev_geom_orphan(struct g_consumer *cp) 94168404Spjd{ 95168404Spjd vdev_t *vd; 96168404Spjd 97168404Spjd g_topology_assert(); 98168404Spjd 99168404Spjd vd = cp->private; 100253754Smav if (vd == NULL) 
101253754Smav return; 102168404Spjd 103219089Spjd /* 104219089Spjd * Orphan callbacks occur from the GEOM event thread. 105219089Spjd * Concurrent with this call, new I/O requests may be 106219089Spjd * working their way through GEOM about to find out 107219089Spjd * (only once executed by the g_down thread) that we've 108219089Spjd * been orphaned from our disk provider. These I/Os 109219089Spjd * must be retired before we can detach our consumer. 110219089Spjd * This is most easily achieved by acquiring the 111219089Spjd * SPA ZIO configuration lock as a writer, but doing 112219089Spjd * so with the GEOM topology lock held would cause 113219089Spjd * a lock order reversal. Instead, rely on the SPA's 114219089Spjd * async removal support to invoke a close on this 115219089Spjd * vdev once it is safe to do so. 116219089Spjd */ 117219089Spjd zfs_post_remove(vd->vdev_spa, vd); 118185029Spjd vd->vdev_remove_wanted = B_TRUE; 119185029Spjd spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 120168404Spjd} 121168404Spjd 122168404Spjdstatic struct g_consumer * 123203504Spjdvdev_geom_attach(struct g_provider *pp) 124168404Spjd{ 125168404Spjd struct g_geom *gp; 126168404Spjd struct g_consumer *cp; 127168404Spjd 128168404Spjd g_topology_assert(); 129168404Spjd 130168404Spjd ZFS_LOG(1, "Attaching to %s.", pp->name); 131168404Spjd /* Do we have geom already? No? Create one. 
*/ 132168404Spjd LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { 133169303Spjd if (gp->flags & G_GEOM_WITHER) 134169303Spjd continue; 135169303Spjd if (strcmp(gp->name, "zfs::vdev") != 0) 136169303Spjd continue; 137169303Spjd break; 138168404Spjd } 139168404Spjd if (gp == NULL) { 140168404Spjd gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); 141168404Spjd gp->orphan = vdev_geom_orphan; 142168404Spjd cp = g_new_consumer(gp); 143168404Spjd if (g_attach(cp, pp) != 0) { 144168404Spjd g_wither_geom(gp, ENXIO); 145168404Spjd return (NULL); 146168404Spjd } 147203504Spjd if (g_access(cp, 1, 0, 1) != 0) { 148168404Spjd g_wither_geom(gp, ENXIO); 149168404Spjd return (NULL); 150168404Spjd } 151168404Spjd ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); 152168404Spjd } else { 153168404Spjd /* Check if we are already connected to this provider. */ 154168404Spjd LIST_FOREACH(cp, &gp->consumer, consumer) { 155168404Spjd if (cp->provider == pp) { 156168404Spjd ZFS_LOG(1, "Found consumer for %s.", pp->name); 157168404Spjd break; 158168404Spjd } 159168404Spjd } 160168404Spjd if (cp == NULL) { 161168404Spjd cp = g_new_consumer(gp); 162168404Spjd if (g_attach(cp, pp) != 0) { 163168404Spjd g_destroy_consumer(cp); 164168404Spjd return (NULL); 165168404Spjd } 166203504Spjd if (g_access(cp, 1, 0, 1) != 0) { 167168404Spjd g_detach(cp); 168168404Spjd g_destroy_consumer(cp); 169168404Spjd return (NULL); 170168404Spjd } 171168404Spjd ZFS_LOG(1, "Created consumer for %s.", pp->name); 172168404Spjd } else { 173203504Spjd if (g_access(cp, 1, 0, 1) != 0) 174168404Spjd return (NULL); 175168404Spjd ZFS_LOG(1, "Used existing consumer for %s.", pp->name); 176168404Spjd } 177168404Spjd } 178256880Smav cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 179168404Spjd return (cp); 180168404Spjd} 181168404Spjd 182168404Spjdstatic void 183168404Spjdvdev_geom_detach(void *arg, int flag __unused) 184168404Spjd{ 185168404Spjd struct g_geom *gp; 186168404Spjd struct g_consumer *cp; 187168404Spjd 
188168404Spjd g_topology_assert(); 189168404Spjd cp = arg; 190168404Spjd gp = cp->geom; 191168404Spjd 192168404Spjd ZFS_LOG(1, "Closing access to %s.", cp->provider->name); 193168404Spjd g_access(cp, -1, 0, -1); 194168404Spjd /* Destroy consumer on last close. */ 195168404Spjd if (cp->acr == 0 && cp->ace == 0) { 196168404Spjd ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); 197168404Spjd if (cp->acw > 0) 198168404Spjd g_access(cp, 0, -cp->acw, 0); 199168404Spjd g_detach(cp); 200168404Spjd g_destroy_consumer(cp); 201168404Spjd } 202168404Spjd /* Destroy geom if there are no consumers left. */ 203168404Spjd if (LIST_EMPTY(&gp->consumer)) { 204168404Spjd ZFS_LOG(1, "Destroyed geom %s.", gp->name); 205168404Spjd g_wither_geom(gp, ENXIO); 206168404Spjd } 207168404Spjd} 208168404Spjd 209292066Sasomersstatic void 210292066Sasomersnvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) 211185029Spjd{ 212185029Spjd 213292066Sasomers nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); 214292066Sasomers nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); 215185029Spjd} 216185029Spjd 217185029Spjdstatic int 218185029Spjdvdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) 219185029Spjd{ 220185029Spjd struct bio *bp; 221185029Spjd u_char *p; 222208682Spjd off_t off, maxio; 223185029Spjd int error; 224185029Spjd 225185029Spjd ASSERT((offset % cp->provider->sectorsize) == 0); 226185029Spjd ASSERT((size % cp->provider->sectorsize) == 0); 227185029Spjd 228185029Spjd bp = g_alloc_bio(); 229185029Spjd off = offset; 230185029Spjd offset += size; 231185029Spjd p = data; 232208682Spjd maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); 233185029Spjd error = 0; 234185029Spjd 235208682Spjd for (; off < offset; off += maxio, p += maxio, size -= maxio) { 236185029Spjd bzero(bp, sizeof(*bp)); 237185029Spjd bp->bio_cmd = cmd; 238185029Spjd bp->bio_done = NULL; 239185029Spjd bp->bio_offset = off; 240208682Spjd bp->bio_length = 
MIN(size, maxio); 241185029Spjd bp->bio_data = p; 242185029Spjd g_io_request(bp, cp); 243185029Spjd error = biowait(bp, "vdev_geom_io"); 244185029Spjd if (error != 0) 245185029Spjd break; 246185029Spjd } 247185029Spjd 248185029Spjd g_destroy_bio(bp); 249185029Spjd return (error); 250185029Spjd} 251185029Spjd 252241286Savgstatic void 253241286Savgvdev_geom_taste_orphan(struct g_consumer *cp) 254185029Spjd{ 255241286Savg 256241286Savg KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 257241286Savg cp->provider->name)); 258241286Savg} 259241286Savg 260241286Savgstatic int 261241286Savgvdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) 262241286Savg{ 263185029Spjd struct g_provider *pp; 264185029Spjd vdev_label_t *label; 265185029Spjd char *p, *buf; 266185029Spjd size_t buflen; 267185029Spjd uint64_t psize; 268185029Spjd off_t offset, size; 269292066Sasomers uint64_t state, txg; 270219089Spjd int error, l, len; 271185029Spjd 272185029Spjd g_topology_assert_not(); 273185029Spjd 274185029Spjd pp = cp->provider; 275241286Savg ZFS_LOG(1, "Reading config from %s...", pp->name); 276185029Spjd 277185029Spjd psize = pp->mediasize; 278185029Spjd psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); 279185029Spjd 280185029Spjd size = sizeof(*label) + pp->sectorsize - 281185029Spjd ((sizeof(*label) - 1) % pp->sectorsize) - 1; 282185029Spjd 283185029Spjd label = kmem_alloc(size, KM_SLEEP); 284185029Spjd buflen = sizeof(label->vl_vdev_phys.vp_nvlist); 285185029Spjd 286241286Savg *config = NULL; 287185174Spjd for (l = 0; l < VDEV_LABELS; l++) { 288185029Spjd 289185029Spjd offset = vdev_label_offset(psize, l, 0); 290185029Spjd if ((offset % pp->sectorsize) != 0) 291185029Spjd continue; 292185029Spjd 293200124Spjd if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) 294185029Spjd continue; 295185029Spjd buf = label->vl_vdev_phys.vp_nvlist; 296185029Spjd 297241286Savg if (nvlist_unpack(buf, buflen, config, 0) != 0) 298185029Spjd continue; 299185029Spjd 
300241286Savg if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, 301252056Ssmh &state) != 0 || state > POOL_STATE_L2CACHE) { 302241286Savg nvlist_free(*config); 303241286Savg *config = NULL; 304241286Savg continue; 305241286Savg } 306241286Savg 307242135Savg if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && 308242135Savg (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, 309242135Savg &txg) != 0 || txg == 0)) { 310241286Savg nvlist_free(*config); 311241286Savg *config = NULL; 312241286Savg continue; 313241286Savg } 314241286Savg 315241286Savg break; 316185029Spjd } 317185029Spjd 318185029Spjd kmem_free(label, size); 319241286Savg return (*config == NULL ? ENOENT : 0); 320185029Spjd} 321185029Spjd 322243502Savgstatic void 323243502Savgresize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) 324241286Savg{ 325243502Savg nvlist_t **new_configs; 326243502Savg uint64_t i; 327243502Savg 328243502Savg if (id < *count) 329243502Savg return; 330244635Savg new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), 331244635Savg KM_SLEEP); 332243502Savg for (i = 0; i < *count; i++) 333243502Savg new_configs[i] = (*configs)[i]; 334243502Savg if (*configs != NULL) 335243502Savg kmem_free(*configs, *count * sizeof(void *)); 336243502Savg *configs = new_configs; 337243502Savg *count = id + 1; 338243502Savg} 339243502Savg 340243502Savgstatic void 341243502Savgprocess_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, 342243502Savg const char *name, uint64_t* known_pool_guid) 343243502Savg{ 344243502Savg nvlist_t *vdev_tree; 345243502Savg uint64_t pool_guid; 346243502Savg uint64_t vdev_guid, known_guid; 347243502Savg uint64_t id, txg, known_txg; 348241286Savg char *pname; 349243502Savg int i; 350241286Savg 351243502Savg if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || 352241286Savg strcmp(pname, name) != 0) 353243502Savg goto ignore; 354241286Savg 355243502Savg if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, 
&pool_guid) != 0) 356243502Savg goto ignore; 357241286Savg 358243502Savg if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) 359243502Savg goto ignore; 360241286Savg 361243502Savg if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 362243502Savg goto ignore; 363243502Savg 364243502Savg if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) 365243502Savg goto ignore; 366243502Savg 367243502Savg VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 368243502Savg 369243502Savg if (*known_pool_guid != 0) { 370243502Savg if (pool_guid != *known_pool_guid) 371243502Savg goto ignore; 372243502Savg } else 373243502Savg *known_pool_guid = pool_guid; 374243502Savg 375243502Savg resize_configs(configs, count, id); 376243502Savg 377243502Savg if ((*configs)[id] != NULL) { 378243502Savg VERIFY(nvlist_lookup_uint64((*configs)[id], 379243502Savg ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); 380243502Savg if (txg <= known_txg) 381243502Savg goto ignore; 382243502Savg nvlist_free((*configs)[id]); 383243502Savg } 384243502Savg 385243502Savg (*configs)[id] = cfg; 386243502Savg return; 387243502Savg 388243502Savgignore: 389243502Savg nvlist_free(cfg); 390241286Savg} 391241286Savg 392241286Savgstatic int 393241286Savgvdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) 394241286Savg{ 395241286Savg int error; 396241286Savg 397241286Savg if (pp->flags & G_PF_WITHER) 398241286Savg return (EINVAL); 399241286Savg g_attach(cp, pp); 400241286Savg error = g_access(cp, 1, 0, 0); 401259168Smav if (error == 0) { 402259168Smav if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) 403259168Smav error = EINVAL; 404259168Smav else if (pp->mediasize < SPA_MINDEVSIZE) 405259168Smav error = EINVAL; 406259168Smav if (error != 0) 407259168Smav g_access(cp, -1, 0, 0); 408259168Smav } 409241286Savg if (error != 0) 410241286Savg g_detach(cp); 411241286Savg return (error); 412241286Savg} 413241286Savg 414169303Spjdstatic void 
415242332Sdelphijvdev_geom_detach_taster(struct g_consumer *cp) 416169303Spjd{ 417241286Savg g_access(cp, -1, 0, 0); 418241286Savg g_detach(cp); 419241286Savg} 420169303Spjd 421241286Savgint 422243502Savgvdev_geom_read_pool_label(const char *name, 423243502Savg nvlist_t ***configs, uint64_t *count) 424241286Savg{ 425241286Savg struct g_class *mp; 426241286Savg struct g_geom *gp, *zgp; 427241286Savg struct g_provider *pp; 428241286Savg struct g_consumer *zcp; 429241286Savg nvlist_t *vdev_cfg; 430243502Savg uint64_t pool_guid; 431241286Savg int error; 432241286Savg 433241286Savg DROP_GIANT(); 434241286Savg g_topology_lock(); 435241286Savg 436241286Savg zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 437241286Savg /* This orphan function should be never called. */ 438241286Savg zgp->orphan = vdev_geom_taste_orphan; 439241286Savg zcp = g_new_consumer(zgp); 440241286Savg 441243502Savg *configs = NULL; 442243502Savg *count = 0; 443243502Savg pool_guid = 0; 444241286Savg LIST_FOREACH(mp, &g_classes, class) { 445241286Savg if (mp == &zfs_vdev_class) 446241286Savg continue; 447241286Savg LIST_FOREACH(gp, &mp->geom, geom) { 448241286Savg if (gp->flags & G_GEOM_WITHER) 449241286Savg continue; 450241286Savg LIST_FOREACH(pp, &gp->provider, provider) { 451241286Savg if (pp->flags & G_PF_WITHER) 452241286Savg continue; 453241286Savg if (vdev_geom_attach_taster(zcp, pp) != 0) 454241286Savg continue; 455241286Savg g_topology_unlock(); 456241286Savg error = vdev_geom_read_config(zcp, &vdev_cfg); 457241286Savg g_topology_lock(); 458242332Sdelphij vdev_geom_detach_taster(zcp); 459241286Savg if (error) 460241286Savg continue; 461241286Savg ZFS_LOG(1, "successfully read vdev config"); 462241286Savg 463243502Savg process_vdev_config(configs, count, 464243502Savg vdev_cfg, name, &pool_guid); 465241286Savg } 466241286Savg } 467241286Savg } 468241286Savg 469241286Savg g_destroy_consumer(zcp); 470241286Savg g_destroy_geom(zgp); 471241286Savg g_topology_unlock(); 472241286Savg 
PICKUP_GIANT(); 473243502Savg 474243502Savg return (*count > 0 ? 0 : ENOENT); 475169303Spjd} 476169303Spjd 477292066Sasomersstatic void 478292066Sasomersvdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) 479241286Savg{ 480241286Savg nvlist_t *config; 481241286Savg 482241286Savg g_topology_assert_not(); 483241286Savg 484292066Sasomers *pguid = 0; 485292066Sasomers *vguid = 0; 486241286Savg if (vdev_geom_read_config(cp, &config) == 0) { 487292066Sasomers nvlist_get_guids(config, pguid, vguid); 488241286Savg nvlist_free(config); 489241286Savg } 490241286Savg} 491241286Savg 492219089Spjdstatic struct g_consumer * 493292066Sasomersvdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid) 494169303Spjd{ 495169303Spjd struct g_class *mp; 496169303Spjd struct g_geom *gp, *zgp; 497169303Spjd struct g_provider *pp; 498219089Spjd struct g_consumer *cp, *zcp; 499292066Sasomers uint64_t pguid, vguid; 500169303Spjd 501169303Spjd g_topology_assert(); 502169303Spjd 503169303Spjd zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); 504169303Spjd /* This orphan function should be never called. 
*/ 505169303Spjd zgp->orphan = vdev_geom_taste_orphan; 506169303Spjd zcp = g_new_consumer(zgp); 507169303Spjd 508219089Spjd cp = NULL; 509169303Spjd LIST_FOREACH(mp, &g_classes, class) { 510169303Spjd if (mp == &zfs_vdev_class) 511169303Spjd continue; 512169303Spjd LIST_FOREACH(gp, &mp->geom, geom) { 513169303Spjd if (gp->flags & G_GEOM_WITHER) 514169303Spjd continue; 515169303Spjd LIST_FOREACH(pp, &gp->provider, provider) { 516241286Savg if (vdev_geom_attach_taster(zcp, pp) != 0) 517169303Spjd continue; 518169303Spjd g_topology_unlock(); 519292066Sasomers vdev_geom_read_guids(zcp, &pguid, &vguid); 520169303Spjd g_topology_lock(); 521242332Sdelphij vdev_geom_detach_taster(zcp); 522292066Sasomers if (pguid != pool_guid || vguid != vdev_guid) 523169303Spjd continue; 524219089Spjd cp = vdev_geom_attach(pp); 525219089Spjd if (cp == NULL) { 526292066Sasomers ZFS_LOG(1, "ZFS WARNING: Unable to " 527292066Sasomers "attach to %s.\n", pp->name); 528169303Spjd continue; 529169303Spjd } 530219089Spjd break; 531169303Spjd } 532219089Spjd if (cp != NULL) 533219089Spjd break; 534169303Spjd } 535219089Spjd if (cp != NULL) 536219089Spjd break; 537169303Spjd } 538169303Spjdend: 539169303Spjd g_destroy_consumer(zcp); 540169303Spjd g_destroy_geom(zgp); 541169303Spjd return (cp); 542169303Spjd} 543169303Spjd 544185029Spjdstatic struct g_consumer * 545292066Sasomersvdev_geom_open_by_guids(vdev_t *vd) 546168404Spjd{ 547185174Spjd struct g_consumer *cp; 548185174Spjd char *buf; 549185174Spjd size_t len; 550185174Spjd 551219089Spjd g_topology_assert(); 552219089Spjd 553185174Spjd ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); 554292066Sasomers cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid); 555185174Spjd if (cp != NULL) { 556185174Spjd len = strlen(cp->provider->name) + strlen("/dev/") + 1; 557185174Spjd buf = kmem_alloc(len, KM_SLEEP); 558197842Spjd 559185174Spjd snprintf(buf, len, "/dev/%s", cp->provider->name); 560185174Spjd 
spa_strfree(vd->vdev_path); 561185174Spjd vd->vdev_path = buf; 562185174Spjd 563292066Sasomers ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", 564292066Sasomers (uintmax_t)spa_guid(vd->vdev_spa), 565185174Spjd (uintmax_t)vd->vdev_guid, vd->vdev_path); 566185174Spjd } else { 567292066Sasomers ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", 568292066Sasomers (uintmax_t)spa_guid(vd->vdev_spa), 569185174Spjd (uintmax_t)vd->vdev_guid); 570185174Spjd } 571185174Spjd 572185174Spjd return (cp); 573185174Spjd} 574185174Spjd 575185174Spjdstatic struct g_consumer * 576200158Spjdvdev_geom_open_by_path(vdev_t *vd, int check_guid) 577185174Spjd{ 578168404Spjd struct g_provider *pp; 579168404Spjd struct g_consumer *cp; 580292066Sasomers uint64_t pguid, vguid; 581168404Spjd 582219089Spjd g_topology_assert(); 583219089Spjd 584169303Spjd cp = NULL; 585168404Spjd pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); 586169303Spjd if (pp != NULL) { 587169303Spjd ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); 588203504Spjd cp = vdev_geom_attach(pp); 589218278Sae if (cp != NULL && check_guid && ISP2(pp->sectorsize) && 590218278Sae pp->sectorsize <= VDEV_PAD_SIZE) { 591169303Spjd g_topology_unlock(); 592292066Sasomers vdev_geom_read_guids(cp, &pguid, &vguid); 593169303Spjd g_topology_lock(); 594292066Sasomers if (pguid != spa_guid(vd->vdev_spa) || 595292066Sasomers vguid != vd->vdev_guid) { 596169303Spjd vdev_geom_detach(cp, 0); 597169303Spjd cp = NULL; 598185174Spjd ZFS_LOG(1, "guid mismatch for provider %s: " 599292066Sasomers "%ju:%ju != %ju:%ju.", vd->vdev_path, 600292066Sasomers (uintmax_t)spa_guid(vd->vdev_spa), 601292066Sasomers (uintmax_t)vd->vdev_guid, 602292066Sasomers (uintmax_t)pguid, (uintmax_t)vguid); 603185174Spjd } else { 604185174Spjd ZFS_LOG(1, "guid match for provider %s.", 605185029Spjd vd->vdev_path); 606185174Spjd } 607169303Spjd } 608168404Spjd } 609169303Spjd 610185029Spjd return (cp); 611185029Spjd} 612169303Spjd 
613185029Spjdstatic int 614236155Smmvdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 615254591Sgibbs uint64_t *logical_ashift, uint64_t *physical_ashift) 616185029Spjd{ 617185029Spjd struct g_provider *pp; 618185029Spjd struct g_consumer *cp; 619219089Spjd size_t bufsize; 620224791Spjd int error; 621185029Spjd 622185029Spjd /* 623185029Spjd * We must have a pathname, and it must be absolute. 624185029Spjd */ 625185029Spjd if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 626185029Spjd vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 627185029Spjd return (EINVAL); 628185029Spjd } 629185029Spjd 630185029Spjd vd->vdev_tsd = NULL; 631185029Spjd 632219089Spjd DROP_GIANT(); 633219089Spjd g_topology_lock(); 634203504Spjd error = 0; 635205346Spjd 636292066Sasomers if (vd->vdev_spa->spa_splitting_newspa || 637292066Sasomers (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && 638292066Sasomers vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) { 639292066Sasomers /* 640292066Sasomers * We are dealing with a vdev that hasn't been previously 641292066Sasomers * opened (since boot), and we are not loading an 642292066Sasomers * existing pool configuration. This looks like a 643292066Sasomers * vdev add operation to a new or existing pool. 644292066Sasomers * Assume the user knows what he/she is doing and find 645292066Sasomers * GEOM provider by its name, ignoring GUID mismatches. 646292066Sasomers * 647292066Sasomers * XXPOLICY: It would be safer to only allow a device 648292066Sasomers * that is unlabeled or labeled but missing 649292066Sasomers * GUID information to be opened in this fashion, 650292066Sasomers * unless we are doing a split, in which case we 651292066Sasomers * should allow any guid. 652292066Sasomers */ 653205346Spjd cp = vdev_geom_open_by_path(vd, 0); 654292066Sasomers } else { 655292066Sasomers /* 656292066Sasomers * Try using the recorded path for this device, but only 657292066Sasomers * accept it if its label data contains the expected GUIDs. 
658292066Sasomers */ 659205346Spjd cp = vdev_geom_open_by_path(vd, 1); 660205346Spjd if (cp == NULL) { 661205346Spjd /* 662205346Spjd * The device at vd->vdev_path doesn't have the 663292066Sasomers * expected GUIDs. The disks might have merely 664205346Spjd * moved around so try all other GEOM providers 665292066Sasomers * to find one with the right GUIDs. 666205346Spjd */ 667292066Sasomers cp = vdev_geom_open_by_guids(vd); 668205346Spjd } 669169303Spjd } 670205346Spjd 671185174Spjd if (cp == NULL) { 672185174Spjd ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); 673203504Spjd error = ENOENT; 674218278Sae } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || 675218278Sae !ISP2(cp->provider->sectorsize)) { 676218278Sae ZFS_LOG(1, "Provider %s has unsupported sectorsize.", 677218278Sae vd->vdev_path); 678218278Sae vdev_geom_detach(cp, 0); 679218278Sae error = EINVAL; 680218278Sae cp = NULL; 681209962Smm } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { 682207936Spjd int i; 683207936Spjd 684207936Spjd for (i = 0; i < 5; i++) { 685207936Spjd error = g_access(cp, 0, 1, 0); 686207936Spjd if (error == 0) 687207936Spjd break; 688207936Spjd g_topology_unlock(); 689207936Spjd tsleep(vd, 0, "vdev", hz / 2); 690207936Spjd g_topology_lock(); 691207936Spjd } 692203504Spjd if (error != 0) { 693207934Spjd printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", 694203504Spjd vd->vdev_path, error); 695203504Spjd vdev_geom_detach(cp, 0); 696203504Spjd cp = NULL; 697203504Spjd } 698185174Spjd } 699219089Spjd g_topology_unlock(); 700219089Spjd PICKUP_GIANT(); 701203504Spjd if (cp == NULL) { 702203504Spjd vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 703203504Spjd return (error); 704203504Spjd } 705185029Spjd 706185029Spjd cp->private = vd; 707208142Spjd vd->vdev_tsd = cp; 708169303Spjd pp = cp->provider; 709168404Spjd 710168404Spjd /* 711168404Spjd * Determine the actual size of the device. 
712168404Spjd */ 713236155Smm *max_psize = *psize = pp->mediasize; 714168404Spjd 715168404Spjd /* 716254591Sgibbs * Determine the device's minimum transfer size and preferred 717254591Sgibbs * transfer size. 718168404Spjd */ 719254591Sgibbs *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; 720254591Sgibbs *physical_ashift = 0; 721254591Sgibbs if (pp->stripesize) 722254591Sgibbs *physical_ashift = highbit(pp->stripesize) - 1; 723168404Spjd 724168404Spjd /* 725240868Spjd * Clear the nowritecache settings, so that on a vdev_reopen() 726240868Spjd * we will try again. 727168404Spjd */ 728168404Spjd vd->vdev_nowritecache = B_FALSE; 729168404Spjd 730219089Spjd if (vd->vdev_physpath != NULL) 731219089Spjd spa_strfree(vd->vdev_physpath); 732219089Spjd bufsize = sizeof("/dev/") + strlen(pp->name); 733219089Spjd vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); 734219089Spjd snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); 735219089Spjd 736256956Ssmh /* 737256956Ssmh * Determine the device's rotation rate. 
738256956Ssmh */ 739256956Ssmh vdev_geom_set_rotation_rate(vd, cp); 740256956Ssmh 741168404Spjd return (0); 742168404Spjd} 743168404Spjd 744168404Spjdstatic void 745168404Spjdvdev_geom_close(vdev_t *vd) 746168404Spjd{ 747168404Spjd struct g_consumer *cp; 748168404Spjd 749208142Spjd cp = vd->vdev_tsd; 750208142Spjd if (cp == NULL) 751168404Spjd return; 752208142Spjd vd->vdev_tsd = NULL; 753219089Spjd vd->vdev_delayed_close = B_FALSE; 754253754Smav cp->private = NULL; /* XXX locking */ 755168404Spjd g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); 756168404Spjd} 757168404Spjd 758168404Spjdstatic void 759168404Spjdvdev_geom_io_intr(struct bio *bp) 760168404Spjd{ 761219089Spjd vdev_t *vd; 762168404Spjd zio_t *zio; 763168404Spjd 764168404Spjd zio = bp->bio_caller1; 765219089Spjd vd = zio->io_vd; 766208142Spjd zio->io_error = bp->bio_error; 767208142Spjd if (zio->io_error == 0 && bp->bio_resid != 0) 768269407Ssmh zio->io_error = SET_ERROR(EIO); 769264885Ssmh 770264885Ssmh switch(zio->io_error) { 771264885Ssmh case ENOTSUP: 772208142Spjd /* 773264885Ssmh * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know 774264885Ssmh * that future attempts will never succeed. In this case 775264885Ssmh * we set a persistent flag so that we don't bother with 776264885Ssmh * requests in the future. 777208142Spjd */ 778264885Ssmh switch(bp->bio_cmd) { 779264885Ssmh case BIO_FLUSH: 780264885Ssmh vd->vdev_nowritecache = B_TRUE; 781264885Ssmh break; 782264885Ssmh case BIO_DELETE: 783264885Ssmh vd->vdev_notrim = B_TRUE; 784264885Ssmh break; 785219089Spjd } 786264885Ssmh break; 787264885Ssmh case ENXIO: 788264885Ssmh if (!vd->vdev_remove_wanted) { 789264885Ssmh /* 790264885Ssmh * If provider's error is set we assume it is being 791264885Ssmh * removed. 
792264885Ssmh */ 793264885Ssmh if (bp->bio_to->error != 0) { 794264885Ssmh vd->vdev_remove_wanted = B_TRUE; 795264885Ssmh spa_async_request(zio->io_spa, 796264885Ssmh SPA_ASYNC_REMOVE); 797264885Ssmh } else if (!vd->vdev_delayed_close) { 798264885Ssmh vd->vdev_delayed_close = B_TRUE; 799264885Ssmh } 800264885Ssmh } 801264885Ssmh break; 802219089Spjd } 803208142Spjd g_destroy_bio(bp); 804208142Spjd zio_interrupt(zio); 805168404Spjd} 806168404Spjd 807274304Sdelphijstatic void 808168404Spjdvdev_geom_io_start(zio_t *zio) 809168404Spjd{ 810168404Spjd vdev_t *vd; 811168404Spjd struct g_consumer *cp; 812168404Spjd struct bio *bp; 813168404Spjd int error; 814168404Spjd 815168404Spjd vd = zio->io_vd; 816168404Spjd 817265152Ssmh switch (zio->io_type) { 818265152Ssmh case ZIO_TYPE_IOCTL: 819168404Spjd /* XXPOLICY */ 820185029Spjd if (!vdev_readable(vd)) { 821265152Ssmh zio->io_error = SET_ERROR(ENXIO); 822274304Sdelphij zio_interrupt(zio); 823274304Sdelphij return; 824269407Ssmh } else { 825269407Ssmh switch (zio->io_cmd) { 826269407Ssmh case DKIOCFLUSHWRITECACHE: 827269407Ssmh if (zfs_nocacheflush || vdev_geom_bio_flush_disable) 828269407Ssmh break; 829269407Ssmh if (vd->vdev_nowritecache) { 830269407Ssmh zio->io_error = SET_ERROR(ENOTSUP); 831269407Ssmh break; 832269407Ssmh } 833269407Ssmh goto sendreq; 834269407Ssmh default: 835265152Ssmh zio->io_error = SET_ERROR(ENOTSUP); 836168404Spjd } 837168404Spjd } 838168404Spjd 839274304Sdelphij zio_execute(zio); 840274304Sdelphij return; 841265152Ssmh case ZIO_TYPE_FREE: 842265152Ssmh if (vd->vdev_notrim) { 843265152Ssmh zio->io_error = SET_ERROR(ENOTSUP); 844269407Ssmh } else if (!vdev_geom_bio_delete_disable) { 845269407Ssmh goto sendreq; 846265152Ssmh } 847274304Sdelphij zio_execute(zio); 848274304Sdelphij return; 849168404Spjd } 850168404Spjdsendreq: 851274619Ssmh ASSERT(zio->io_type == ZIO_TYPE_READ || 852274619Ssmh zio->io_type == ZIO_TYPE_WRITE || 853274619Ssmh zio->io_type == ZIO_TYPE_FREE || 854274619Ssmh zio->io_type == 
ZIO_TYPE_IOCTL); 855274619Ssmh 856208142Spjd cp = vd->vdev_tsd; 857185029Spjd if (cp == NULL) { 858265152Ssmh zio->io_error = SET_ERROR(ENXIO); 859269407Ssmh zio_interrupt(zio); 860274304Sdelphij return; 861168404Spjd } 862168404Spjd bp = g_alloc_bio(); 863168404Spjd bp->bio_caller1 = zio; 864168404Spjd switch (zio->io_type) { 865168404Spjd case ZIO_TYPE_READ: 866168404Spjd case ZIO_TYPE_WRITE: 867168404Spjd bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; 868168404Spjd bp->bio_data = zio->io_data; 869168404Spjd bp->bio_offset = zio->io_offset; 870168404Spjd bp->bio_length = zio->io_size; 871168404Spjd break; 872265152Ssmh case ZIO_TYPE_FREE: 873265152Ssmh bp->bio_cmd = BIO_DELETE; 874265152Ssmh bp->bio_data = NULL; 875265152Ssmh bp->bio_offset = zio->io_offset; 876265152Ssmh bp->bio_length = zio->io_size; 877265152Ssmh break; 878168404Spjd case ZIO_TYPE_IOCTL: 879269407Ssmh bp->bio_cmd = BIO_FLUSH; 880269407Ssmh bp->bio_flags |= BIO_ORDERED; 881269407Ssmh bp->bio_data = NULL; 882269407Ssmh bp->bio_offset = cp->provider->mediasize; 883269407Ssmh bp->bio_length = 0; 884168404Spjd break; 885168404Spjd } 886168404Spjd bp->bio_done = vdev_geom_io_intr; 887168404Spjd 888168404Spjd g_io_request(bp, cp); 889168404Spjd} 890168404Spjd 891168404Spjdstatic void 892168404Spjdvdev_geom_io_done(zio_t *zio) 893168404Spjd{ 894168404Spjd} 895168404Spjd 896219089Spjdstatic void 897219089Spjdvdev_geom_hold(vdev_t *vd) 898219089Spjd{ 899219089Spjd} 900219089Spjd 901219089Spjdstatic void 902219089Spjdvdev_geom_rele(vdev_t *vd) 903219089Spjd{ 904219089Spjd} 905219089Spjd 906168404Spjdvdev_ops_t vdev_geom_ops = { 907168404Spjd vdev_geom_open, 908168404Spjd vdev_geom_close, 909168404Spjd vdev_default_asize, 910168404Spjd vdev_geom_io_start, 911168404Spjd vdev_geom_io_done, 912168404Spjd NULL, 913219089Spjd vdev_geom_hold, 914219089Spjd vdev_geom_rele, 915168404Spjd VDEV_TYPE_DISK, /* name of this vdev type */ 916168404Spjd B_TRUE /* leaf vdev */ 917168404Spjd}; 918