vdev_geom.c: r294027 (deleted) -> r294329 (added)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23 * All rights reserved.
24 *
25 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26 */
27
28#include <sys/zfs_context.h>
29#include <sys/param.h>
30#include <sys/kernel.h>
31#include <sys/bio.h>
32#include <sys/disk.h>
33#include <sys/spa.h>
34#include <sys/spa_impl.h>
35#include <sys/vdev_impl.h>
36#include <sys/fs/zfs.h>
37#include <sys/zio.h>
38#include <geom/geom.h>
39#include <geom/geom_int.h>
40
41/*
42 * Virtual device vector for GEOM.
43 */
44
45static g_attrchanged_t vdev_geom_attrchanged;
46struct g_class zfs_vdev_class = {
47 .name = "ZFS::VDEV",
48 .version = G_VERSION,
49 .attrchanged = vdev_geom_attrchanged,
50};
51
52DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
53
54SYSCTL_DECL(_vfs_zfs_vdev);
55/* Don't send BIO_FLUSH. */
56static int vdev_geom_bio_flush_disable;
57SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
58 &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
59/* Don't send BIO_DELETE. */
60static int vdev_geom_bio_delete_disable;
61SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
62 &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
63
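/*
 * Usage sketch (editor's illustration, not part of either revision):
 * both knobs are CTLFLAG_RWTUN, so they can be preset as loader
 * tunables or flipped at runtime, e.g.:
 *
 *	# sysctl vfs.zfs.vdev.bio_flush_disable=1
 *	# sysctl vfs.zfs.vdev.bio_delete_disable=1
 */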
64/*
65 * Thread local storage used to indicate when a thread is probing geoms
 66 * for their GUIDs. If NULL, this thread is not tasting geoms. If non-NULL,
67 * it is looking for a replacement for the vdev_t* that is its value.
68 */
69uint_t zfs_geom_probe_vdev_key;
70
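/*
 * Sketch of the intended protocol (the probe side is an assumption for
 * illustration; only the set/clear pair in vdev_geom_open() appears in
 * this change):
 *
 *	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);
 *	... taste providers; a zvol open path could then check ...
 *	if (tsd_get(zfs_geom_probe_vdev_key) != NULL)
 *		return (EOPNOTSUPP);
 *	...
 *	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);
 */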
64static void
65vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
66{
67 int error;
68 uint16_t rate;
69
70 error = g_getattr("GEOM::rotation_rate", cp, &rate);
71 if (error == 0)
72 vd->vdev_rotation_rate = rate;
73 else
74 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
75}
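/*
 * Note: a failed g_getattr() here simply means the provider does not
 * implement GEOM::rotation_rate, so the vdev is left at
 * VDEV_RATE_UNKNOWN rather than a guessed rate.
 */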
76
77static void
78vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
79{
80 vdev_t *vd;
81 spa_t *spa;
82 char *physpath;
83 int error, physpath_len;
84
85 vd = cp->private;
86 if (vd == NULL)
87 return;
88
89 if (strcmp(attr, "GEOM::rotation_rate") == 0) {
90 vdev_geom_set_rotation_rate(vd, cp);
91 return;
92 }
93
94 if (strcmp(attr, "GEOM::physpath") != 0)
95 return;
96
97 if (g_access(cp, 1, 0, 0) != 0)
98 return;
99
100 /*
101 * Record/Update physical path information for this device.
102 */
103 spa = vd->vdev_spa;
104 physpath_len = MAXPATHLEN;
105 physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
106 error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
107 g_access(cp, -1, 0, 0);
108 if (error == 0) {
109 char *old_physpath;
110
111 old_physpath = vd->vdev_physpath;
112 vd->vdev_physpath = spa_strdup(physpath);
113 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
114
115 if (old_physpath != NULL) {
116 int held_lock;
117
118 held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER);
119 if (held_lock == 0) {
120 g_topology_unlock();
121 spa_config_enter(spa, SCL_STATE, FTAG,
122 RW_WRITER);
123 }
124
125 spa_strfree(old_physpath);
126
127 if (held_lock == 0) {
128 spa_config_exit(spa, SCL_STATE, FTAG);
129 g_topology_lock();
130 }
131 }
132 }
133 g_free(physpath);
134}
135
136static void
137vdev_geom_orphan(struct g_consumer *cp)
138{
139 vdev_t *vd;
140
141 g_topology_assert();
142
143 vd = cp->private;
144 if (vd == NULL) {
145 /* Vdev close in progress. Ignore the event. */
146 return;
147 }
148
149 /*
150 * Orphan callbacks occur from the GEOM event thread.
151 * Concurrent with this call, new I/O requests may be
152 * working their way through GEOM, only to discover
153 * (once executed by the g_down thread) that we've
154 * been orphaned from our disk provider. These I/Os
155 * must be retired before we can detach our consumer.
156 * This is most easily achieved by acquiring the
157 * SPA ZIO configuration lock as a writer, but doing
158 * so with the GEOM topology lock held would cause
159 * a lock order reversal. Instead, rely on the SPA's
160 * async removal support to invoke a close on this
161 * vdev once it is safe to do so.
162 */
163 vd->vdev_remove_wanted = B_TRUE;
164 spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
165}
166
167static struct g_consumer *
168vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
169{
170 struct g_geom *gp;
171 struct g_consumer *cp;
172
173 g_topology_assert();
174
175 ZFS_LOG(1, "Attaching to %s.", pp->name);
176 /* Do we already have a geom? If not, create one. */
177 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
178 if (gp->flags & G_GEOM_WITHER)
179 continue;
180 if (strcmp(gp->name, "zfs::vdev") != 0)
181 continue;
182 break;
183 }
184 if (gp == NULL) {
185 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
186 gp->orphan = vdev_geom_orphan;
187 gp->attrchanged = vdev_geom_attrchanged;
188 cp = g_new_consumer(gp);
189 if (g_attach(cp, pp) != 0) {
190 g_wither_geom(gp, ENXIO);
191 return (NULL);
192 }
193 if (g_access(cp, 1, 0, 1) != 0) {
194 g_wither_geom(gp, ENXIO);
195 return (NULL);
196 }
197 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
198 } else {
199 /* Check if we are already connected to this provider. */
200 LIST_FOREACH(cp, &gp->consumer, consumer) {
201 if (cp->provider == pp) {
202 ZFS_LOG(1, "Found consumer for %s.", pp->name);
203 break;
204 }
205 }
206 if (cp == NULL) {
207 cp = g_new_consumer(gp);
208 if (g_attach(cp, pp) != 0) {
209 g_destroy_consumer(cp);
210 return (NULL);
211 }
212 if (g_access(cp, 1, 0, 1) != 0) {
213 g_detach(cp);
214 g_destroy_consumer(cp);
215 return (NULL);
216 }
217 ZFS_LOG(1, "Created consumer for %s.", pp->name);
218 } else {
219 if (g_access(cp, 1, 0, 1) != 0)
220 return (NULL);
221 ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
222 }
223 }
224
225 /*
226 * BUG: cp may already belong to a vdev. This could happen if:
227 * 1) That vdev is a shared spare, or
228 * 2) We are trying to reopen a missing vdev and we are scanning by
229 * guid. In that case, we'll ultimately fail to open this consumer,
230 * but not until after setting the private field.
231 * The solution is to:
232 * 1) Don't set the private field until after the open succeeds, and
233 * 2) Set it to a linked list of vdevs, not just a single vdev
234 */
235 cp->private = vd;
236 vd->vdev_tsd = cp;
237
238 /* Fetch initial physical path information for this device. */
239 vdev_geom_attrchanged(cp, "GEOM::physpath");
240
241 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
242 return (cp);
243}
244
245static void
246vdev_geom_close_locked(vdev_t *vd)
247{
248 struct g_geom *gp;
249 struct g_consumer *cp;
250
251 g_topology_assert();
252
253 cp = vd->vdev_tsd;
254 if (cp == NULL)
255 return;
256
257 ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
258 KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
259 vd->vdev_tsd = NULL;
260 vd->vdev_delayed_close = B_FALSE;
261 cp->private = NULL;
262
263 gp = cp->geom;
264 g_access(cp, -1, 0, -1);
265 /* Destroy consumer on last close. */
266 if (cp->acr == 0 && cp->ace == 0) {
267 if (cp->acw > 0)
268 g_access(cp, 0, -cp->acw, 0);
269 if (cp->provider != NULL) {
270 ZFS_LOG(1, "Destroyed consumer to %s.",
271 cp->provider->name);
272 g_detach(cp);
273 }
274 g_destroy_consumer(cp);
275 }
276 /* Destroy geom if there are no consumers left. */
277 if (LIST_EMPTY(&gp->consumer)) {
278 ZFS_LOG(1, "Destroyed geom %s.", gp->name);
279 g_wither_geom(gp, ENXIO);
280 }
281}
282
283static void
284nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
285{
286
287 nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
288 nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
289}
290
291static int
292vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
293{
294 struct bio *bp;
295 u_char *p;
296 off_t off, maxio;
297 int error;
298
299 ASSERT((offset % cp->provider->sectorsize) == 0);
300 ASSERT((size % cp->provider->sectorsize) == 0);
301
302 bp = g_alloc_bio();
303 off = offset;
304 offset += size;
305 p = data;
306 maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
307 error = 0;
308
309 for (; off < offset; off += maxio, p += maxio, size -= maxio) {
310 bzero(bp, sizeof(*bp));
311 bp->bio_cmd = cmd;
312 bp->bio_done = NULL;
313 bp->bio_offset = off;
314 bp->bio_length = MIN(size, maxio);
315 bp->bio_data = p;
316 g_io_request(bp, cp);
317 error = biowait(bp, "vdev_geom_io");
318 if (error != 0)
319 break;
320 }
321
322 g_destroy_bio(bp);
323 return (error);
324}
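/*
 * Worked example for the chunking above: transfers are capped at
 * MAXPHYS rounded down to a sector multiple, so with 512-byte sectors
 * and the historical MAXPHYS of 128 KiB, a 300 KiB read is issued as
 * three sequential bios of 128 KiB, 128 KiB and 44 KiB, reusing the
 * same struct bio after each biowait().
 */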
325
326static void
327vdev_geom_taste_orphan(struct g_consumer *cp)
328{
329
330 KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
331 cp->provider->name));
336 ZFS_LOG(0, "WARNING: Orphan %s while tasting its VDev GUID.",
337 cp->provider->name);
332}
333
334static int
335vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
336{
337 struct g_provider *pp;
338 vdev_label_t *label;
339 char *p, *buf;
340 size_t buflen;
341 uint64_t psize;
342 off_t offset, size;
343 uint64_t state, txg;
344 int error, l, len;
345
346 g_topology_assert_not();
347
348 pp = cp->provider;
349 ZFS_LOG(1, "Reading config from %s...", pp->name);
350
351 psize = pp->mediasize;
352 psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
353
354 size = sizeof(*label) + pp->sectorsize -
355 ((sizeof(*label) - 1) % pp->sectorsize) - 1;
356
357 label = kmem_alloc(size, KM_SLEEP);
358 buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
359
360 *config = NULL;
361 for (l = 0; l < VDEV_LABELS; l++) {
362
363 offset = vdev_label_offset(psize, l, 0);
364 if ((offset % pp->sectorsize) != 0)
365 continue;
366
367 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
368 continue;
369 buf = label->vl_vdev_phys.vp_nvlist;
370
371 if (nvlist_unpack(buf, buflen, config, 0) != 0)
372 continue;
373
374 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
375 &state) != 0 || state > POOL_STATE_L2CACHE) {
376 nvlist_free(*config);
377 *config = NULL;
378 continue;
379 }
380
381 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
382 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
383 &txg) != 0 || txg == 0)) {
384 nvlist_free(*config);
385 *config = NULL;
386 continue;
387 }
388
389 break;
390 }
391
392 kmem_free(label, size);
393 return (*config == NULL ? ENOENT : 0);
394}
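/*
 * Sizing note for the label buffer above: "size" is sizeof (vdev_label_t)
 * rounded up to the provider's sector size. E.g. for a 256 KiB label on
 * 4 KiB sectors: 262144 + 4096 - ((262144 - 1) % 4096) - 1 = 262144,
 * i.e. already aligned, while odd sector sizes round up to the next
 * multiple.
 */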
395
396static void
397resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
398{
399 nvlist_t **new_configs;
400 uint64_t i;
401
402 if (id < *count)
403 return;
404 new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
405 KM_SLEEP);
406 for (i = 0; i < *count; i++)
407 new_configs[i] = (*configs)[i];
408 if (*configs != NULL)
409 kmem_free(*configs, *count * sizeof(void *));
410 *configs = new_configs;
411 *count = id + 1;
412}
413
414static void
415process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
416 const char *name, uint64_t* known_pool_guid)
417{
418 nvlist_t *vdev_tree;
419 uint64_t pool_guid;
420 uint64_t vdev_guid, known_guid;
421 uint64_t id, txg, known_txg;
422 char *pname;
423 int i;
424
425 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
426 strcmp(pname, name) != 0)
427 goto ignore;
428
429 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
430 goto ignore;
431
432 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
433 goto ignore;
434
435 if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
436 goto ignore;
437
438 if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
439 goto ignore;
440
441 VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
442
443 if (*known_pool_guid != 0) {
444 if (pool_guid != *known_pool_guid)
445 goto ignore;
446 } else
447 *known_pool_guid = pool_guid;
448
449 resize_configs(configs, count, id);
450
451 if ((*configs)[id] != NULL) {
452 VERIFY(nvlist_lookup_uint64((*configs)[id],
453 ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
454 if (txg <= known_txg)
455 goto ignore;
456 nvlist_free((*configs)[id]);
457 }
458
459 (*configs)[id] = cfg;
460 return;
461
462ignore:
463 nvlist_free(cfg);
464}
465
466static int
467vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
468{
469 int error;
470
471 if (pp->flags & G_PF_WITHER)
472 return (EINVAL);
473 g_attach(cp, pp);
474 error = g_access(cp, 1, 0, 0);
475 if (error == 0) {
476 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
477 error = EINVAL;
478 else if (pp->mediasize < SPA_MINDEVSIZE)
479 error = EINVAL;
480 if (error != 0)
481 g_access(cp, -1, 0, 0);
482 }
483 if (error != 0)
484 g_detach(cp);
485 return (error);
486}
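/*
 * The checks above reject providers that cannot hold a pool: the
 * sector size must be a power of two no larger than VDEV_PAD_SIZE
 * (8 KiB) and the medium must be at least SPA_MINDEVSIZE (64 MiB).
 */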
487
488static void
489vdev_geom_detach_taster(struct g_consumer *cp)
490{
491 g_access(cp, -1, 0, 0);
492 g_detach(cp);
493}
494
495int
496vdev_geom_read_pool_label(const char *name,
497 nvlist_t ***configs, uint64_t *count)
498{
499 struct g_class *mp;
500 struct g_geom *gp, *zgp;
501 struct g_provider *pp;
502 struct g_consumer *zcp;
503 nvlist_t *vdev_cfg;
504 uint64_t pool_guid;
505 int error;
506
507 DROP_GIANT();
508 g_topology_lock();
509
510 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
511 /* This orphan function should never be called. */
512 zgp->orphan = vdev_geom_taste_orphan;
513 zcp = g_new_consumer(zgp);
514
515 *configs = NULL;
516 *count = 0;
517 pool_guid = 0;
518 LIST_FOREACH(mp, &g_classes, class) {
519 if (mp == &zfs_vdev_class)
520 continue;
521 LIST_FOREACH(gp, &mp->geom, geom) {
522 if (gp->flags & G_GEOM_WITHER)
523 continue;
524 LIST_FOREACH(pp, &gp->provider, provider) {
525 if (pp->flags & G_PF_WITHER)
526 continue;
527 if (vdev_geom_attach_taster(zcp, pp) != 0)
528 continue;
529 g_topology_unlock();
530 error = vdev_geom_read_config(zcp, &vdev_cfg);
531 g_topology_lock();
532 vdev_geom_detach_taster(zcp);
533 if (error)
534 continue;
535 ZFS_LOG(1, "successfully read vdev config");
536
537 process_vdev_config(configs, count,
538 vdev_cfg, name, &pool_guid);
539 }
540 }
541 }
542
543 g_destroy_consumer(zcp);
544 g_destroy_geom(zgp);
545 g_topology_unlock();
546 PICKUP_GIANT();
547
548 return (*count > 0 ? 0 : ENOENT);
549}
550
551static void
552vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
553{
554 nvlist_t *config;
555
556 g_topology_assert_not();
557
558 *pguid = 0;
559 *vguid = 0;
560 if (vdev_geom_read_config(cp, &config) == 0) {
561 nvlist_get_guids(config, pguid, vguid);
562 nvlist_free(config);
563 }
564}
565
566static struct g_consumer *
567vdev_geom_attach_by_guids(vdev_t *vd)
568{
569 struct g_class *mp;
570 struct g_geom *gp, *zgp;
571 struct g_provider *pp;
572 struct g_consumer *cp, *zcp;
573 uint64_t pguid, vguid;
574
575 g_topology_assert();
576
577 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
578 /* This orphan function should be never called. */
579 zgp->orphan = vdev_geom_taste_orphan;
580 zcp = g_new_consumer(zgp);
581
582 cp = NULL;
583 LIST_FOREACH(mp, &g_classes, class) {
584 if (mp == &zfs_vdev_class)
585 continue;
586 LIST_FOREACH(gp, &mp->geom, geom) {
587 if (gp->flags & G_GEOM_WITHER)
588 continue;
589 LIST_FOREACH(pp, &gp->provider, provider) {
590 if (vdev_geom_attach_taster(zcp, pp) != 0)
591 continue;
592 g_topology_unlock();
593 vdev_geom_read_guids(zcp, &pguid, &vguid);
594 g_topology_lock();
595 vdev_geom_detach_taster(zcp);
596 /*
597 * Check that the label's vdev guid matches the
598 * desired guid. If the label has a pool guid,
599 * check that it matches too. (Inactive spares
600 * and L2ARCs do not have any pool guid in the
601 * label.)
602 */
603 if ((pguid != 0 &&
604 pguid != spa_guid(vd->vdev_spa)) ||
605 vguid != vd->vdev_guid)
606 continue;
607 cp = vdev_geom_attach(pp, vd);
608 if (cp == NULL) {
609 printf("ZFS WARNING: Unable to "
610 "attach to %s.\n", pp->name);
611 continue;
612 }
613 break;
614 }
615 if (cp != NULL)
616 break;
617 }
618 if (cp != NULL)
619 break;
620 }
621end:
622 g_destroy_consumer(zcp);
623 g_destroy_geom(zgp);
624 return (cp);
625}
626
627static struct g_consumer *
628vdev_geom_open_by_guids(vdev_t *vd)
629{
630 struct g_consumer *cp;
631 char *buf;
632 size_t len;
633
634 g_topology_assert();
635
636 ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
637 cp = vdev_geom_attach_by_guids(vd);
638 if (cp != NULL) {
639 len = strlen(cp->provider->name) + strlen("/dev/") + 1;
640 buf = kmem_alloc(len, KM_SLEEP);
641
642 snprintf(buf, len, "/dev/%s", cp->provider->name);
643 spa_strfree(vd->vdev_path);
644 vd->vdev_path = buf;
645
646 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
647 (uintmax_t)spa_guid(vd->vdev_spa),
648 (uintmax_t)vd->vdev_guid, vd->vdev_path);
649 } else {
650 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
651 (uintmax_t)spa_guid(vd->vdev_spa),
652 (uintmax_t)vd->vdev_guid);
653 }
654
655 return (cp);
656}
657
658static struct g_consumer *
659vdev_geom_open_by_path(vdev_t *vd, int check_guid)
660{
661 struct g_provider *pp;
662 struct g_consumer *cp;
663 uint64_t pguid, vguid;
664
665 g_topology_assert();
666
667 cp = NULL;
668 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
669 if (pp != NULL) {
670 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
671 cp = vdev_geom_attach(pp, vd);
672 if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
673 pp->sectorsize <= VDEV_PAD_SIZE) {
674 g_topology_unlock();
675 vdev_geom_read_guids(cp, &pguid, &vguid);
676 g_topology_lock();
677 if (pguid != spa_guid(vd->vdev_spa) ||
678 vguid != vd->vdev_guid) {
679 vdev_geom_close_locked(vd);
680 cp = NULL;
681 ZFS_LOG(1, "guid mismatch for provider %s: "
682 "%ju:%ju != %ju:%ju.", vd->vdev_path,
683 (uintmax_t)spa_guid(vd->vdev_spa),
684 (uintmax_t)vd->vdev_guid,
685 (uintmax_t)pguid, (uintmax_t)vguid);
686 } else {
687 ZFS_LOG(1, "guid match for provider %s.",
688 vd->vdev_path);
689 }
690 }
691 }
692
693 return (cp);
694}
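/*
 * Path-to-provider translation used above: vd->vdev_path is absolute
 * (e.g. "/dev/ada0"), while GEOM provider names carry no prefix, so the
 * sizeof("/dev/") - 1 arithmetic skips the five prefix bytes before
 * g_provider_by_name(), i.e. "/dev/ada0" -> "ada0".
 */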
695
696static int
697vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
698 uint64_t *logical_ashift, uint64_t *physical_ashift)
699{
700 struct g_provider *pp;
701 struct g_consumer *cp;
702 size_t bufsize;
703 int error;
704
710 /* Set the TLS to indicate down the stack that we should not access zvols. */
711 VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);
712
705 /*
706 * We must have a pathname, and it must be absolute.
707 */
708 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
709 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
710 return (EINVAL);
711 }
712
713 vd->vdev_tsd = NULL;
714
715 DROP_GIANT();
716 g_topology_lock();
717 error = 0;
718
719 if (vd->vdev_spa->spa_splitting_newspa ||
720 (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
721 vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
722 /*
723 * We are dealing with a vdev that hasn't been previously
724 * opened (since boot), and we are not loading an
725 * existing pool configuration. This looks like a
726 * vdev add operation to a new or existing pool.
727 * Assume the user knows what he/she is doing and find the
728 * GEOM provider by its name, ignoring GUID mismatches.
729 *
730 * XXPOLICY: It would be safer to only allow a device
731 * that is unlabeled or labeled but missing
732 * GUID information to be opened in this fashion,
733 * unless we are doing a split, in which case we
734 * should allow any guid.
735 */
736 cp = vdev_geom_open_by_path(vd, 0);
737 } else {
738 /*
739 * Try using the recorded path for this device, but only
740 * accept it if its label data contains the expected GUIDs.
741 */
742 cp = vdev_geom_open_by_path(vd, 1);
743 if (cp == NULL) {
744 /*
745 * The device at vd->vdev_path doesn't have the
746 * expected GUIDs. The disks might have merely
747 * moved around so try all other GEOM providers
748 * to find one with the right GUIDs.
749 */
750 cp = vdev_geom_open_by_guids(vd);
751 }
752 }
753
762 /* Clear the TLS now that tasting is done */
763 VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);
764
754 if (cp == NULL) {
755 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
756 error = ENOENT;
757 } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
758 !ISP2(cp->provider->sectorsize)) {
759 ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
760 vd->vdev_path);
761
762 vdev_geom_close_locked(vd);
763 error = EINVAL;
764 cp = NULL;
765 } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
766 int i;
767
768 for (i = 0; i < 5; i++) {
769 error = g_access(cp, 0, 1, 0);
770 if (error == 0)
771 break;
772 g_topology_unlock();
773 tsleep(vd, 0, "vdev", hz / 2);
774 g_topology_lock();
775 }
776 if (error != 0) {
777 printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
778 vd->vdev_path, error);
779 vdev_geom_close_locked(vd);
780 cp = NULL;
781 }
782 }
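/*
 * The loop above polls for write access up to five times at hz / 2
 * tick intervals, i.e. it waits roughly 2.5 seconds in total for a
 * competing consumer to release the provider before giving up.
 */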
783
784 g_topology_unlock();
785 PICKUP_GIANT();
786 if (cp == NULL) {
787 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
788 return (error);
789 }
790 pp = cp->provider;
791
792 /*
793 * Determine the actual size of the device.
794 */
795 *max_psize = *psize = pp->mediasize;
796
797 /*
798 * Determine the device's minimum transfer size and preferred
799 * transfer size.
800 */
801 *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
802 *physical_ashift = 0;
803 if (pp->stripesize)
804 *physical_ashift = highbit(pp->stripesize) - 1;
805
806 /*
807 * Clear the nowritecache settings, so that on a vdev_reopen()
808 * we will try again.
809 */
810 vd->vdev_nowritecache = B_FALSE;
811
812 /*
813 * Determine the device's rotation rate.
814 */
815 vdev_geom_set_rotation_rate(vd, cp);
816
817 return (0);
818}
819
820static void
821vdev_geom_close(vdev_t *vd)
822{
823
824 DROP_GIANT();
825 g_topology_lock();
826 vdev_geom_close_locked(vd);
827 g_topology_unlock();
828 PICKUP_GIANT();
829}
830
831static void
832vdev_geom_io_intr(struct bio *bp)
833{
834 vdev_t *vd;
835 zio_t *zio;
836
837 zio = bp->bio_caller1;
838 vd = zio->io_vd;
839 zio->io_error = bp->bio_error;
840 if (zio->io_error == 0 && bp->bio_resid != 0)
841 zio->io_error = SET_ERROR(EIO);
842
843 switch(zio->io_error) {
844 case ENOTSUP:
845 /*
846 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
847 * that future attempts will never succeed. In this case
848 * we set a persistent flag so that we don't bother with
849 * requests in the future.
850 */
851 switch(bp->bio_cmd) {
852 case BIO_FLUSH:
853 vd->vdev_nowritecache = B_TRUE;
854 break;
855 case BIO_DELETE:
856 vd->vdev_notrim = B_TRUE;
857 break;
858 }
859 break;
860 case ENXIO:
861 if (!vd->vdev_remove_wanted) {
862 /*
863 * If provider's error is set we assume it is being
864 * removed.
865 */
866 if (bp->bio_to->error != 0) {
867 vd->vdev_remove_wanted = B_TRUE;
868 spa_async_request(zio->io_spa,
869 SPA_ASYNC_REMOVE);
870 } else if (!vd->vdev_delayed_close) {
871 vd->vdev_delayed_close = B_TRUE;
872 }
873 }
874 break;
875 }
876 g_destroy_bio(bp);
877 zio_interrupt(zio);
878}
879
880static void
881vdev_geom_io_start(zio_t *zio)
882{
883 vdev_t *vd;
884 struct g_consumer *cp;
885 struct bio *bp;
886 int error;
887
888 vd = zio->io_vd;
889
890 switch (zio->io_type) {
891 case ZIO_TYPE_IOCTL:
892 /* XXPOLICY */
893 if (!vdev_readable(vd)) {
894 zio->io_error = SET_ERROR(ENXIO);
895 zio_interrupt(zio);
896 return;
897 } else {
898 switch (zio->io_cmd) {
899 case DKIOCFLUSHWRITECACHE:
900 if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
901 break;
902 if (vd->vdev_nowritecache) {
903 zio->io_error = SET_ERROR(ENOTSUP);
904 break;
905 }
906 goto sendreq;
907 default:
908 zio->io_error = SET_ERROR(ENOTSUP);
909 }
910 }
911
912 zio_execute(zio);
913 return;
914 case ZIO_TYPE_FREE:
915 if (vd->vdev_notrim) {
916 zio->io_error = SET_ERROR(ENOTSUP);
917 } else if (!vdev_geom_bio_delete_disable) {
918 goto sendreq;
919 }
920 zio_execute(zio);
921 return;
922 }
923sendreq:
924 ASSERT(zio->io_type == ZIO_TYPE_READ ||
925 zio->io_type == ZIO_TYPE_WRITE ||
926 zio->io_type == ZIO_TYPE_FREE ||
927 zio->io_type == ZIO_TYPE_IOCTL);
928
929 cp = vd->vdev_tsd;
930 if (cp == NULL) {
931 zio->io_error = SET_ERROR(ENXIO);
932 zio_interrupt(zio);
933 return;
934 }
935 bp = g_alloc_bio();
936 bp->bio_caller1 = zio;
937 switch (zio->io_type) {
938 case ZIO_TYPE_READ:
939 case ZIO_TYPE_WRITE:
940 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
941 bp->bio_data = zio->io_data;
942 bp->bio_offset = zio->io_offset;
943 bp->bio_length = zio->io_size;
944 break;
945 case ZIO_TYPE_FREE:
946 bp->bio_cmd = BIO_DELETE;
947 bp->bio_data = NULL;
948 bp->bio_offset = zio->io_offset;
949 bp->bio_length = zio->io_size;
950 break;
951 case ZIO_TYPE_IOCTL:
952 bp->bio_cmd = BIO_FLUSH;
953 bp->bio_flags |= BIO_ORDERED;
954 bp->bio_data = NULL;
955 bp->bio_offset = cp->provider->mediasize;
956 bp->bio_length = 0;
957 break;
958 }
959 bp->bio_done = vdev_geom_io_intr;
960
961 g_io_request(bp, cp);
962}
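/*
 * Summary of the dispatch above: ZIO_TYPE_READ -> BIO_READ,
 * ZIO_TYPE_WRITE -> BIO_WRITE, ZIO_TYPE_FREE -> BIO_DELETE (TRIM) and
 * ZIO_TYPE_IOCTL (DKIOCFLUSHWRITECACHE) -> BIO_FLUSH with BIO_ORDERED,
 * using the provider's mediasize as the flush offset.
 */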
963
964static void
965vdev_geom_io_done(zio_t *zio)
966{
967}
968
969static void
970vdev_geom_hold(vdev_t *vd)
971{
972}
973
974static void
975vdev_geom_rele(vdev_t *vd)
976{
977}
978
979vdev_ops_t vdev_geom_ops = {
980 vdev_geom_open,
981 vdev_geom_close,
982 vdev_default_asize,
983 vdev_geom_io_start,
984 vdev_geom_io_done,
985 NULL,
986 vdev_geom_hold,
987 vdev_geom_rele,
988 VDEV_TYPE_DISK, /* name of this vdev type */
989 B_TRUE /* leaf vdev */
990};
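/*
 * Slot order in vdev_ops_t, for reference: open, close, asize,
 * io_start, io_done, state_change (unused here, hence the NULL),
 * hold and rele, followed by the vdev type name and the leaf flag.
 */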