/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem)	elems;
	vdev_t				*vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
_Static_assert(sizeof(((struct g_consumer*)NULL)->private)
    == sizeof(struct consumer_priv_t*),
    "consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non-NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

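/*
 * Query the provider's GEOM::rotation_rate attribute and record it in the
 * vdev, falling back to "unknown" if the attribute is unavailable.
 */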
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
	int error;
	uint16_t rate;

	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0)
		vd->vdev_rotation_rate = rate;
	else
		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

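/*
 * Fetch the provider's GEOM::physpath attribute and store it as the vdev's
 * physical path, requesting a config update if the path changed (or, when
 * do_null_update is set, if it was previously unset).
 */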
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
		       boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
						vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
	if (needs_update)
		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);

}

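/*
 * GEOM attribute-change callback.  Update the affected vdev's rotation rate
 * or physical path when the corresponding provider attribute changes.
 */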
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;

	priv = (struct consumer_priv_t*)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;
		if (strcmp(attr, "GEOM::rotation_rate") == 0) {
			vdev_geom_set_rotation_rate(vd, cp);
			return;
		}
		if (strcmp(attr, "GEOM::physpath") == 0) {
			vdev_geom_set_physpath(vd, cp, /*null_update*/B_TRUE);
			return;
		}
	}
}

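/*
 * GEOM orphan callback, invoked when the underlying provider goes away.
 */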
static void
vdev_geom_orphan(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;

	g_topology_assert();

	priv = (struct consumer_priv_t*)&cp->private;
	if (SLIST_EMPTY(priv))
		/* Vdev close in progress.  Ignore the event. */
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;

		vd->vdev_remove_wanted = B_TRUE;
		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
	}
}

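/*
 * Attach a GEOM consumer to the given provider, creating the ZFS::VDEV geom
 * on first use.  If 'sanity' is set, refuse providers whose sector size or
 * media size cannot back a vdev.  Returns the consumer with read and
 * exclusive access, or NULL on failure.
 */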
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (sanity) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
			ZFS_LOG(1, "Failing attach of %s. "
				   "Incompatible sectorsize %d\n",
			    pp->name, pp->sectorsize);
			return (NULL);
		} else if (pp->mediasize < SPA_MINDEVSIZE) {
			ZFS_LOG(1, "Failing attach of %s. "
				   "Incompatible mediasize %ju\n",
			    pp->name, pp->mediasize);
			return (NULL);
		}
	}

	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d", __func__,
			       __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}

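/*
 * Drop our access to the consumer and destroy it (and its geom) once the
 * last reference is gone.  'open_for_read' indicates whether the caller
 * holds the read/exclusive counts taken by vdev_geom_attach().
 */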
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ? cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

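/*
 * Close a vdev's consumer with the GEOM topology lock held: unlink the vdev
 * from the consumer's private list and detach the consumer.
 */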
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t*)&cp->private;
	vd->vdev_tsd = NULL;
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}

/*
 * Issue one or more bios to the vdev in parallel.
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
 * operation is described by parallel entries from each array.  There may be
 * more bios actually issued than entries in the arrays, because a single
 * command larger than maxio is split into several bios.
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	u_char *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof(struct bio*);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT((off % cp->provider->sectorsize) == 0);
		ASSERT((s % cp->provider->sectorsize) == 0);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT(j == n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		for (; off < end; off += maxio, s -= maxio, j++) {
			errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}

/*
 * Read the vdev config from a device.  Return the number of valid labels that
 * were found.  The vdev config will be returned in *configp if and only if at
 * least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
{
	struct g_provider *pp;
	nvlist_t *config;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof(vdev_lists[0]->vp_nvlist);

	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT(offsets[l] % pp->sectorsize == 0);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	config = *configp = NULL;
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(config);
			continue;
		}

		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(config);
			continue;
		}

		if (*configp != NULL)
			nvlist_free(*configp);
		*configp = config;

		nlabels++;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (nlabels);
}

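/*
 * Grow the configs array so that it can hold an entry at index 'id',
 * preserving any existing entries.
 */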
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

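/*
 * Sort a label's config into the per-top-level-vdev configs array for the
 * named pool, keeping only the copy with the newest transaction group for
 * each vdev id.  Configs for other pools, or with missing fields, are freed
 * and ignored.
 */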
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	uint64_t id, txg, known_txg;
	char *pname;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

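/*
 * Walk every GEOM provider in the system, read its vdev labels, and collect
 * the newest config for each top-level vdev of the named pool.  Returns 0 if
 * at least one config was found, ENOENT otherwise.
 */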
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int nlabels;

	DROP_GIANT();
	g_topology_lock();

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
				if (zcp == NULL)
					continue;
				g_topology_unlock();
				nlabels = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach(zcp, B_TRUE);
				if (nlabels == 0)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

enum match {
	NO_MATCH = 0,		/* No matching labels found */
	TOPGUID_MATCH = 1,	/* Labels match top guid, not vdev guid */
	ZERO_MATCH = 1,		/* Should never be returned */
	ONE_MATCH = 2,		/* 1 label matching the vdev_guid */
	TWO_MATCH = 3,		/* 2 labels matching the vdev_guid */
	THREE_MATCH = 4,	/* 3 labels matching the vdev_guid */
	FULL_MATCH = 5		/* all labels match the vdev_guid */
};

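/*
 * Taste the provider and report how well its labels match the given vdev:
 * no match, a match on the top-level vdev guid only, or a count of labels
 * whose vdev guid (and pool guid, if present) match.
 */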
static enum match
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	nvlist_t *config;
	uint64_t pool_guid, top_guid, vdev_guid;
	struct g_consumer *cp;
	int nlabels;

	cp = vdev_geom_attach(pp, NULL, B_TRUE);
	if (cp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (NO_MATCH);
	}
	g_topology_unlock();
	nlabels = vdev_geom_read_config(cp, &config);
	g_topology_lock();
	vdev_geom_detach(cp, B_TRUE);
	if (nlabels == 0) {
		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
		return (NO_MATCH);
	}

	pool_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
	top_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
	vdev_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	nvlist_free(config);

	/*
	 * Check that the label's pool guid matches the desired guid.
	 * Inactive spares and L2ARCs do not have any pool guid in the label.
	 */
	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
		    pp->name,
		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
		return (NO_MATCH);
	}

	/*
	 * Check that the label's vdev guid matches the desired guid.
	 * The second condition handles a possible race on vdev detach, when
	 * the remaining vdev receives the GUID of the destroyed top-level
	 * mirror vdev.
	 */
	if (vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", pp->name);
		return (ZERO_MATCH + nlabels);
	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
		return (TOPGUID_MATCH);
	}
	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
	return (NO_MATCH);
}

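/*
 * Search every GEOM provider for the one that best matches this vdev's
 * guids and attach to it.  A provider whose labels all match wins outright;
 * otherwise the provider with the strongest partial match is used.
 */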
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp, *best_pp;
	struct g_consumer *cp;
	enum match match, best_match;

	g_topology_assert();

	cp = NULL;
	best_pp = NULL;
	best_match = NO_MATCH;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				match = vdev_attach_ok(vd, pp);
				if (match > best_match) {
					best_match = match;
					best_pp = pp;
				}
				if (match == FULL_MATCH)
					goto out;
			}
		}
	}

out:
	if (best_pp) {
		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
		if (cp == NULL) {
			printf("ZFS WARNING: Unable to attach to %s.\n",
			    best_pp->name);
		}
	}
	return (cp);
}

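/*
 * Open the vdev by searching all providers for matching guids, updating
 * vd->vdev_path to point at the device that was actually found.
 */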
static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
		(uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, cp->provider->name);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

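/*
 * Open the vdev by the path recorded in vd->vdev_path, optionally verifying
 * that the device's labels carry the expected guids.
 */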
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH)
			cp = vdev_geom_attach(pp, vd, B_FALSE);
	}

	return (cp);
}

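/*
 * Open the vdev: locate a suitable GEOM provider (by path and/or by guids),
 * attach to it, obtain write access if the pool is opened for writing, and
 * report the device's size and alignment requirements to the caller.
 */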
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	int error;

	/* Set the TLS to indicate downstack that we should not access zvols */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if ((cp = vd->vdev_tsd) != NULL) {
		ASSERT(vd->vdev_reopening);
		goto skip_open;
	}

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_splitting_newspa ||
	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	     vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user knows what he/she is doing and find the
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs.  The disks might have merely
			 * moved around, so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

	if (cp == NULL) {
		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
		error = ENOENT;
	} else {
		struct consumer_priv_t *priv;
		struct consumer_vdev_elem *elem;
		int spamode;

		priv = (struct consumer_priv_t*)&cp->private;
		if (cp->private == NULL)
			SLIST_INIT(priv);
		elem = g_malloc(sizeof(*elem), M_WAITOK|M_ZERO);
		elem->vd = vd;
		SLIST_INSERT_HEAD(priv, elem, elems);

		spamode = spa_mode(vd->vdev_spa);
		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
		    !ISP2(cp->provider->sectorsize)) {
			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
			    cp->provider->name);

			vdev_geom_close_locked(vd);
			error = EINVAL;
			cp = NULL;
		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
			int i;

			for (i = 0; i < 5; i++) {
				error = g_access(cp, 0, 1, 0);
				if (error == 0)
					break;
				g_topology_unlock();
				tsleep(vd, 0, "vdev", hz / 2);
				g_topology_lock();
			}
			if (error != 0) {
				printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
				    cp->provider->name, error);
				vdev_geom_close_locked(vd);
				cp = NULL;
			}
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL) {
		vdev_geom_attrchanged(cp, "GEOM::physpath");

		/* Set other GEOM characteristics */
		vdev_geom_set_physpath(vd, cp, /*do_null_update*/B_FALSE);
		vdev_geom_set_rotation_rate(vd, cp);
	}

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
skip_open:
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
	    pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}

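/*
 * Close the vdev, tearing down the GEOM consumer unless this is a reopen of
 * a healthy, still-attached device.
 */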
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;

	DROP_GIANT();
	g_topology_lock();

	if (!vd->vdev_reopening ||
	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
	    (cp->provider != NULL && cp->provider->error != 0))))
		vdev_geom_close_locked(vd);

	g_topology_unlock();
	PICKUP_GIANT();
}

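/*
 * Completion callback for bios issued by vdev_geom_io_start().  Translate
 * the bio status into a zio error, note devices that do not support
 * flush/TRIM, and schedule removal of devices that report ENXIO.
 */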
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed.  In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			vd->vdev_notrim = B_TRUE;
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If the provider's error is set we assume it is
			 * being removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}
	g_destroy_bio(bp);
	zio_delay_interrupt(zio);
}

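/*
 * Start an I/O on the vdev: translate the zio into a bio (handling cache
 * flush and TRIM requests specially) and hand it to GEOM.
 */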
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_FREE:
		if (vd->vdev_notrim) {
			zio->io_error = SET_ERROR(ENOTSUP);
		} else if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_FREE ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_FREE:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_flags |= BIO_ORDERED;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);
}

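/*
 * I/O completion hook.  All per-bio completion work happens in
 * vdev_geom_io_intr(), so there is nothing left to do here.
 */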
static void
vdev_geom_io_done(zio_t *zio)
{
}

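/*
 * No-op hold/release hooks; the GEOM consumer attached at open time already
 * keeps a reference on the underlying provider.
 */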
static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

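/*
 * Virtual device operations vector exported to the ZFS vdev layer.
 */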
vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};