geom_fox.c revision 219029
133965Sjdp/*-
2218822Sdim * Copyright (c) 2003 Poul-Henning Kamp
3218822Sdim * All rights reserved.
433965Sjdp *
533965Sjdp * Redistribution and use in source and binary forms, with or without
633965Sjdp * modification, are permitted provided that the following conditions
733965Sjdp * are met:
833965Sjdp * 1. Redistributions of source code must retain the above copyright
933965Sjdp *    notice, this list of conditions and the following disclaimer.
1033965Sjdp * 2. Redistributions in binary form must reproduce the above copyright
1133965Sjdp *    notice, this list of conditions and the following disclaimer in the
1233965Sjdp *    documentation and/or other materials provided with the distribution.
1333965Sjdp * 3. The names of the authors may not be used to endorse or promote
1433965Sjdp *    products derived from this software without specific prior written
1533965Sjdp *    permission.
1633965Sjdp *
1733965Sjdp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1833965Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19218822Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2033965Sjdp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21130561Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22130561Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23130561Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24130561Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25130561Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26130561Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27130561Sobrien * SUCH DAMAGE.
28130561Sobrien *
29130561Sobrien * $FreeBSD: head/sys/geom/geom_fox.c 219029 2011-02-25 10:24:35Z netchild $
30130561Sobrien */
31130561Sobrien
/*
 * This is a GEOM module for handling path selection for multi-path
 * storage devices.  It is named "fox" because, like a fox, it prefers
 * to have multiple exits to choose from.
 */
37218822Sdim
38218822Sdim#include <sys/param.h>
39130561Sobrien#include <sys/systm.h>
40130561Sobrien#include <sys/sysctl.h>
4133965Sjdp#include <sys/kernel.h>
42130561Sobrien#include <sys/conf.h>
43130561Sobrien#include <sys/bio.h>
44130561Sobrien#include <sys/malloc.h>
45130561Sobrien#include <sys/lock.h>
4633965Sjdp#include <sys/mutex.h>
47130561Sobrien#include <sys/libkern.h>
4833965Sjdp#include <sys/endian.h>
49130561Sobrien#include <sys/md5.h>
5033965Sjdp#include <sys/errno.h>
51130561Sobrien#include <geom/geom.h>
52218822Sdim
53218822Sdim#define FOX_CLASS_NAME "FOX"
54218822Sdim#define FOX_MAGIC	"GEOM::FOX"
55130561Sobrien
56130561SobrienFEATURE(geom_fox, "GEOM FOX redundant path mitigation support");
57218822Sdim
58130561Sobrienstruct g_fox_softc {
59218822Sdim	off_t			mediasize;
60218822Sdim	u_int			sectorsize;
61218822Sdim	TAILQ_HEAD(, bio)	queue;
62218822Sdim	struct mtx		lock;
63218822Sdim	u_char 			magic[16];
64218822Sdim	struct g_consumer 	*path;
65218822Sdim	struct g_consumer 	*opath;
66130561Sobrien	int			waiting;
67130561Sobrien	int			cr, cw, ce;
68218822Sdim};
69218822Sdim
70130561Sobrien/*
71130561Sobrien * This function is called whenever we need to select a new path.
72130561Sobrien */
73130561Sobrienstatic void
7433965Sjdpg_fox_select_path(void *arg, int flag)
7533965Sjdp{
7633965Sjdp	struct g_geom *gp;
7733965Sjdp	struct g_fox_softc *sc;
7833965Sjdp	struct g_consumer *cp1;
7933965Sjdp	struct bio *bp;
8033965Sjdp	int error;
8133965Sjdp
8277298Sobrien	g_topology_assert();
8377298Sobrien	if (flag == EV_CANCEL)
84130561Sobrien		return;
85130561Sobrien	gp = arg;
8633965Sjdp	sc = gp->softc;
8733965Sjdp
8833965Sjdp	if (sc->opath != NULL) {
8977298Sobrien		/*
9077298Sobrien		 * First, close the old path entirely.
91130561Sobrien		 */
92130561Sobrien		printf("Closing old path (%s) on fox (%s)\n",
9333965Sjdp			sc->opath->provider->name, gp->name);
9433965Sjdp
9533965Sjdp		cp1 = LIST_NEXT(sc->opath, consumer);
9677298Sobrien
9777298Sobrien		g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1));
98130561Sobrien
99130561Sobrien		/*
10033965Sjdp		 * The attempt to reopen it with a exclusive count
10133965Sjdp		 */
10233965Sjdp		error = g_access(sc->opath, 0, 0, 1);
10377298Sobrien		if (error) {
10477298Sobrien			/*
105130561Sobrien			 * Ok, ditch this consumer, we can't use it.
106130561Sobrien			 */
10733965Sjdp			printf("Drop old path (%s) on fox (%s)\n",
10833965Sjdp				sc->opath->provider->name, gp->name);
10938889Sjdp			g_detach(sc->opath);
11038889Sjdp			g_destroy_consumer(sc->opath);
11138889Sjdp			if (LIST_EMPTY(&gp->consumer)) {
11238889Sjdp				/* No consumers left */
11338889Sjdp				g_wither_geom(gp, ENXIO);
11477298Sobrien				for (;;) {
11577298Sobrien					bp = TAILQ_FIRST(&sc->queue);
116130561Sobrien					if (bp == NULL)
117130561Sobrien						break;
11838889Sjdp					TAILQ_REMOVE(&sc->queue, bp, bio_queue);
11938889Sjdp					bp->bio_error = ENXIO;
120218822Sdim					g_std_done(bp);
121130561Sobrien				}
122218822Sdim				return;
123218822Sdim			}
124218822Sdim		} else {
125218822Sdim			printf("Got e-bit on old path (%s) on fox (%s)\n",
126218822Sdim				sc->opath->provider->name, gp->name);
127218822Sdim		}
128218822Sdim		sc->opath = NULL;
129218822Sdim	} else {
130218822Sdim		cp1 = LIST_FIRST(&gp->consumer);
131218822Sdim	}
132218822Sdim	if (cp1 == NULL)
133218822Sdim		cp1 = LIST_FIRST(&gp->consumer);
134218822Sdim	printf("Open new path (%s) on fox (%s)\n",
135218822Sdim		cp1->provider->name, gp->name);
136218822Sdim	error = g_access(cp1, sc->cr, sc->cw, sc->ce);
137218822Sdim	if (error) {
138218822Sdim		/*
139218822Sdim		 * If we failed, we take another trip through here
140218822Sdim		 */
141218822Sdim		printf("Open new path (%s) on fox (%s) failed, reselect.\n",
142218822Sdim			cp1->provider->name, gp->name);
143218822Sdim		sc->opath = cp1;
144218822Sdim		g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
145218822Sdim	} else {
146218822Sdim		printf("Open new path (%s) on fox (%s) succeeded\n",
147130561Sobrien			cp1->provider->name, gp->name);
148218822Sdim		mtx_lock(&sc->lock);
149130561Sobrien		sc->path = cp1;
150130561Sobrien		sc->waiting = 0;
151130561Sobrien		for (;;) {
152130561Sobrien			bp = TAILQ_FIRST(&sc->queue);
153130561Sobrien			if (bp == NULL)
154130561Sobrien				break;
155130561Sobrien			TAILQ_REMOVE(&sc->queue, bp, bio_queue);
156218822Sdim			g_io_request(bp, sc->path);
157130561Sobrien		}
158130561Sobrien		mtx_unlock(&sc->lock);
159130561Sobrien	}
160130561Sobrien}
161218822Sdim
162130561Sobrienstatic void
163130561Sobrieng_fox_orphan(struct g_consumer *cp)
164130561Sobrien{
165130561Sobrien	struct g_geom *gp;
166130561Sobrien	struct g_fox_softc *sc;
167130561Sobrien	int error, mark;
16833965Sjdp
16933965Sjdp	g_topology_assert();
17033965Sjdp	gp = cp->geom;
17177298Sobrien	sc = gp->softc;
172130561Sobrien	printf("Removing path (%s) from fox (%s)\n",
173130561Sobrien	    cp->provider->name, gp->name);
17433965Sjdp	mtx_lock(&sc->lock);
17533965Sjdp	if (cp == sc->path) {
17633965Sjdp		sc->opath = NULL;
17733965Sjdp		sc->path = NULL;
17877298Sobrien		sc->waiting = 1;
17977298Sobrien		mark = 1;
180130561Sobrien	} else {
181130561Sobrien		mark = 0;
18233965Sjdp	}
18333965Sjdp	mtx_unlock(&sc->lock);
18433965Sjdp
18533965Sjdp	g_access(cp, -cp->acr, -cp->acw, -cp->ace);
18633965Sjdp	error = cp->provider->error;
18777298Sobrien	g_detach(cp);
188130561Sobrien	g_destroy_consumer(cp);
189130561Sobrien	if (!LIST_EMPTY(&gp->consumer)) {
19033965Sjdp		if (mark)
19133965Sjdp			g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
19233965Sjdp		return;
19389857Sobrien	}
19489857Sobrien
195130561Sobrien	mtx_destroy(&sc->lock);
196130561Sobrien	g_free(gp->softc);
19789857Sobrien	gp->softc = NULL;
19889857Sobrien	g_wither_geom(gp, ENXIO);
19989857Sobrien}
20033965Sjdp
20133965Sjdpstatic void
20233965Sjdpg_fox_done(struct bio *bp)
20377298Sobrien{
204130561Sobrien	struct g_geom *gp;
205130561Sobrien	struct g_fox_softc *sc;
20633965Sjdp	int error;
20789857Sobrien
20889857Sobrien	if (bp->bio_error == 0) {
20989857Sobrien		g_std_done(bp);
21089857Sobrien		return;
21189857Sobrien	}
212130561Sobrien	gp = bp->bio_from->geom;
213130561Sobrien	sc = gp->softc;
21489857Sobrien	if (bp->bio_from != sc->path) {
21589857Sobrien		g_io_request(bp, sc->path);
21689857Sobrien		return;
21789857Sobrien	}
21889857Sobrien	mtx_lock(&sc->lock);
21989857Sobrien	sc->opath = sc->path;
220130561Sobrien	sc->path = NULL;
221130561Sobrien	error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL);
22289857Sobrien	if (error) {
22389857Sobrien		bp->bio_error = ENOMEM;
22489857Sobrien		g_std_done(bp);
22589857Sobrien	} else {
22689857Sobrien		sc->waiting = 1;
227130561Sobrien		TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue);
228130561Sobrien	}
22989857Sobrien	mtx_unlock(&sc->lock);
23089857Sobrien}
23189857Sobrien
23289857Sobrienstatic void
23389857Sobrieng_fox_start(struct bio *bp)
234130561Sobrien{
235130561Sobrien	struct g_geom *gp;
23689857Sobrien	struct bio *bp2;
23789857Sobrien	struct g_fox_softc *sc;
23889857Sobrien	int error;
23989857Sobrien
24089857Sobrien	gp = bp->bio_to->geom;
241130561Sobrien	sc = gp->softc;
242130561Sobrien	if (sc == NULL) {
24389857Sobrien		g_io_deliver(bp, ENXIO);
24489857Sobrien		return;
24589857Sobrien	}
24689857Sobrien	switch(bp->bio_cmd) {
24789857Sobrien	case BIO_READ:
248130561Sobrien	case BIO_WRITE:
249130561Sobrien	case BIO_DELETE:
25089857Sobrien		bp2 = g_clone_bio(bp);
25133965Sjdp		if (bp2 == NULL) {
252218822Sdim			g_io_deliver(bp, ENOMEM);
253218822Sdim			break;
254218822Sdim		}
255218822Sdim		bp2->bio_offset += sc->sectorsize;
256218822Sdim		bp2->bio_done = g_fox_done;
257130561Sobrien		mtx_lock(&sc->lock);
25833965Sjdp		if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) {
25933965Sjdp			if (sc->waiting == 0) {
260218822Sdim				error = g_post_event(g_fox_select_path, gp,
261218822Sdim				    M_NOWAIT, gp, NULL);
26233965Sjdp				if (error) {
263218822Sdim					g_destroy_bio(bp2);
26433965Sjdp					bp2 = NULL;
265218822Sdim					g_io_deliver(bp, error);
266218822Sdim				} else {
26733965Sjdp					sc->waiting = 1;
268218822Sdim				}
269218822Sdim			}
270218822Sdim			if (bp2 != NULL)
27133965Sjdp				TAILQ_INSERT_TAIL(&sc->queue, bp2,
27233965Sjdp				    bio_queue);
273218822Sdim		} else {
27433965Sjdp			g_io_request(bp2, sc->path);
275218822Sdim		}
27633965Sjdp		mtx_unlock(&sc->lock);
277218822Sdim		break;
278218822Sdim	default:
279218822Sdim		g_io_deliver(bp, EOPNOTSUPP);
280218822Sdim		break;
28133965Sjdp	}
282218822Sdim	return;
283218822Sdim}
284218822Sdim
285218822Sdimstatic int
286218822Sdimg_fox_access(struct g_provider *pp, int dr, int dw, int de)
287218822Sdim{
288218822Sdim	struct g_geom *gp;
289218822Sdim	struct g_fox_softc *sc;
290218822Sdim	struct g_consumer *cp1;
29133965Sjdp	int error;
292218822Sdim
293218822Sdim	g_topology_assert();
29433965Sjdp	gp = pp->geom;
29533965Sjdp	sc = gp->softc;
29633965Sjdp	if (sc == NULL) {
29733965Sjdp		if (dr <= 0 && dw <= 0 && de <= 0)
29833965Sjdp			return (0);
29933965Sjdp		else
30033965Sjdp			return (ENXIO);
30133965Sjdp	}
302218822Sdim
303218822Sdim	if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
30433965Sjdp		/*
305218822Sdim		 * First open, open all consumers with an exclusive bit
30633965Sjdp		 */
30733965Sjdp		error = 0;
30833965Sjdp		LIST_FOREACH(cp1, &gp->consumer, consumer) {
30933965Sjdp			error = g_access(cp1, 0, 0, 1);
31033965Sjdp			if (error) {
31133965Sjdp				printf("FOX: access(%s,0,0,1) = %d\n",
31233965Sjdp				    cp1->provider->name, error);
31333965Sjdp				break;
31433965Sjdp			}
31533965Sjdp		}
316130561Sobrien		if (error) {
317130561Sobrien			LIST_FOREACH(cp1, &gp->consumer, consumer) {
318130561Sobrien				if (cp1->ace)
319130561Sobrien					g_access(cp1, 0, 0, -1);
320130561Sobrien			}
321130561Sobrien			return (error);
322130561Sobrien		}
323130561Sobrien	}
324130561Sobrien	if (sc->path == NULL)
325130561Sobrien		g_fox_select_path(gp, 0);
326130561Sobrien	if (sc->path == NULL)
327130561Sobrien		error = ENXIO;
328130561Sobrien	else
329130561Sobrien		error = g_access(sc->path, dr, dw, de);
330130561Sobrien	if (error == 0) {
331130561Sobrien		sc->cr += dr;
332130561Sobrien		sc->cw += dw;
333130561Sobrien		sc->ce += de;
334130561Sobrien		if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
335130561Sobrien			/*
336130561Sobrien			 * Last close, remove e-bit on all consumers
337130561Sobrien			 */
338130561Sobrien			LIST_FOREACH(cp1, &gp->consumer, consumer)
339130561Sobrien				g_access(cp1, 0, 0, -1);
340130561Sobrien		}
341130561Sobrien	}
342218822Sdim	return (error);
343218822Sdim}
344218822Sdim
345218822Sdimstatic struct g_geom *
346218822Sdimg_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
347218822Sdim{
348218822Sdim	struct g_geom *gp, *gp2;
349130561Sobrien	struct g_provider *pp2;
350130561Sobrien	struct g_consumer *cp, *cp2;
351130561Sobrien	struct g_fox_softc *sc, *sc2;
352130561Sobrien	int error;
353130561Sobrien	u_int sectorsize;
354130561Sobrien	u_char *buf;
355130561Sobrien
356130561Sobrien	g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name);
357130561Sobrien	g_topology_assert();
358130561Sobrien	if (!strcmp(pp->geom->class->name, mp->name))
359130561Sobrien		return (NULL);
360130561Sobrien	gp = g_new_geomf(mp, "%s.fox", pp->name);
361130561Sobrien	gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO);
36233965Sjdp	sc = gp->softc;
36333965Sjdp
36433965Sjdp	cp = g_new_consumer(gp);
36533965Sjdp	g_attach(cp, pp);
36660484Sobrien	error = g_access(cp, 1, 0, 0);
36733965Sjdp	if (error) {
36833965Sjdp		g_free(sc);
36933965Sjdp		g_detach(cp);
37033965Sjdp		g_destroy_consumer(cp);
37133965Sjdp		g_destroy_geom(gp);
37233965Sjdp		return(NULL);
37333965Sjdp	}
37433965Sjdp	do {
37533965Sjdp		sectorsize = cp->provider->sectorsize;
37633965Sjdp		g_topology_unlock();
37733965Sjdp		buf = g_read_data(cp, 0, sectorsize, NULL);
37833965Sjdp		g_topology_lock();
37960484Sobrien		if (buf == NULL)
38060484Sobrien			break;
38160484Sobrien		if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC)))
38260484Sobrien			break;
38360484Sobrien
38460484Sobrien		/*
38533965Sjdp		 * First we need to see if this a new path for an existing fox.
38633965Sjdp		 */
38733965Sjdp		LIST_FOREACH(gp2, &mp->geom, geom) {
38833965Sjdp			sc2 = gp2->softc;
38933965Sjdp			if (sc2 == NULL)
39033965Sjdp				continue;
39133965Sjdp			if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic))
39233965Sjdp				continue;
393130561Sobrien			break;
394130561Sobrien		}
395218822Sdim		if (gp2 != NULL) {
396130561Sobrien			/*
397130561Sobrien			 * It was.  Create a new consumer for that fox,
398130561Sobrien			 * attach it, and if the fox is open, open this
399130561Sobrien			 * path with an exclusive count of one.
400130561Sobrien			 */
401130561Sobrien			printf("Adding path (%s) to fox (%s)\n",
402130561Sobrien			    pp->name, gp2->name);
403130561Sobrien			cp2 = g_new_consumer(gp2);
404130561Sobrien			g_attach(cp2, pp);
40533965Sjdp			pp2 = LIST_FIRST(&gp2->provider);
40633965Sjdp			if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) {
40733965Sjdp				error = g_access(cp2, 0, 0, 1);
40833965Sjdp				if (error) {
40933965Sjdp					/*
41033965Sjdp					 * This is bad, or more likely,
41133965Sjdp					 * the user is doing something stupid
41233965Sjdp					 */
41333965Sjdp					printf(
41433965Sjdp	"WARNING: New path (%s) to fox(%s) not added: %s\n%s",
415218822Sdim					    cp2->provider->name, gp2->name,
416218822Sdim	"Could not get exclusive bit.",
417218822Sdim	"WARNING: This indicates a risk of data inconsistency."
418218822Sdim					);
419218822Sdim					g_detach(cp2);
42033965Sjdp					g_destroy_consumer(cp2);
421130561Sobrien				}
42233965Sjdp			}
42333965Sjdp			break;
424218822Sdim		}
425218822Sdim		printf("Creating new fox (%s)\n", pp->name);
426218822Sdim		sc->path = cp;
42733965Sjdp		memcpy(sc->magic, buf + 16, sizeof sc->magic);
42833965Sjdp		pp2 = g_new_providerf(gp, "%s", gp->name);
429218822Sdim		pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize;
430218822Sdim		pp2->sectorsize = sc->sectorsize = pp->sectorsize;
43133965Sjdpprintf("fox %s lock %p\n", gp->name, &sc->lock);
432218822Sdim
433218822Sdim		mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF);
434218822Sdim		TAILQ_INIT(&sc->queue);
43533965Sjdp		g_error_provider(pp2, 0);
43633965Sjdp	} while (0);
437218822Sdim	if (buf != NULL)
43833965Sjdp		g_free(buf);
439218822Sdim	g_access(cp, -1, 0, 0);
44033965Sjdp
441218822Sdim	if (!LIST_EMPTY(&gp->provider))
442218822Sdim		return (gp);
443218822Sdim
444218822Sdim	g_free(gp->softc);
44533965Sjdp	g_detach(cp);
446218822Sdim	g_destroy_consumer(cp);
447218822Sdim	g_destroy_geom(gp);
448218822Sdim	return (NULL);
449218822Sdim}
450218822Sdim
451218822Sdimstatic int
452218822Sdimg_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
453218822Sdim{
454218822Sdim	struct g_fox_softc *sc;
45533965Sjdp
456218822Sdim	g_topology_assert();
45733965Sjdp	sc = gp->softc;
45833965Sjdp	mtx_destroy(&sc->lock);
45933965Sjdp	g_free(gp->softc);
46033965Sjdp	gp->softc = NULL;
46133965Sjdp	g_wither_geom(gp, ENXIO);
46233965Sjdp	return (0);
46333965Sjdp}
464218822Sdim
465218822Sdimstatic struct g_class g_fox_class	= {
46633965Sjdp	.name = FOX_CLASS_NAME,
46733965Sjdp	.version = G_VERSION,
46833965Sjdp	.taste = g_fox_taste,
46933965Sjdp	.destroy_geom = g_fox_destroy_geom,
47033965Sjdp	.start = g_fox_start,
47133965Sjdp	.spoiled = g_fox_orphan,
472218822Sdim	.orphan = g_fox_orphan,
47333965Sjdp	.access= g_fox_access,
474130561Sobrien};
47533965Sjdp
476130561SobrienDECLARE_GEOM_CLASS(g_fox_class, g_fox);
477130561Sobrien