/*-
 * Copyright (c) 2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/10.3/sys/geom/geom_fox.c 219029 2011-02-25 10:24:35Z netchild $
 */

/*
 * This is a GEOM module for handling path selection for multi-path
 * storage devices.  It is named "fox" because, like foxes, it prefers
 * to have multiple exits to choose from.
 */

38116518Sphk#include <sys/param.h>
39116518Sphk#include <sys/systm.h>
40219029Snetchild#include <sys/sysctl.h>
41116518Sphk#include <sys/kernel.h>
42116518Sphk#include <sys/conf.h>
43116518Sphk#include <sys/bio.h>
44116518Sphk#include <sys/malloc.h>
45116518Sphk#include <sys/lock.h>
46116518Sphk#include <sys/mutex.h>
47116518Sphk#include <sys/libkern.h>
48116518Sphk#include <sys/endian.h>
49116518Sphk#include <sys/md5.h>
50116518Sphk#include <sys/errno.h>
51116518Sphk#include <geom/geom.h>
52116518Sphk
53116518Sphk#define FOX_CLASS_NAME "FOX"
54116518Sphk#define FOX_MAGIC	"GEOM::FOX"
55116518Sphk
56219029SnetchildFEATURE(geom_fox, "GEOM FOX redundant path mitigation support");
57219029Snetchild
58116518Sphkstruct g_fox_softc {
59116518Sphk	off_t			mediasize;
60116518Sphk	u_int			sectorsize;
61116518Sphk	TAILQ_HEAD(, bio)	queue;
62116518Sphk	struct mtx		lock;
63116518Sphk	u_char 			magic[16];
64116518Sphk	struct g_consumer 	*path;
65116518Sphk	struct g_consumer 	*opath;
66116518Sphk	int			waiting;
67116518Sphk	int			cr, cw, ce;
68116518Sphk};
69116518Sphk
70116518Sphk/*
71116518Sphk * This function is called whenever we need to select a new path.
72116518Sphk */
73116518Sphkstatic void
74116518Sphkg_fox_select_path(void *arg, int flag)
75116518Sphk{
76116518Sphk	struct g_geom *gp;
77116518Sphk	struct g_fox_softc *sc;
78116518Sphk	struct g_consumer *cp1;
79116518Sphk	struct bio *bp;
80116518Sphk	int error;
81116518Sphk
82116518Sphk	g_topology_assert();
83116518Sphk	if (flag == EV_CANCEL)
84116518Sphk		return;
85116518Sphk	gp = arg;
86116518Sphk	sc = gp->softc;
87116518Sphk
88116518Sphk	if (sc->opath != NULL) {
89116518Sphk		/*
90116518Sphk		 * First, close the old path entirely.
91116518Sphk		 */
92116518Sphk		printf("Closing old path (%s) on fox (%s)\n",
93116518Sphk			sc->opath->provider->name, gp->name);
94116518Sphk
95116518Sphk		cp1 = LIST_NEXT(sc->opath, consumer);
96116518Sphk
97125803Sphk		g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1));
98116518Sphk
99116518Sphk		/*
100116518Sphk		 * The attempt to reopen it with a exclusive count
101116518Sphk		 */
102125755Sphk		error = g_access(sc->opath, 0, 0, 1);
103116518Sphk		if (error) {
104116518Sphk			/*
105116518Sphk			 * Ok, ditch this consumer, we can't use it.
106116518Sphk			 */
107116518Sphk			printf("Drop old path (%s) on fox (%s)\n",
108116518Sphk				sc->opath->provider->name, gp->name);
109116518Sphk			g_detach(sc->opath);
110116518Sphk			g_destroy_consumer(sc->opath);
111116518Sphk			if (LIST_EMPTY(&gp->consumer)) {
112116518Sphk				/* No consumers left */
113116518Sphk				g_wither_geom(gp, ENXIO);
114116518Sphk				for (;;) {
115116518Sphk					bp = TAILQ_FIRST(&sc->queue);
116116518Sphk					if (bp == NULL)
117116518Sphk						break;
118116518Sphk					TAILQ_REMOVE(&sc->queue, bp, bio_queue);
119116518Sphk					bp->bio_error = ENXIO;
120116518Sphk					g_std_done(bp);
121116518Sphk				}
122116518Sphk				return;
123116518Sphk			}
124116518Sphk		} else {
125116518Sphk			printf("Got e-bit on old path (%s) on fox (%s)\n",
126116518Sphk				sc->opath->provider->name, gp->name);
127116518Sphk		}
128116518Sphk		sc->opath = NULL;
129116518Sphk	} else {
130116518Sphk		cp1 = LIST_FIRST(&gp->consumer);
131116518Sphk	}
132116518Sphk	if (cp1 == NULL)
133116518Sphk		cp1 = LIST_FIRST(&gp->consumer);
134116518Sphk	printf("Open new path (%s) on fox (%s)\n",
135116518Sphk		cp1->provider->name, gp->name);
136125755Sphk	error = g_access(cp1, sc->cr, sc->cw, sc->ce);
137116518Sphk	if (error) {
138116518Sphk		/*
139116518Sphk		 * If we failed, we take another trip through here
140116518Sphk		 */
141116518Sphk		printf("Open new path (%s) on fox (%s) failed, reselect.\n",
142116518Sphk			cp1->provider->name, gp->name);
143116518Sphk		sc->opath = cp1;
144116518Sphk		g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
145116518Sphk	} else {
146116518Sphk		printf("Open new path (%s) on fox (%s) succeeded\n",
147116518Sphk			cp1->provider->name, gp->name);
148116518Sphk		mtx_lock(&sc->lock);
149116518Sphk		sc->path = cp1;
150116518Sphk		sc->waiting = 0;
151116518Sphk		for (;;) {
152116518Sphk			bp = TAILQ_FIRST(&sc->queue);
153116518Sphk			if (bp == NULL)
154116518Sphk				break;
155116518Sphk			TAILQ_REMOVE(&sc->queue, bp, bio_queue);
156116518Sphk			g_io_request(bp, sc->path);
157116518Sphk		}
158116518Sphk		mtx_unlock(&sc->lock);
159116518Sphk	}
160116518Sphk}
161116518Sphk
162116518Sphkstatic void
163116518Sphkg_fox_orphan(struct g_consumer *cp)
164116518Sphk{
165116518Sphk	struct g_geom *gp;
166116518Sphk	struct g_fox_softc *sc;
167116518Sphk	int error, mark;
168116518Sphk
169116518Sphk	g_topology_assert();
170116518Sphk	gp = cp->geom;
171116518Sphk	sc = gp->softc;
172116518Sphk	printf("Removing path (%s) from fox (%s)\n",
173116518Sphk	    cp->provider->name, gp->name);
174116518Sphk	mtx_lock(&sc->lock);
175116518Sphk	if (cp == sc->path) {
176116518Sphk		sc->opath = NULL;
177116518Sphk		sc->path = NULL;
178116518Sphk		sc->waiting = 1;
179116518Sphk		mark = 1;
180116518Sphk	} else {
181116518Sphk		mark = 0;
182116518Sphk	}
183116518Sphk	mtx_unlock(&sc->lock);
184116518Sphk
185125755Sphk	g_access(cp, -cp->acr, -cp->acw, -cp->ace);
186116518Sphk	error = cp->provider->error;
187116518Sphk	g_detach(cp);
188116518Sphk	g_destroy_consumer(cp);
189116518Sphk	if (!LIST_EMPTY(&gp->consumer)) {
190116518Sphk		if (mark)
191116518Sphk			g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
192116518Sphk		return;
193116518Sphk	}
194116518Sphk
195116518Sphk	mtx_destroy(&sc->lock);
196121366Sphk	g_free(gp->softc);
197116518Sphk	gp->softc = NULL;
198116518Sphk	g_wither_geom(gp, ENXIO);
199116518Sphk}
200116518Sphk
201116518Sphkstatic void
202116518Sphkg_fox_done(struct bio *bp)
203116518Sphk{
204116518Sphk	struct g_geom *gp;
205116518Sphk	struct g_fox_softc *sc;
206116518Sphk	int error;
207116518Sphk
208116518Sphk	if (bp->bio_error == 0) {
209116518Sphk		g_std_done(bp);
210116518Sphk		return;
211116518Sphk	}
212116518Sphk	gp = bp->bio_from->geom;
213116518Sphk	sc = gp->softc;
214116518Sphk	if (bp->bio_from != sc->path) {
215116518Sphk		g_io_request(bp, sc->path);
216116518Sphk		return;
217116518Sphk	}
218116518Sphk	mtx_lock(&sc->lock);
219116518Sphk	sc->opath = sc->path;
220116518Sphk	sc->path = NULL;
221116518Sphk	error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL);
222116518Sphk	if (error) {
223116518Sphk		bp->bio_error = ENOMEM;
224116518Sphk		g_std_done(bp);
225116518Sphk	} else {
226116518Sphk		sc->waiting = 1;
227116518Sphk		TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue);
228116518Sphk	}
229116518Sphk	mtx_unlock(&sc->lock);
230116518Sphk}
231116518Sphk
232116518Sphkstatic void
233116518Sphkg_fox_start(struct bio *bp)
234116518Sphk{
235116518Sphk	struct g_geom *gp;
236116518Sphk	struct bio *bp2;
237116518Sphk	struct g_fox_softc *sc;
238116518Sphk	int error;
239116518Sphk
240116518Sphk	gp = bp->bio_to->geom;
241116518Sphk	sc = gp->softc;
242116518Sphk	if (sc == NULL) {
243116518Sphk		g_io_deliver(bp, ENXIO);
244116518Sphk		return;
245116518Sphk	}
246116518Sphk	switch(bp->bio_cmd) {
247116518Sphk	case BIO_READ:
248116518Sphk	case BIO_WRITE:
249116518Sphk	case BIO_DELETE:
250116518Sphk		bp2 = g_clone_bio(bp);
251116518Sphk		if (bp2 == NULL) {
252116518Sphk			g_io_deliver(bp, ENOMEM);
253116518Sphk			break;
254116518Sphk		}
255116518Sphk		bp2->bio_offset += sc->sectorsize;
256116518Sphk		bp2->bio_done = g_fox_done;
257116518Sphk		mtx_lock(&sc->lock);
258116518Sphk		if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) {
259116518Sphk			if (sc->waiting == 0) {
260116518Sphk				error = g_post_event(g_fox_select_path, gp,
261116518Sphk				    M_NOWAIT, gp, NULL);
262116518Sphk				if (error) {
263116518Sphk					g_destroy_bio(bp2);
264116518Sphk					bp2 = NULL;
265116518Sphk					g_io_deliver(bp, error);
266116518Sphk				} else {
267116518Sphk					sc->waiting = 1;
268116518Sphk				}
269116518Sphk			}
270116518Sphk			if (bp2 != NULL)
271116518Sphk				TAILQ_INSERT_TAIL(&sc->queue, bp2,
272116518Sphk				    bio_queue);
273116518Sphk		} else {
274116518Sphk			g_io_request(bp2, sc->path);
275116518Sphk		}
276116518Sphk		mtx_unlock(&sc->lock);
277116518Sphk		break;
278116518Sphk	default:
279116518Sphk		g_io_deliver(bp, EOPNOTSUPP);
280116518Sphk		break;
281116518Sphk	}
282116518Sphk	return;
283116518Sphk}
284116518Sphk
285116518Sphkstatic int
286116518Sphkg_fox_access(struct g_provider *pp, int dr, int dw, int de)
287116518Sphk{
288116518Sphk	struct g_geom *gp;
289116518Sphk	struct g_fox_softc *sc;
290116518Sphk	struct g_consumer *cp1;
291116518Sphk	int error;
292116518Sphk
293116518Sphk	g_topology_assert();
294116518Sphk	gp = pp->geom;
295116518Sphk	sc = gp->softc;
296125803Sphk	if (sc == NULL) {
297125803Sphk		if (dr <= 0 && dw <= 0 && de <= 0)
298125803Sphk			return (0);
299125803Sphk		else
300125803Sphk			return (ENXIO);
301125803Sphk	}
302116518Sphk
303116518Sphk	if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
304116518Sphk		/*
305116518Sphk		 * First open, open all consumers with an exclusive bit
306116518Sphk		 */
307116518Sphk		error = 0;
308116518Sphk		LIST_FOREACH(cp1, &gp->consumer, consumer) {
309125755Sphk			error = g_access(cp1, 0, 0, 1);
310116518Sphk			if (error) {
311116518Sphk				printf("FOX: access(%s,0,0,1) = %d\n",
312116518Sphk				    cp1->provider->name, error);
313116518Sphk				break;
314116518Sphk			}
315116518Sphk		}
316116518Sphk		if (error) {
317116518Sphk			LIST_FOREACH(cp1, &gp->consumer, consumer) {
318116518Sphk				if (cp1->ace)
319125755Sphk					g_access(cp1, 0, 0, -1);
320116518Sphk			}
321116518Sphk			return (error);
322116518Sphk		}
323116518Sphk	}
324116518Sphk	if (sc->path == NULL)
325116518Sphk		g_fox_select_path(gp, 0);
326116518Sphk	if (sc->path == NULL)
327116518Sphk		error = ENXIO;
328116518Sphk	else
329125755Sphk		error = g_access(sc->path, dr, dw, de);
330116518Sphk	if (error == 0) {
331116518Sphk		sc->cr += dr;
332116518Sphk		sc->cw += dw;
333116518Sphk		sc->ce += de;
334116518Sphk		if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
335116518Sphk			/*
336116518Sphk			 * Last close, remove e-bit on all consumers
337116518Sphk			 */
338116518Sphk			LIST_FOREACH(cp1, &gp->consumer, consumer)
339125755Sphk				g_access(cp1, 0, 0, -1);
340116518Sphk		}
341116518Sphk	}
342116518Sphk	return (error);
343116518Sphk}
344116518Sphk
345116518Sphkstatic struct g_geom *
346116518Sphkg_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
347116518Sphk{
348116518Sphk	struct g_geom *gp, *gp2;
349116518Sphk	struct g_provider *pp2;
350116518Sphk	struct g_consumer *cp, *cp2;
351116518Sphk	struct g_fox_softc *sc, *sc2;
352116518Sphk	int error;
353116518Sphk	u_int sectorsize;
354116518Sphk	u_char *buf;
355116518Sphk
356116518Sphk	g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name);
357116518Sphk	g_topology_assert();
358116518Sphk	if (!strcmp(pp->geom->class->name, mp->name))
359116518Sphk		return (NULL);
360116518Sphk	gp = g_new_geomf(mp, "%s.fox", pp->name);
361116518Sphk	gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO);
362116518Sphk	sc = gp->softc;
363116518Sphk
364116518Sphk	cp = g_new_consumer(gp);
365116518Sphk	g_attach(cp, pp);
366125755Sphk	error = g_access(cp, 1, 0, 0);
367116518Sphk	if (error) {
368116518Sphk		g_free(sc);
369116518Sphk		g_detach(cp);
370116518Sphk		g_destroy_consumer(cp);
371116518Sphk		g_destroy_geom(gp);
372116518Sphk		return(NULL);
373116518Sphk	}
374116518Sphk	do {
375116518Sphk		sectorsize = cp->provider->sectorsize;
376116518Sphk		g_topology_unlock();
377152971Ssobomax		buf = g_read_data(cp, 0, sectorsize, NULL);
378116518Sphk		g_topology_lock();
379152967Ssobomax		if (buf == NULL)
380116518Sphk			break;
381116518Sphk		if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC)))
382116518Sphk			break;
383116518Sphk
384116518Sphk		/*
385116518Sphk		 * First we need to see if this a new path for an existing fox.
386116518Sphk		 */
387116518Sphk		LIST_FOREACH(gp2, &mp->geom, geom) {
388116518Sphk			sc2 = gp2->softc;
389121475Sphk			if (sc2 == NULL)
390116518Sphk				continue;
391116518Sphk			if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic))
392116518Sphk				continue;
393116518Sphk			break;
394116518Sphk		}
395116518Sphk		if (gp2 != NULL) {
396116518Sphk			/*
397116518Sphk			 * It was.  Create a new consumer for that fox,
398116518Sphk			 * attach it, and if the fox is open, open this
399116518Sphk			 * path with an exclusive count of one.
400116518Sphk			 */
401116518Sphk			printf("Adding path (%s) to fox (%s)\n",
402116518Sphk			    pp->name, gp2->name);
403116518Sphk			cp2 = g_new_consumer(gp2);
404116518Sphk			g_attach(cp2, pp);
405116518Sphk			pp2 = LIST_FIRST(&gp2->provider);
406116518Sphk			if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) {
407125755Sphk				error = g_access(cp2, 0, 0, 1);
408116518Sphk				if (error) {
409116518Sphk					/*
410116518Sphk					 * This is bad, or more likely,
411116518Sphk					 * the user is doing something stupid
412116518Sphk					 */
413116518Sphk					printf(
414116518Sphk	"WARNING: New path (%s) to fox(%s) not added: %s\n%s",
415121475Sphk					    cp2->provider->name, gp2->name,
416116518Sphk	"Could not get exclusive bit.",
417116518Sphk	"WARNING: This indicates a risk of data inconsistency."
418116518Sphk					);
419116518Sphk					g_detach(cp2);
420116518Sphk					g_destroy_consumer(cp2);
421116518Sphk				}
422116518Sphk			}
423116518Sphk			break;
424116518Sphk		}
425116518Sphk		printf("Creating new fox (%s)\n", pp->name);
426116518Sphk		sc->path = cp;
427116518Sphk		memcpy(sc->magic, buf + 16, sizeof sc->magic);
428116518Sphk		pp2 = g_new_providerf(gp, "%s", gp->name);
429116518Sphk		pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize;
430116518Sphk		pp2->sectorsize = sc->sectorsize = pp->sectorsize;
431116518Sphkprintf("fox %s lock %p\n", gp->name, &sc->lock);
432116518Sphk
433116518Sphk		mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF);
434116518Sphk		TAILQ_INIT(&sc->queue);
435116518Sphk		g_error_provider(pp2, 0);
436116518Sphk	} while (0);
437116518Sphk	if (buf != NULL)
438116518Sphk		g_free(buf);
439125755Sphk	g_access(cp, -1, 0, 0);
440116518Sphk
441116518Sphk	if (!LIST_EMPTY(&gp->provider))
442116518Sphk		return (gp);
443116518Sphk
444116518Sphk	g_free(gp->softc);
445116518Sphk	g_detach(cp);
446116518Sphk	g_destroy_consumer(cp);
447116518Sphk	g_destroy_geom(gp);
448116518Sphk	return (NULL);
449116518Sphk}
450116518Sphk
451116518Sphkstatic int
452116518Sphkg_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
453116518Sphk{
454116518Sphk	struct g_fox_softc *sc;
455116518Sphk
456116518Sphk	g_topology_assert();
457116518Sphk	sc = gp->softc;
458116518Sphk	mtx_destroy(&sc->lock);
459116518Sphk	g_free(gp->softc);
460125538Sle	gp->softc = NULL;
461116518Sphk	g_wither_geom(gp, ENXIO);
462116518Sphk	return (0);
463116518Sphk}
464116518Sphk
465116518Sphkstatic struct g_class g_fox_class	= {
466116518Sphk	.name = FOX_CLASS_NAME,
467133318Sphk	.version = G_VERSION,
468116518Sphk	.taste = g_fox_taste,
469116518Sphk	.destroy_geom = g_fox_destroy_geom,
470133314Sphk	.start = g_fox_start,
471133314Sphk	.spoiled = g_fox_orphan,
472133314Sphk	.orphan = g_fox_orphan,
473133314Sphk	.access= g_fox_access,
474116518Sphk};
475116518Sphk
476116518SphkDECLARE_GEOM_CLASS(g_fox_class, g_fox);
477