1/*-
2 * Copyright (c) 2003 Poul-Henning Kamp
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The names of the authors may not be used to endorse or promote
14 *    products derived from this software without specific prior written
15 *    permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD$
30 */
31
/*
 * This is a GEOM module for handling path selection for multi-path
 * storage devices.  It is named "fox" because it, like a fox, prefers
 * to have multiple exits to choose from.
 */
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/sysctl.h>
41#include <sys/kernel.h>
42#include <sys/conf.h>
43#include <sys/bio.h>
44#include <sys/malloc.h>
45#include <sys/lock.h>
46#include <sys/mutex.h>
47#include <sys/libkern.h>
48#include <sys/endian.h>
49#include <sys/md5.h>
50#include <sys/errno.h>
51#include <geom/geom.h>
52
53#define FOX_CLASS_NAME "FOX"
54#define FOX_MAGIC	"GEOM::FOX"
55
56FEATURE(geom_fox, "GEOM FOX redundant path mitigation support");
57
58struct g_fox_softc {
59	off_t			mediasize;
60	u_int			sectorsize;
61	TAILQ_HEAD(, bio)	queue;
62	struct mtx		lock;
63	u_char 			magic[16];
64	struct g_consumer 	*path;
65	struct g_consumer 	*opath;
66	int			waiting;
67	int			cr, cw, ce;
68};
69
70/*
71 * This function is called whenever we need to select a new path.
72 */
73static void
74g_fox_select_path(void *arg, int flag)
75{
76	struct g_geom *gp;
77	struct g_fox_softc *sc;
78	struct g_consumer *cp1;
79	struct bio *bp;
80	int error;
81
82	g_topology_assert();
83	if (flag == EV_CANCEL)
84		return;
85	gp = arg;
86	sc = gp->softc;
87
88	if (sc->opath != NULL) {
89		/*
90		 * First, close the old path entirely.
91		 */
92		printf("Closing old path (%s) on fox (%s)\n",
93			sc->opath->provider->name, gp->name);
94
95		cp1 = LIST_NEXT(sc->opath, consumer);
96
97		g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1));
98
99		/*
100		 * The attempt to reopen it with a exclusive count
101		 */
102		error = g_access(sc->opath, 0, 0, 1);
103		if (error) {
104			/*
105			 * Ok, ditch this consumer, we can't use it.
106			 */
107			printf("Drop old path (%s) on fox (%s)\n",
108				sc->opath->provider->name, gp->name);
109			g_detach(sc->opath);
110			g_destroy_consumer(sc->opath);
111			if (LIST_EMPTY(&gp->consumer)) {
112				/* No consumers left */
113				g_wither_geom(gp, ENXIO);
114				for (;;) {
115					bp = TAILQ_FIRST(&sc->queue);
116					if (bp == NULL)
117						break;
118					TAILQ_REMOVE(&sc->queue, bp, bio_queue);
119					bp->bio_error = ENXIO;
120					g_std_done(bp);
121				}
122				return;
123			}
124		} else {
125			printf("Got e-bit on old path (%s) on fox (%s)\n",
126				sc->opath->provider->name, gp->name);
127		}
128		sc->opath = NULL;
129	} else {
130		cp1 = LIST_FIRST(&gp->consumer);
131	}
132	if (cp1 == NULL)
133		cp1 = LIST_FIRST(&gp->consumer);
134	printf("Open new path (%s) on fox (%s)\n",
135		cp1->provider->name, gp->name);
136	error = g_access(cp1, sc->cr, sc->cw, sc->ce);
137	if (error) {
138		/*
139		 * If we failed, we take another trip through here
140		 */
141		printf("Open new path (%s) on fox (%s) failed, reselect.\n",
142			cp1->provider->name, gp->name);
143		sc->opath = cp1;
144		g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
145	} else {
146		printf("Open new path (%s) on fox (%s) succeeded\n",
147			cp1->provider->name, gp->name);
148		mtx_lock(&sc->lock);
149		sc->path = cp1;
150		sc->waiting = 0;
151		for (;;) {
152			bp = TAILQ_FIRST(&sc->queue);
153			if (bp == NULL)
154				break;
155			TAILQ_REMOVE(&sc->queue, bp, bio_queue);
156			g_io_request(bp, sc->path);
157		}
158		mtx_unlock(&sc->lock);
159	}
160}
161
162static void
163g_fox_orphan(struct g_consumer *cp)
164{
165	struct g_geom *gp;
166	struct g_fox_softc *sc;
167	int error, mark;
168
169	g_topology_assert();
170	gp = cp->geom;
171	sc = gp->softc;
172	printf("Removing path (%s) from fox (%s)\n",
173	    cp->provider->name, gp->name);
174	mtx_lock(&sc->lock);
175	if (cp == sc->path) {
176		sc->opath = NULL;
177		sc->path = NULL;
178		sc->waiting = 1;
179		mark = 1;
180	} else {
181		mark = 0;
182	}
183	mtx_unlock(&sc->lock);
184
185	g_access(cp, -cp->acr, -cp->acw, -cp->ace);
186	error = cp->provider->error;
187	g_detach(cp);
188	g_destroy_consumer(cp);
189	if (!LIST_EMPTY(&gp->consumer)) {
190		if (mark)
191			g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
192		return;
193	}
194
195	mtx_destroy(&sc->lock);
196	g_free(gp->softc);
197	gp->softc = NULL;
198	g_wither_geom(gp, ENXIO);
199}
200
201static void
202g_fox_done(struct bio *bp)
203{
204	struct g_geom *gp;
205	struct g_fox_softc *sc;
206	int error;
207
208	if (bp->bio_error == 0) {
209		g_std_done(bp);
210		return;
211	}
212	gp = bp->bio_from->geom;
213	sc = gp->softc;
214	if (bp->bio_from != sc->path) {
215		g_io_request(bp, sc->path);
216		return;
217	}
218	mtx_lock(&sc->lock);
219	sc->opath = sc->path;
220	sc->path = NULL;
221	error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL);
222	if (error) {
223		bp->bio_error = ENOMEM;
224		g_std_done(bp);
225	} else {
226		sc->waiting = 1;
227		TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue);
228	}
229	mtx_unlock(&sc->lock);
230}
231
232static void
233g_fox_start(struct bio *bp)
234{
235	struct g_geom *gp;
236	struct bio *bp2;
237	struct g_fox_softc *sc;
238	int error;
239
240	gp = bp->bio_to->geom;
241	sc = gp->softc;
242	if (sc == NULL) {
243		g_io_deliver(bp, ENXIO);
244		return;
245	}
246	switch(bp->bio_cmd) {
247	case BIO_READ:
248	case BIO_WRITE:
249	case BIO_DELETE:
250		bp2 = g_clone_bio(bp);
251		if (bp2 == NULL) {
252			g_io_deliver(bp, ENOMEM);
253			break;
254		}
255		bp2->bio_offset += sc->sectorsize;
256		bp2->bio_done = g_fox_done;
257		mtx_lock(&sc->lock);
258		if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) {
259			if (sc->waiting == 0) {
260				error = g_post_event(g_fox_select_path, gp,
261				    M_NOWAIT, gp, NULL);
262				if (error) {
263					g_destroy_bio(bp2);
264					bp2 = NULL;
265					g_io_deliver(bp, error);
266				} else {
267					sc->waiting = 1;
268				}
269			}
270			if (bp2 != NULL)
271				TAILQ_INSERT_TAIL(&sc->queue, bp2,
272				    bio_queue);
273		} else {
274			g_io_request(bp2, sc->path);
275		}
276		mtx_unlock(&sc->lock);
277		break;
278	default:
279		g_io_deliver(bp, EOPNOTSUPP);
280		break;
281	}
282	return;
283}
284
285static int
286g_fox_access(struct g_provider *pp, int dr, int dw, int de)
287{
288	struct g_geom *gp;
289	struct g_fox_softc *sc;
290	struct g_consumer *cp1;
291	int error;
292
293	g_topology_assert();
294	gp = pp->geom;
295	sc = gp->softc;
296	if (sc == NULL) {
297		if (dr <= 0 && dw <= 0 && de <= 0)
298			return (0);
299		else
300			return (ENXIO);
301	}
302
303	if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
304		/*
305		 * First open, open all consumers with an exclusive bit
306		 */
307		error = 0;
308		LIST_FOREACH(cp1, &gp->consumer, consumer) {
309			error = g_access(cp1, 0, 0, 1);
310			if (error) {
311				printf("FOX: access(%s,0,0,1) = %d\n",
312				    cp1->provider->name, error);
313				break;
314			}
315		}
316		if (error) {
317			LIST_FOREACH(cp1, &gp->consumer, consumer) {
318				if (cp1->ace)
319					g_access(cp1, 0, 0, -1);
320			}
321			return (error);
322		}
323	}
324	if (sc->path == NULL)
325		g_fox_select_path(gp, 0);
326	if (sc->path == NULL)
327		error = ENXIO;
328	else
329		error = g_access(sc->path, dr, dw, de);
330	if (error == 0) {
331		sc->cr += dr;
332		sc->cw += dw;
333		sc->ce += de;
334		if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
335			/*
336			 * Last close, remove e-bit on all consumers
337			 */
338			LIST_FOREACH(cp1, &gp->consumer, consumer)
339				g_access(cp1, 0, 0, -1);
340		}
341	}
342	return (error);
343}
344
345static struct g_geom *
346g_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
347{
348	struct g_geom *gp, *gp2;
349	struct g_provider *pp2;
350	struct g_consumer *cp, *cp2;
351	struct g_fox_softc *sc, *sc2;
352	int error;
353	u_int sectorsize;
354	u_char *buf;
355
356	g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name);
357	g_topology_assert();
358	if (!strcmp(pp->geom->class->name, mp->name))
359		return (NULL);
360	gp = g_new_geomf(mp, "%s.fox", pp->name);
361	gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO);
362	sc = gp->softc;
363
364	cp = g_new_consumer(gp);
365	g_attach(cp, pp);
366	error = g_access(cp, 1, 0, 0);
367	if (error) {
368		g_free(sc);
369		g_detach(cp);
370		g_destroy_consumer(cp);
371		g_destroy_geom(gp);
372		return(NULL);
373	}
374	do {
375		sectorsize = cp->provider->sectorsize;
376		g_topology_unlock();
377		buf = g_read_data(cp, 0, sectorsize, NULL);
378		g_topology_lock();
379		if (buf == NULL)
380			break;
381		if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC)))
382			break;
383
384		/*
385		 * First we need to see if this a new path for an existing fox.
386		 */
387		LIST_FOREACH(gp2, &mp->geom, geom) {
388			sc2 = gp2->softc;
389			if (sc2 == NULL)
390				continue;
391			if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic))
392				continue;
393			break;
394		}
395		if (gp2 != NULL) {
396			/*
397			 * It was.  Create a new consumer for that fox,
398			 * attach it, and if the fox is open, open this
399			 * path with an exclusive count of one.
400			 */
401			printf("Adding path (%s) to fox (%s)\n",
402			    pp->name, gp2->name);
403			cp2 = g_new_consumer(gp2);
404			g_attach(cp2, pp);
405			pp2 = LIST_FIRST(&gp2->provider);
406			if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) {
407				error = g_access(cp2, 0, 0, 1);
408				if (error) {
409					/*
410					 * This is bad, or more likely,
411					 * the user is doing something stupid
412					 */
413					printf(
414	"WARNING: New path (%s) to fox(%s) not added: %s\n%s",
415					    cp2->provider->name, gp2->name,
416	"Could not get exclusive bit.",
417	"WARNING: This indicates a risk of data inconsistency."
418					);
419					g_detach(cp2);
420					g_destroy_consumer(cp2);
421				}
422			}
423			break;
424		}
425		printf("Creating new fox (%s)\n", pp->name);
426		sc->path = cp;
427		memcpy(sc->magic, buf + 16, sizeof sc->magic);
428		pp2 = g_new_providerf(gp, "%s", gp->name);
429		pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize;
430		pp2->sectorsize = sc->sectorsize = pp->sectorsize;
431printf("fox %s lock %p\n", gp->name, &sc->lock);
432
433		mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF);
434		TAILQ_INIT(&sc->queue);
435		g_error_provider(pp2, 0);
436	} while (0);
437	if (buf != NULL)
438		g_free(buf);
439	g_access(cp, -1, 0, 0);
440
441	if (!LIST_EMPTY(&gp->provider))
442		return (gp);
443
444	g_free(gp->softc);
445	g_detach(cp);
446	g_destroy_consumer(cp);
447	g_destroy_geom(gp);
448	return (NULL);
449}
450
451static int
452g_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
453{
454	struct g_fox_softc *sc;
455
456	g_topology_assert();
457	sc = gp->softc;
458	mtx_destroy(&sc->lock);
459	g_free(gp->softc);
460	gp->softc = NULL;
461	g_wither_geom(gp, ENXIO);
462	return (0);
463}
464
465static struct g_class g_fox_class	= {
466	.name = FOX_CLASS_NAME,
467	.version = G_VERSION,
468	.taste = g_fox_taste,
469	.destroy_geom = g_fox_destroy_geom,
470	.start = g_fox_start,
471	.spoiled = g_fox_orphan,
472	.orphan = g_fox_orphan,
473	.access= g_fox_access,
474};
475
476DECLARE_GEOM_CLASS(g_fox_class, g_fox);
477