/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_volume.c 184292 2008-10-26 17:20:37Z lulf $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>

static void gv_vol_completed_request(struct gv_volume *, struct bio *);
static void gv_vol_normal_request(struct gv_volume *, struct bio *);

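/*
 * Orphan callback, invoked when the provider below one of our consumers
 * goes away.  Drop any access counts, detach and destroy the consumer;
 * once the last consumer is gone, kill the volume worker thread and
 * wither the geom.
 */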
static void
gv_volume_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_volume *v;
	int error;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	error = cp->provider->error;
	if (error == 0)
		error = ENXIO;
	g_detach(cp);
	g_destroy_consumer(cp);
	if (!LIST_EMPTY(&gp->consumer))
		return;
	v = gp->softc;
	if (v != NULL) {
		gv_kill_vol_thread(v);
		v->geom = NULL;
	}
	gp->softc = NULL;
	g_wither_geom(gp, error);
}

/* We end up here after a sub-request to one of our plexes has completed. */
static void
gv_volume_done(struct bio *bp)
{
	struct gv_volume *v;

	v = bp->bio_from->geom->softc;
	bp->bio_cflags |= GV_BIO_DONE;
	mtx_lock(&v->bqueue_mtx);
	bioq_insert_tail(v->bqueue, bp);
	wakeup(v);
	mtx_unlock(&v->bqueue_mtx);
}

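/*
 * Start routine.  Unsupported commands (including BIO_GETATTR) are
 * rejected with EOPNOTSUPP, I/O on a volume that is not up fails with
 * ENXIO, and everything else is queued for the worker thread.
 */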
static void
gv_volume_start(struct bio *bp)
{
	struct gv_volume *v;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	v = bp->bio_to->geom->softc;
	if (v->state != GV_VOL_UP) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	mtx_lock(&v->bqueue_mtx);
	bioq_disksort(v->bqueue, bp);
	wakeup(v);
	mtx_unlock(&v->bqueue_mtx);
}

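/*
 * Per-volume worker thread.  All I/O funnels through here: fresh
 * requests are dispatched to a plex via gv_vol_normal_request(), while
 * sub-requests flagged GV_BIO_DONE by gv_volume_done() are finished off
 * via gv_vol_completed_request().  The queue mutex is dropped while a
 * BIO is being processed and retaken for the next iteration.
 */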
static void
gv_vol_worker(void *arg)
{
	struct bio *bp;
	struct gv_volume *v;

	v = arg;
	KASSERT(v != NULL, ("NULL v"));
	mtx_lock(&v->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (v->flags & GV_VOL_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bp = bioq_takefirst(v->bqueue);
		if (bp == NULL) {
			msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		mtx_unlock(&v->bqueue_mtx);

		if (bp->bio_cflags & GV_BIO_DONE)
			gv_vol_completed_request(v, bp);
		else
			gv_vol_normal_request(v, bp);

		mtx_lock(&v->bqueue_mtx);
	}
	mtx_unlock(&v->bqueue_mtx);
	v->flags |= GV_VOL_THREAD_DEAD;
	wakeup(v);

	kproc_exit(ENXIO);
}

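/*
 * Finish a completed sub-request.  A failed read is retried on the next
 * plex, if one is left; a write or delete succeeds as long as at least
 * one plex took it (GV_BIO_SUCCEED overrides any accumulated error).
 * The original request is delivered once all of its children have come
 * back.
 */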
static void
gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
{
	struct bio *pbp;
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;

	pbp = bp->bio_parent;

	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (bp->bio_error == 0)
			break;

		if (pbp->bio_cflags & GV_BIO_RETRY)
			break;

		/* Check if we have another plex left. */
		cp = bp->bio_from;
		gp = cp->geom;
		cp2 = LIST_NEXT(cp, consumer);
		if (cp2 == NULL)
			break;

		if (LIST_NEXT(cp2, consumer) == NULL)
			pbp->bio_cflags |= GV_BIO_RETRY;

		/* Hand the parent back to the worker to retry the read. */
		g_destroy_bio(bp);
		pbp->bio_children--;
		mtx_lock(&v->bqueue_mtx);
		bioq_disksort(v->bqueue, pbp);
		mtx_unlock(&v->bqueue_mtx);
		return;

	case BIO_WRITE:
	case BIO_DELETE:
		/* Remember if this write or delete request succeeded. */
		if (bp->bio_error == 0)
			pbp->bio_cflags |= GV_BIO_SUCCEED;
		break;
	}

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		if (pbp->bio_cflags & GV_BIO_SUCCEED)
			pbp->bio_error = 0;
		pbp->bio_completed = bp->bio_length;
		g_io_deliver(pbp, pbp->bio_error);
	}

	g_destroy_bio(bp);
}

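/*
 * Dispatch a fresh request.  A read goes to a single plex, chosen
 * round-robin starting after the plex that served the previous read; a
 * plex qualifies if it is up, or if it is a degraded RAID5 plex, which
 * can still serve reads from parity.  Writes and deletes are cloned and
 * fanned out to every plex that is at least degraded.
 */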
static void
gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_geom *gp;
	struct gv_plex *p, *lp;
	struct bio *cbp;

	gp = v->geom;

	switch (bp->bio_cmd) {
	case BIO_READ:
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			g_io_deliver(bp, ENOMEM);
			return;
		}
		cbp->bio_done = gv_volume_done;
		/*
		 * Try to find a good plex to send the request to.  The
		 * plex either has to be up, or be a degraded RAID5 plex.
		 */
		lp = v->last_read_plex;
		if (lp == NULL)
			lp = LIST_FIRST(&v->plexes);
		p = LIST_NEXT(lp, in_volume);
		do {
			if (p == NULL)
				p = LIST_FIRST(&v->plexes);
			if ((p->state > GV_PLEX_DEGRADED) ||
			    (p->state >= GV_PLEX_DEGRADED &&
			    p->org == GV_PLEX_RAID5))
				break;
			p = LIST_NEXT(p, in_volume);
		} while (p != lp);

		if (p == NULL ||
		    (p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) ||
		    (p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) {
			g_destroy_bio(cbp);
			bp->bio_children--;
			g_io_deliver(bp, ENXIO);
			return;
		}
		g_io_request(cbp, p->consumer);
		v->last_read_plex = p;

		break;

	case BIO_WRITE:
	case BIO_DELETE:
		bioq_init(&queue);
		LIST_FOREACH(p, &v->plexes, in_volume) {
			if (p->state < GV_PLEX_DEGRADED)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				/* Out of memory; undo the clones made so far. */
				for (cbp = bioq_first(&queue); cbp != NULL;
				    cbp = bioq_first(&queue)) {
					bioq_remove(&queue, cbp);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = gv_volume_done;
			cbp->bio_caller1 = p->consumer;
		}
		/* Fire off all sub-requests. */
		for (cbp = bioq_first(&queue); cbp != NULL;
		     cbp = bioq_first(&queue)) {
			bioq_remove(&queue, cbp);
			g_io_request(cbp, cbp->bio_caller1);
		}
		break;
	}
}

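/*
 * Access method.  Propagate the access change to every consumer (one
 * per attached plex); if any of them fails, roll back the counts
 * already granted and return the error.
 */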
static int
gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	int error;

	gp = pp->geom;

	error = ENXIO;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		error = g_access(cp, dr, dw, de);
		if (error) {
			LIST_FOREACH(cp2, &gp->consumer, consumer) {
				if (cp == cp2)
					break;
				g_access(cp2, -dr, -dw, -de);
			}
			return (error);
		}
	}
	return (error);
}

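/*
 * Taste method.  We only attach to VINUMPLEX providers whose volume is
 * known to the vinum configuration.  The first plex of a volume creates
 * the VINUMVOLUME geom, its bio queue, mutex and worker thread, and the
 * gvinum/<name> provider; subsequent plexes merely attach another
 * consumer, inheriting the access counts already held.
 */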
static struct g_geom *
gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_provider *pp2;
	struct g_consumer *cp, *ocp;
	struct gv_softc *sc;
	struct gv_volume *v;
	struct gv_plex *p;
	int error, first;

	g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* First, find the VINUM class and its associated geom. */
	gp = find_vinum_geom();
	if (gp == NULL)
		return (NULL);

	sc = gp->softc;
	KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));

	gp = pp->geom;

	/* We only want to attach to plexes. */
	if (strcmp(gp->class->name, "VINUMPLEX"))
		return (NULL);

	first = 0;
	p = gp->softc;

	/* Let's see if the volume this plex wants is already configured. */
	v = gv_find_vol(sc, p->volume);
	if (v == NULL)
		return (NULL);
	if (v->geom == NULL) {
		gp = g_new_geomf(mp, "%s", p->volume);
		gp->start = gv_volume_start;
		gp->orphan = gv_volume_orphan;
		gp->access = gv_volume_access;
		gp->softc = v;
		first++;
	} else
		gp = v->geom;

	/* Create bio queue, queue mutex, and worker thread, if necessary. */
	if (v->bqueue == NULL) {
		v->bqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(v->bqueue);
	}
	if (mtx_initialized(&v->bqueue_mtx) == 0)
		mtx_init(&v->bqueue_mtx, "gv_volume", NULL, MTX_DEF);

	if (!(v->flags & GV_VOL_THREAD_ACTIVE)) {
		kproc_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
		    v->name);
		v->flags |= GV_VOL_THREAD_ACTIVE;
	}

	/*
	 * Create a new consumer and attach it to the plex geom.  Since this
	 * volume might already have a plex attached, we need to adjust the
	 * access counts of the new consumer.
	 */
	ocp = LIST_FIRST(&gp->consumer);
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) {
		error = g_access(cp, ocp->acr, ocp->acw, ocp->ace);
		if (error) {
			G_VINUM_DEBUG(0, "failed g_access %s -> %s; "
			    "errno %d", v->name, p->name, error);
			g_detach(cp);
			g_destroy_consumer(cp);
			if (first)
				g_destroy_geom(gp);
			return (NULL);
		}
	}

	p->consumer = cp;

	if (p->vol_sc != v) {
		p->vol_sc = v;
		v->plexcount++;
		LIST_INSERT_HEAD(&v->plexes, p, in_volume);
	}

	/* For the first plex, set up the new VINUMVOLUME geom's provider. */
	if (first) {
		pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
		pp2->mediasize = pp->mediasize;
		pp2->sectorsize = pp->sectorsize;
		g_error_provider(pp2, 0);
		v->size = pp2->mediasize;
		v->geom = gp;
		return (gp);
	}

	return (NULL);
}

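/*
 * Destroy method.  Shut down the worker thread and wither the geom; the
 * wither machinery takes care of the consumers and providers.
 */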
static int
gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
	struct gv_volume *v;

	g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
	g_topology_assert();

	v = gp->softc;
	gv_kill_vol_thread(v);
	g_wither_geom(gp, ENXIO);
	return (0);
}

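/* Class glue: register the VINUMVOLUME class with GEOM. */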
#define	VINUMVOLUME_CLASS_NAME "VINUMVOLUME"

static struct g_class g_vinum_volume_class = {
	.name = VINUMVOLUME_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_volume_taste,
	.destroy_geom = gv_volume_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);