/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 184292 2008-10-26 17:20:37Z lulf $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static void gv_plex_completed_request(struct gv_plex *, struct bio *);
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
static void gv_plex_worker(void *);
static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);

/* XXX: is this the place to catch dying subdisks? */
static void
gv_plex_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_plex *p;
	int error;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);

	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	error = cp->provider->error;
	if (error == 0)
		error = ENXIO;
	g_detach(cp);
	g_destroy_consumer(cp);
	if (!LIST_EMPTY(&gp->consumer))
		return;

	p = gp->softc;
	if (p != NULL) {
		gv_kill_plex_thread(p);
		p->geom = NULL;
		p->provider = NULL;
		p->consumer = NULL;
	}
	gp->softc = NULL;
	g_wither_geom(gp, error);
}

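/*
 * Completion callback for sub-requests.  We only mark the bio as done and
 * queue it back to the plex worker thread here; the actual post-processing
 * happens in gv_plex_worker(), presumably to keep it out of the context of
 * the GEOM up thread.
 */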
void
gv_plex_done(struct bio *bp)
{
	struct gv_plex *p;

	p = bp->bio_from->geom->softc;
	bp->bio_cflags |= GV_BIO_DONE;
	mtx_lock(&p->bqueue_mtx);
	bioq_insert_tail(p->bqueue, bp);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

/* Find the subdisk this request belongs to, and build a clone bio for it. */
static int
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
	struct g_geom *gp;
	struct gv_sd *s;
	struct bio *cbp, *pbp;
	int i, sdno;
	off_t len_left, real_len, real_off;
	off_t stripeend, stripeno, stripestart;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		return (ENXIO);

	s = NULL;
	gp = bp->bio_to->geom;

	/*
	 * We only handle concatenated and striped plexes here.  RAID5 plexes
	 * are handled in build_raid5_request().
	 */
	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts.  The subdisks in
		 * this list must be ordered by plex_offset.
		 */
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff)
				break;
		}
		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* Calculate corresponding offsets on disk. */
		real_off = boff - s->plex_offset;
		len_left = s->size - real_off;
		real_len = (bcount > len_left) ? len_left : bcount;
		break;

	case GV_PLEX_STRIPED:
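		/*
		 * Illustration (hypothetical numbers): with a 64 KB stripe
		 * size and 3 subdisks, plex offset 200704 falls into stripe
		 * 3, which lives on subdisk 0 and starts at offset 65536 on
		 * it; the request starts 4096 bytes into that stripe, i.e.
		 * at subdisk offset 69632, with 61440 bytes left in the
		 * stripe.
		 */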
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;

		/* The number of the subdisk where the stripe resides. */
		sdno = stripeno % p->sdcount;

		/* Find the right subdisk. */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (i == sdno)
				break;
			i++;
		}

		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* The offset of the stripe from the start of the subdisk. */
		stripestart = (stripeno / p->sdcount) *
		    p->stripesize;

		/* The offset at the end of the stripe. */
		stripeend = stripestart + p->stripesize;

		/* The offset of the request on this subdisk. */
		real_off = boff - (stripeno * p->stripesize) +
		    stripestart;

		/* The length left in this stripe. */
		len_left = stripeend - real_off;

		real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (EINVAL);
	}

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;

	case GV_SD_STALE:
		if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
			return (ENXIO);

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;

	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			return (ENXIO);
		break;

	default:
		/* All other subdisk states mean it's not accessible. */
		return (ENXIO);
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL)
		return (ENOMEM);
	cbp->bio_offset = real_off;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = g_std_done;
	cbp->bio_caller2 = s->consumer;
	if (bp->bio_cflags & GV_BIO_SYNCREQ) {
		cbp->bio_cflags |= GV_BIO_SYNCREQ;
		cbp->bio_done = gv_plex_done;
	}

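	/*
	 * Chain the clone into the list of sub-requests hanging off the
	 * original bio: bp->bio_driver1 points to the first clone, and the
	 * clones are linked through their bio_caller1 fields.
	 */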
	if (bp->bio_driver1 == NULL) {
		bp->bio_driver1 = cbp;
	} else {
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		pbp->bio_caller1 = cbp;
	}

	return (0);
}

static void
gv_plex_start(struct bio *bp)
{
	struct gv_plex *p;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	/*
	 * We cannot handle this request if too many of our subdisks are
	 * inaccessible.
	 */
	p = bp->bio_to->geom->softc;
	if ((p->state < GV_PLEX_DEGRADED) &&
	    !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	mtx_lock(&p->bqueue_mtx);
	bioq_disksort(p->bqueue, bp);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

static void
gv_plex_worker(void *arg)
{
	struct bio *bp;
	struct gv_plex *p;
	struct gv_sd *s;

	p = arg;
	KASSERT(p != NULL, ("NULL p"));

	mtx_lock(&p->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (p->flags & GV_PLEX_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bp = bioq_takefirst(p->bqueue);
		if (bp == NULL) {
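			/*
			 * No work to do; sleep until we are woken up.  The
			 * timeout presumably guards against missed wakeups.
			 */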
			msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		mtx_unlock(&p->bqueue_mtx);

		/* A completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
			if (bp->bio_cflags & GV_BIO_SYNCREQ ||
			    bp->bio_cflags & GV_BIO_REBUILD) {
				s = bp->bio_to->private;
				if (bp->bio_error == 0)
					s->initialized += bp->bio_length;
				if (s->initialized >= s->size) {
					g_topology_lock();
					gv_set_sd_state(s, GV_SD_UP,
					    GV_SETSTATE_CONFIG);
					g_topology_unlock();
					s->initialized = 0;
				}
			}

			if (bp->bio_cflags & GV_BIO_SYNCREQ)
				g_std_done(bp);
			else
				gv_plex_completed_request(p, bp);
		/*
		 * A sub-request that was held back because it interfered with
		 * another sub-request.
		 */
		} else if (bp->bio_cflags & GV_BIO_ONHOLD) {
			/* Is it still locked out? */
			if (gv_stripe_active(p, bp)) {
				/* Park the bio on the waiting queue. */
				mtx_lock(&p->bqueue_mtx);
				bioq_disksort(p->wqueue, bp);
				mtx_unlock(&p->bqueue_mtx);
			} else {
				bp->bio_cflags &= ~GV_BIO_ONHOLD;
				g_io_request(bp, bp->bio_caller2);
			}

		/* A normal request to this plex. */
		} else
			gv_plex_normal_request(p, bp);

		mtx_lock(&p->bqueue_mtx);
	}
	mtx_unlock(&p->bqueue_mtx);
	p->flags |= GV_PLEX_THREAD_DEAD;
	wakeup(p);

	kproc_exit(ENXIO);
}

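/*
 * Handle the parity data of a normal RAID5 write: if a data write is still
 * waiting, XOR its data into the parity buffer and issue it; otherwise issue
 * the parity write itself.  Returns 1 once no more I/O is outstanding.
 */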
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		g_io_request(cbp, cbp->bio_caller2);
		finished = 0;
	}

	return (finished);
}

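/*
 * Handle the parity data of a RAID5 parity check: compare the computed
 * parity against the parity block read from disk, flag a mismatch with
 * EAGAIN on the original request and, if GV_BIO_PARITY asks for a rebuild,
 * rewrite the parity block.  Returns 1 once no more I/O is outstanding.
 */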
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it, if requested. */
			if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
				g_io_request(pbp, pbp->bio_caller2);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}

	}

	return (finished);
}

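/*
 * Handle a completed sub-request.  For RAID5 packets, completed reads have
 * their data XORed into the result buffer and completed writes have theirs
 * XORed into the pending parity block; once a packet is done, bios parked on
 * the waiting queue are rescheduled.  When all sub-requests of the original
 * request are in, it is delivered.
 */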
void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	int i;

	wp = bp->bio_driver1;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL)
			break;

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				for (i = 0; i < wp->length; i++)
					wp->data[i] ^= bp->bio_data[i];
				break;
			}
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			bp->bio_parent->bio_completed += wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					bioq_disksort(p->bqueue, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				mtx_unlock(&p->bqueue_mtx);
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		if (wp == NULL)
			break;

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				cbp = wp->parity;
				if (cbp != NULL) {
					for (i = 0; i < wp->length; i++)
						cbp->bio_data[i] ^=
						    bp->bio_data[i];
				}
				break;
			}
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				bp->bio_parent->bio_completed += wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					bioq_disksort(p->bqueue, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				mtx_unlock(&p->bqueue_mtx);
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children)
		g_io_deliver(pbp, pbp->bio_error);

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp, *wp2;
	caddr_t addr;
	off_t bcount, boff;
	int err;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request; we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single write
		 * request involves several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
			wp->bio = bp;
			TAILQ_INIT(&wp->bits);

			if (bp->bio_cflags & GV_BIO_REBUILD)
				err = gv_rebuild_raid5(p, wp, bp, addr,
				    boff, bcount);
			else if (bp->bio_cflags & GV_BIO_CHECK)
				err = gv_check_raid5(p, wp, bp, addr,
				    boff, bcount);
			else
				err = gv_build_raid5_req(p, wp, bp, addr,
				    boff, bcount);

			/*
			 * Building the sub-request failed; we probably need
			 * to clean up a lot.
			 */
			if (err) {
				G_VINUM_LOGREQ(0, bp, "plex request failed.");
				TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
					TAILQ_REMOVE(&wp->bits, bq, queue);
					g_free(bq);
				}
				if (wp->waiting != NULL) {
					if (wp->waiting->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->waiting->bio_data);
					g_destroy_bio(wp->waiting);
				}
				if (wp->parity != NULL) {
					if (wp->parity->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->parity->bio_data);
					g_destroy_bio(wp->parity);
				}
				g_free(wp);

				TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
					if (wp->bio == bp) {
						TAILQ_REMOVE(&p->packets, wp,
						    list);
						TAILQ_FOREACH_SAFE(bq,
						    &wp->bits, queue, bq2) {
							TAILQ_REMOVE(&wp->bits,
							    bq, queue);
							g_free(bq);
						}
						g_free(wp);
					}
				}

				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					if (cbp->bio_cflags & GV_BIO_MALLOC)
						g_free(cbp->bio_data);
					g_destroy_bio(cbp);
					cbp = pbp;
				}

				g_io_deliver(bp, err);
				return;
			}

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			err = gv_plexbuffer(p, bp, addr, boff, bcount);

			/* Building the sub-request failed. */
			if (err) {
				G_VINUM_LOGREQ(0, bp, "plex request failed.");
				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					g_destroy_bio(cbp);
					cbp = pbp;
				}
				g_io_deliver(bp, err);
				return;
			}
		}

		/* Abuse bio_caller1 as a linked list. */
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		bcount -= pbp->bio_length;
		addr += pbp->bio_length;
		boff += pbp->bio_length;
	}

	/* Fire off all sub-requests. */
	pbp = bp->bio_driver1;
	while (pbp != NULL) {
		/*
		 * RAID5 sub-requests need to be issued in the correct order;
		 * otherwise we trip over the parity, as it might be
		 * overwritten by another sub-request.
		 */
		if (pbp->bio_driver1 != NULL &&
		    gv_stripe_active(p, pbp)) {
			/* Park the bio on the waiting queue. */
			pbp->bio_cflags |= GV_BIO_ONHOLD;
			mtx_lock(&p->bqueue_mtx);
			bioq_disksort(p->wqueue, pbp);
			mtx_unlock(&p->bqueue_mtx);
		} else
			g_io_request(pbp, pbp->bio_caller2);
		pbp = pbp->bio_caller1;
	}
}

static int
gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	int error;

	gp = pp->geom;

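	/*
	 * Pass the access request on to all of our consumers.  If one of
	 * them fails, roll back the counts we already granted before
	 * returning the error.
	 */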
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		error = g_access(cp, dr, dw, de);
		if (error) {
			LIST_FOREACH(cp2, &gp->consumer, consumer) {
				if (cp == cp2)
					break;
				g_access(cp2, -dr, -dw, -de);
			}
			return (error);
		}
	}
	return (0);
}

static struct g_geom *
gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	struct g_provider *pp2;
	struct gv_plex *p;
	struct gv_sd *s;
	struct gv_softc *sc;
	int error;

	g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* We only want to attach to subdisks. */
	if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
		return (NULL);

	/* Find the VINUM class and its associated geom. */
	gp = find_vinum_geom();
	if (gp == NULL)
		return (NULL);
	sc = gp->softc;
	KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));

	/* Find out which subdisk the offered provider corresponds to. */
	s = pp->private;
	KASSERT(s != NULL, ("gv_plex_taste: NULL s"));

	/* Now find the plex this subdisk belongs to. */
	p = gv_find_plex(sc, s->plex);
	if (p == NULL) {
		G_VINUM_DEBUG(0, "%s: NULL p for '%s'", __func__, s->name);
		return (NULL);
	}

	/*
	 * Add this subdisk to this plex.  Since we trust the on-disk
	 * configuration, we don't check the given value (should we?).
	 * XXX: shouldn't be done here
	 */
	gv_sd_to_plex(p, s, 0);

	/* Now check if there's already a geom for this plex. */
	gp = p->geom;

	/* Yes, there is already a geom, so we just add the consumer. */
	if (gp != NULL) {
		cp2 = LIST_FIRST(&gp->consumer);
		/* Need to attach a new consumer to this subdisk. */
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error) {
			G_VINUM_DEBUG(0, "unable to attach consumer to %s",
			    pp->name);
			g_destroy_consumer(cp);
			return (NULL);
		}
		/* Adjust the access counts of the new consumer. */
		if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
			error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
			if (error) {
				G_VINUM_DEBUG(0, "unable to set access counts"
				    " for consumer on %s", pp->name);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
		}
		s->consumer = cp;

		/* Adjust the size of the providers this plex has. */
		LIST_FOREACH(pp2, &gp->provider, provider)
			pp2->mediasize = p->size;

		/* Update the size of the volume this plex is attached to. */
		if (p->vol_sc != NULL)
			gv_update_vol_size(p->vol_sc, p->size);

		/*
		 * If necessary, create the bio queues, the queue mutex and a
		 * worker thread.
		 */
		if (p->bqueue == NULL) {
			p->bqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(p->bqueue);
		}
		if (p->wqueue == NULL) {
			p->wqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(p->wqueue);
		}
		if (mtx_initialized(&p->bqueue_mtx) == 0)
			mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
			kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
			    p->name);
			p->flags |= GV_PLEX_THREAD_ACTIVE;
		}

		return (NULL);

	/* We need to create a new geom. */
	} else {
		gp = g_new_geomf(mp, "%s", p->name);
		gp->start = gv_plex_start;
		gp->orphan = gv_plex_orphan;
		gp->access = gv_plex_access;
		gp->softc = p;
		p->geom = gp;

		TAILQ_INIT(&p->packets);
		p->bqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(p->bqueue);
		p->wqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(p->wqueue);
		mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
		    p->name);
		p->flags |= GV_PLEX_THREAD_ACTIVE;

		/* Attach a consumer to this provider. */
		cp = g_new_consumer(gp);
		g_attach(cp, pp);
		s->consumer = cp;

		/* Create a provider for the outside world. */
		pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
		pp2->mediasize = p->size;
		pp2->sectorsize = pp->sectorsize;
		p->provider = pp2;
		g_error_provider(pp2, 0);
		return (gp);
	}
}

static int
gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
	struct gv_plex *p;

	g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
	g_topology_assert();

	p = gp->softc;

	KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));

	/*
	 * If this is a RAID5 plex, check if its worker thread is still active
	 * and signal it to self-destruct.
	 */
	gv_kill_plex_thread(p);
	/* g_free(sc); */
	g_wither_geom(gp, ENXIO);
	return (0);
}

#define	VINUMPLEX_CLASS_NAME "VINUMPLEX"

static struct g_class g_vinum_plex_class = {
	.name = VINUMPLEX_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_plex_taste,
	.destroy_geom = gv_plex_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);