geom_vinum_plex.c revision 138110
/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 138110 2004-11-26 12:01:00Z le $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static void gv_plex_completed_request(struct gv_plex *, struct bio *);
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
static void gv_plex_worker(void *);
static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);

/* XXX: is this the place to catch dying subdisks? */
static void
gv_plex_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_plex *p;
	int error;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);

	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	error = cp->provider->error;
	if (error == 0)
		error = ENXIO;
	g_detach(cp);
	g_destroy_consumer(cp);
	if (!LIST_EMPTY(&gp->consumer))
		return;

	p = gp->softc;
	if (p != NULL) {
		gv_kill_plex_thread(p);
		p->geom = NULL;
		p->provider = NULL;
		p->consumer = NULL;
	}
	gp->softc = NULL;
	g_wither_geom(gp, error);
}

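/*
 * Completion callback used for some cloned sub-requests (e.g.
 * synchronisation requests): mark the BIO as done and queue it back to
 * the plex worker thread for post-processing.
 */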
void
gv_plex_done(struct bio *bp)
{
	struct gv_plex *p;
	struct gv_bioq *bq;

	p = bp->bio_from->geom->softc;
	bp->bio_cflags |= GV_BIO_DONE;
	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
	bq->bp = bp;
	mtx_lock(&p->bqueue_mtx);
	TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

/* Find the correct subdisk to send the bio to and build a bio to send. */
static int
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
	struct g_geom *gp;
	struct gv_sd *s;
	struct bio *cbp, *pbp;
	int i, sdno;
	off_t len_left, real_len, real_off;
	off_t stripeend, stripeno, stripestart;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		return (ENXIO);

	s = NULL;
	gp = bp->bio_to->geom;

	/*
	 * We only handle concatenated and striped plexes here.  RAID5 plexes
	 * are handled in gv_build_raid5_req().
	 */
	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts.  The subdisks in
		 * this list must be ordered by plex_offset.
		 */
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff)
				break;
		}
		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* Calculate corresponding offsets on disk. */
		real_off = boff - s->plex_offset;
		len_left = s->size - real_off;
		real_len = (bcount > len_left) ? len_left : bcount;
		break;

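	/*
	 * Worked example for the striped mapping below: with a 64 KB stripe
	 * size and three subdisks, plex offset 200 KB falls into stripe 3.
	 * That stripe lives on subdisk 0 (3 % 3) and is the second stripe
	 * there, so the request maps to subdisk offset 64 KB + 8 KB = 72 KB.
	 */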
	case GV_PLEX_STRIPED:
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;

		/* The number of the subdisk where the stripe resides. */
		sdno = stripeno % p->sdcount;

		/* Find the right subdisk. */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (i == sdno)
				break;
			i++;
		}

		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* The offset of the stripe from the start of the subdisk. */
		stripestart = (stripeno / p->sdcount) *
		    p->stripesize;

		/* The offset at the end of the stripe. */
		stripeend = stripestart + p->stripesize;

		/* The offset of the request on this subdisk. */
		real_off = boff - (stripeno * p->stripesize) +
		    stripestart;

		/* The length left in this stripe. */
		len_left = stripeend - real_off;

		real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (EINVAL);
	}

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;

	case GV_SD_STALE:
		if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
			return (ENXIO);

		printf("GEOM_VINUM: sd %s is initializing\n", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;

	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			return (ENXIO);
		break;

	default:
		/* All other subdisk states mean it's not accessible. */
		return (ENXIO);
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL)
		return (ENOMEM);
	cbp->bio_offset = real_off;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = g_std_done;
	cbp->bio_caller2 = s->consumer;
	if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
		cbp->bio_cflags |= GV_BIO_SYNCREQ;
		cbp->bio_done = gv_plex_done;
	}

	if (bp->bio_driver1 == NULL) {
		bp->bio_driver1 = cbp;
	} else {
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		pbp->bio_caller1 = cbp;
	}

	return (0);
}

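/*
 * GEOM ->start method of a plex: accept read, write and delete requests
 * and queue them for the plex worker thread.
 */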
static void
gv_plex_start(struct bio *bp)
{
	struct gv_plex *p;
	struct gv_bioq *bq;

	switch(bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	/*
	 * We cannot handle this request if too many of our subdisks are
	 * inaccessible.
	 */
	p = bp->bio_to->geom->softc;
	if ((p->state < GV_PLEX_DEGRADED) &&
	    !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
	if (bq == NULL) {
		/* We must not sleep here, so fail the request instead. */
		g_io_deliver(bp, ENOMEM);
		return;
	}
	bq->bp = bp;
	mtx_lock(&p->bqueue_mtx);
	TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

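/*
 * Main loop of the per-plex worker thread: take BIOs off the plex queue
 * and dispatch them.  Completed sub-requests are finished up, sub-requests
 * that were held back because of a stripe collision are retried, and new
 * requests are mapped onto the subdisks.
 */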
static void
gv_plex_worker(void *arg)
{
	struct bio *bp;
	struct gv_plex *p;
	struct gv_sd *s;
	struct gv_bioq *bq;

	p = arg;
	KASSERT(p != NULL, ("NULL p"));

	mtx_lock(&p->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (p->flags & GV_PLEX_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bq = TAILQ_FIRST(&p->bqueue);
		if (bq == NULL) {
			msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		TAILQ_REMOVE(&p->bqueue, bq, queue);
		mtx_unlock(&p->bqueue_mtx);

		bp = bq->bp;

		/* A completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
			g_free(bq);

			if (bp->bio_cflags & GV_BIO_SYNCREQ ||
			    bp->bio_cflags & GV_BIO_REBUILD) {
				s = bp->bio_to->private;
				if (bp->bio_error == 0)
					s->initialized += bp->bio_length;
				if (s->initialized >= s->size) {
					g_topology_lock();
					gv_set_sd_state(s, GV_SD_UP,
					    GV_SETSTATE_CONFIG);
					g_topology_unlock();
					s->initialized = 0;
				}
			}

			if (bp->bio_cflags & GV_BIO_SYNCREQ)
				g_std_done(bp);
			else
				gv_plex_completed_request(p, bp);
		/*
		 * A sub-request that was held back because it interfered with
		 * another sub-request.
		 */
		} else if (bp->bio_cflags & GV_BIO_ONHOLD) {
			/* Is it still locked out? */
			if (gv_stripe_active(p, bp)) {
				/* Park the bio on the waiting queue. */
				mtx_lock(&p->bqueue_mtx);
				TAILQ_INSERT_TAIL(&p->wqueue, bq, queue);
				mtx_unlock(&p->bqueue_mtx);
			} else {
				g_free(bq);
				bp->bio_cflags &= ~GV_BIO_ONHOLD;
				g_io_request(bp, bp->bio_caller2);
			}

		/* A normal request to this plex. */
		} else {
			g_free(bq);
			gv_plex_normal_request(p, bp);
		}

		mtx_lock(&p->bqueue_mtx);
	}
	mtx_unlock(&p->bqueue_mtx);
	p->flags |= GV_PLEX_THREAD_DEAD;
	wakeup(p);

	kthread_exit(ENXIO);
}

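/*
 * Handle the write phase of a normal RAID5 write: XOR the held-back data
 * sub-request into the parity buffer and issue it, then, on the next
 * completion, issue the parity write itself.  Returns 1 once no
 * sub-requests remain outstanding for this packet.
 */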
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		g_io_request(cbp, cbp->bio_caller2);
		finished = 0;
	}

	return (finished);
}

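/*
 * Check a RAID5 parity packet (GV_BIO_CHECK): compare the completing
 * sub-request's data byte by byte against the parity buffer held in the
 * packet.  On a mismatch the original request is flagged with EAGAIN and,
 * if a rebuild was requested (GV_BIO_PARITY), the corrected parity block
 * is written back out.
 */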
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != cbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
				g_io_request(cbp, cbp->bio_caller2);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(cbp);
		}

	}

	return (finished);
}

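/*
 * Post-process a completed sub-request of this plex: fold RAID5
 * sub-requests back into their packet, update the parent's error and
 * completion state, and deliver the original request once all of its
 * children have come in.
 */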
void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	int i;

	wp = bp->bio_driver1;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL)
			break;

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				for (i = 0; i < wp->length; i++)
					wp->data[i] ^= bp->bio_data[i];
				break;
			}
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			bp->bio_parent->bio_completed += wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				TAILQ_CONCAT(&p->bqueue, &p->wqueue, queue);
				mtx_unlock(&p->bqueue_mtx);
			}
			g_free(wp);
		}

		break;
	case BIO_WRITE:
		if (wp == NULL)
			break;

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				cbp = wp->parity;
				if (cbp != NULL) {
					for (i = 0; i < wp->length; i++)
						cbp->bio_data[i] ^=
						    bp->bio_data[i];
				}
				break;
			}
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				bp->bio_parent->bio_completed += wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				TAILQ_CONCAT(&p->bqueue, &p->wqueue, queue);
				mtx_unlock(&p->bqueue_mtx);
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children)
		g_io_deliver(pbp, pbp->bio_error);

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

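/*
 * Handle a fresh request to this plex: split it along subdisk or stripe
 * boundaries, build the sub-requests (RAID5 plexes get a request packet
 * per stripe), and fire them off to the consumers of the subdisks.
 */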
void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp, *wp2;
	caddr_t addr;
	off_t bcount, boff;
	int err;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request, we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single write
		 * request involves several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
			wp->bio = bp;
			TAILQ_INIT(&wp->bits);

			if (bp->bio_cflags & GV_BIO_REBUILD)
				err = gv_rebuild_raid5(p, wp, bp, addr,
				    boff, bcount);
			else if (bp->bio_cflags & GV_BIO_CHECK)
				err = gv_check_raid5(p, wp, bp, addr,
				    boff, bcount);
			else
				err = gv_build_raid5_req(p, wp, bp, addr,
				    boff, bcount);

			/*
			 * Building the sub-request failed, so we probably
			 * need to clean up a lot.
			 */
			if (err) {
				printf("GEOM_VINUM: plex request failed for ");
				g_print_bio(bp);
				printf("\n");
				TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
					TAILQ_REMOVE(&wp->bits, bq, queue);
					g_free(bq);
				}
				if (wp->waiting != NULL) {
					if (wp->waiting->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->waiting->bio_data);
					g_destroy_bio(wp->waiting);
				}
				if (wp->parity != NULL) {
					if (wp->parity->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->parity->bio_data);
					g_destroy_bio(wp->parity);
				}
				g_free(wp);

				TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
					if (wp->bio == bp) {
						TAILQ_REMOVE(&p->packets, wp,
						    list);
						TAILQ_FOREACH_SAFE(bq,
						    &wp->bits, queue, bq2) {
							TAILQ_REMOVE(&wp->bits,
							    bq, queue);
							g_free(bq);
						}
						g_free(wp);
					}
				}

				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					if (cbp->bio_cflags & GV_BIO_MALLOC)
						g_free(cbp->bio_data);
					g_destroy_bio(cbp);
					cbp = pbp;
				}

				g_io_deliver(bp, err);
				return;
			}

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			err = gv_plexbuffer(p, bp, addr, boff, bcount);

			/* Building the sub-request failed. */
			if (err) {
				printf("GEOM_VINUM: plex request failed for ");
				g_print_bio(bp);
				printf("\n");
				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					g_destroy_bio(cbp);
					cbp = pbp;
				}
				g_io_deliver(bp, err);
				return;
			}
		}

		/* Abuse bio_caller1 as a linked list. */
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		bcount -= pbp->bio_length;
		addr += pbp->bio_length;
		boff += pbp->bio_length;
	}

	/* Fire off all sub-requests. */
	pbp = bp->bio_driver1;
	while (pbp != NULL) {
		/*
		 * RAID5 sub-requests need to come in the correct order,
		 * otherwise we trip over the parity, as it might be
		 * overwritten by another sub-request.
		 */
		if (pbp->bio_driver1 != NULL &&
		    gv_stripe_active(p, pbp)) {
			/* Park the bio on the waiting queue. */
			pbp->bio_cflags |= GV_BIO_ONHOLD;
			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
			bq->bp = pbp;
			mtx_lock(&p->bqueue_mtx);
			TAILQ_INSERT_TAIL(&p->wqueue, bq, queue);
			mtx_unlock(&p->bqueue_mtx);
		} else
			g_io_request(pbp, pbp->bio_caller2);
		pbp = pbp->bio_caller1;
	}
}

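/*
 * GEOM ->access method of a plex: propagate the access counts to all
 * consumers (i.e. the subdisks below us) and roll the counts back on the
 * already-handled consumers if one of them fails.
 */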
static int
gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	int error;

	gp = pp->geom;

	error = ENXIO;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		error = g_access(cp, dr, dw, de);
		if (error) {
			LIST_FOREACH(cp2, &gp->consumer, consumer) {
				if (cp == cp2)
					break;
				g_access(cp2, -dr, -dw, -de);
			}
			return (error);
		}
	}
	return (error);
}

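/*
 * Taste a provider offered by a VINUMDRIVE geom (i.e. a subdisk), hook
 * the subdisk into its plex, and create or extend the plex geom and its
 * provider accordingly.
 */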
static struct g_geom *
gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	struct g_provider *pp2;
	struct gv_plex *p;
	struct gv_sd *s;
	struct gv_softc *sc;
	int error;

	g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* We only want to attach to subdisks. */
	if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
		return (NULL);

	/* Find the VINUM class and its associated geom. */
	gp = find_vinum_geom();
	if (gp == NULL)
		return (NULL);
	sc = gp->softc;
	KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));

	/* Find out which subdisk the offered provider corresponds to. */
	s = pp->private;
	KASSERT(s != NULL, ("gv_plex_taste: NULL s"));

	/* Now find the plex this subdisk belongs to. */
	p = gv_find_plex(sc, s->plex);
	KASSERT(p != NULL, ("gv_plex_taste: NULL p"));

	/*
	 * Add this subdisk to this plex.  Since we trust the on-disk
	 * configuration, we don't check the given value (should we?).
	 * XXX: shouldn't be done here
	 */
	gv_sd_to_plex(p, s, 0);

	/* Now check if there's already a geom for this plex. */
	gp = p->geom;

	/* Yes, there is already a geom, so we just add the consumer. */
	if (gp != NULL) {
		cp2 = LIST_FIRST(&gp->consumer);
		/* Need to attach a new consumer to this subdisk. */
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error) {
			printf("geom_vinum: couldn't attach consumer to %s\n",
			    pp->name);
			g_destroy_consumer(cp);
			return (NULL);
		}
		/* Adjust the access counts of the new consumer. */
		if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
			error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
			if (error) {
				printf("geom_vinum: couldn't set access counts"
				    " for consumer on %s\n", pp->name);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
		}
		s->consumer = cp;

		/* Adjust the size of the providers this plex has. */
		LIST_FOREACH(pp2, &gp->provider, provider)
			pp2->mediasize = p->size;

		/* Update the size of the volume this plex is attached to. */
		if (p->vol_sc != NULL)
			gv_update_vol_size(p->vol_sc, p->size);

		return (NULL);

	/* We need to create a new geom. */
	} else {
		gp = g_new_geomf(mp, "%s", p->name);
		gp->start = gv_plex_start;
		gp->orphan = gv_plex_orphan;
		gp->access = gv_plex_access;
		gp->softc = p;
		p->geom = gp;

		TAILQ_INIT(&p->packets);
		TAILQ_INIT(&p->bqueue);
		TAILQ_INIT(&p->wqueue);
		mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
		    p->name);
		p->flags |= GV_PLEX_THREAD_ACTIVE;

		/* Attach a consumer to this provider. */
		cp = g_new_consumer(gp);
		g_attach(cp, pp);
		s->consumer = cp;

		/* Create a provider for the outside world. */
		pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
		pp2->mediasize = p->size;
		pp2->sectorsize = pp->sectorsize;
		p->provider = pp2;
		g_error_provider(pp2, 0);
		return (gp);
	}
}

static int
gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
	struct gv_plex *p;

	g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
	g_topology_assert();

	p = gp->softc;

	KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));

	/*
	 * Check whether the worker thread of this plex is still active and
	 * signal it to self-destruct.
	 */
	gv_kill_plex_thread(p);
	/* g_free(sc); */
	g_wither_geom(gp, ENXIO);
	return (0);
}

#define	VINUMPLEX_CLASS_NAME "VINUMPLEX"

static struct g_class g_vinum_plex_class = {
	.name = VINUMPLEX_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_plex_taste,
	.destroy_geom = gv_plex_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);