1130389Sle/*-
2190507Slulf * Copyright (c) 2004, 2007 Lukas Ertl
3190507Slulf * Copyright (c) 2007, 2009 Ulf Lilleengen
4130389Sle * All rights reserved.
5130389Sle *
6130389Sle * Redistribution and use in source and binary forms, with or without
7130389Sle * modification, are permitted provided that the following conditions
8130389Sle * are met:
9130389Sle * 1. Redistributions of source code must retain the above copyright
10130389Sle *    notice, this list of conditions and the following disclaimer.
11130389Sle * 2. Redistributions in binary form must reproduce the above copyright
12130389Sle *    notice, this list of conditions and the following disclaimer in the
13130389Sle *    documentation and/or other materials provided with the distribution.
14130389Sle *
15130389Sle * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16130389Sle * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17130389Sle * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18130389Sle * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19130389Sle * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20130389Sle * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21130389Sle * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22130389Sle * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23130389Sle * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24130389Sle * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25130389Sle * SUCH DAMAGE.
26130389Sle */
27130389Sle
28130389Sle#include <sys/cdefs.h>
29130389Sle__FBSDID("$FreeBSD$");
30130389Sle
31130389Sle#include <sys/param.h>
32130389Sle#include <sys/bio.h>
33130389Sle#include <sys/lock.h>
34130389Sle#include <sys/malloc.h>
35130389Sle#include <sys/systm.h>
36130389Sle
37130389Sle#include <geom/geom.h>
38130389Sle#include <geom/vinum/geom_vinum_var.h>
39130389Sle#include <geom/vinum/geom_vinum_raid5.h>
40130389Sle#include <geom/vinum/geom_vinum.h>
41130389Sle
/* Prototypes for file-local helpers. */
static int	gv_check_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static int	gv_normal_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static void	gv_plex_flush(struct gv_plex *);
static int	gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
		    int *, int);
static int 	gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
		    off_t,  caddr_t);
static void	gv_post_bio(struct gv_softc *, struct bio *);
52191856Slulf
/*
 * Entry point for I/O directed at a plex.  The request is walked in
 * pieces: each piece is turned into one or more sub-bios queued on
 * p->bqueue, and afterwards all queued sub-bios are issued to their
 * drive consumers (or parked on p->wqueue if they would collide with an
 * active RAID5 stripe).
 */
void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp;
	struct gv_sd *s;
	struct gv_raid5_packet *wp;
	caddr_t addr;
	off_t bcount, boff, len;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request, we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single request
		 * might involve several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = gv_raid5_start(p, bp, addr, boff, bcount);
			/* NULL means the request was consumed (e.g. failed
			 * or deferred inside gv_raid5_start); stop here. */
			if (wp == NULL)
				return;

			len = wp->length;

			/* An empty packet needs no tracking; a locked one is
			 * remembered so overlapping requests can be held. */
			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			len = gv_plex_normal_request(p, bp, boff, bcount, addr);
		}
		/* A negative length means the request failed or was delayed
		 * and has already been dealt with (delivered or queued). */
		if (len < 0)
			return;

		bcount -= len;
		addr += len;
		boff += len;
	}

	/*
	 * Fire off all sub-requests.  We get the correct consumer (== drive)
	 * to send each request to via the subdisk that was stored in
	 * cbp->bio_caller1.
	 */
	cbp = bioq_takefirst(p->bqueue);
	while (cbp != NULL) {
		/*
		 * RAID5 sub-requests need to come in correct order, otherwise
		 * we trip over the parity, as it might be overwritten by
		 * another sub-request.  We abuse cbp->bio_caller2 to mark
		 * potential overlap situations.
		 */
		if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
			/* Park the bio on the waiting queue. */
			cbp->bio_pflags |= GV_BIO_ONHOLD;
			bioq_disksort(p->wqueue, cbp);
		} else {
			s = cbp->bio_caller1;
			g_io_request(cbp, s->drive_sc->consumer);
		}
		cbp = bioq_takefirst(p->bqueue);
	}
}
125130389Sle
126130389Slestatic int
127190507Slulfgv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
128190507Slulf    off_t *real_len, int *sdno, int growing)
129130389Sle{
130130389Sle	struct gv_sd *s;
131190507Slulf	int i, sdcount;
132190507Slulf	off_t len_left, stripeend, stripeno, stripestart;
133130389Sle
134130389Sle	switch (p->org) {
135130389Sle	case GV_PLEX_CONCAT:
136130389Sle		/*
137130389Sle		 * Find the subdisk where this request starts.  The subdisks in
138130389Sle		 * this list must be ordered by plex_offset.
139130389Sle		 */
140190507Slulf		i = 0;
141130389Sle		LIST_FOREACH(s, &p->subdisks, in_plex) {
142130389Sle			if (s->plex_offset <= boff &&
143190507Slulf			    s->plex_offset + s->size > boff) {
144190507Slulf				*sdno = i;
145130389Sle				break;
146190507Slulf			}
147190507Slulf			i++;
148130389Sle		}
149190507Slulf		if (s == NULL || s->drive_sc == NULL)
150190507Slulf			return (GV_ERR_NOTFOUND);
151130389Sle
152130389Sle		/* Calculate corresponding offsets on disk. */
153190507Slulf		*real_off = boff - s->plex_offset;
154190507Slulf		len_left = s->size - (*real_off);
155190507Slulf		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
156190507Slulf		*real_len = (bcount > len_left) ? len_left : bcount;
157130389Sle		break;
158130389Sle
159130389Sle	case GV_PLEX_STRIPED:
160130389Sle		/* The number of the stripe where the request starts. */
161130389Sle		stripeno = boff / p->stripesize;
162190507Slulf		KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
163130389Sle
164190507Slulf		/* Take growing subdisks into account when calculating. */
165190507Slulf		sdcount = gv_sdcount(p, (boff >= p->synced));
166130389Sle
167190507Slulf		if (!(boff + bcount <= p->synced) &&
168190507Slulf		    (p->flags & GV_PLEX_GROWING) &&
169190507Slulf		    !growing)
170190507Slulf			return (GV_ERR_ISBUSY);
171190507Slulf		*sdno = stripeno % sdcount;
172130389Sle
173190507Slulf		KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0"));
174190507Slulf		stripestart = (stripeno / sdcount) *
175130389Sle		    p->stripesize;
176190507Slulf		KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
177130389Sle		stripeend = stripestart + p->stripesize;
178190507Slulf		*real_off = boff - (stripeno * p->stripesize) +
179130389Sle		    stripestart;
180190507Slulf		len_left = stripeend - *real_off;
181190507Slulf		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
182130389Sle
183190507Slulf		*real_len = (bcount <= len_left) ? bcount : len_left;
184130389Sle		break;
185130389Sle
186130389Sle	default:
187190507Slulf		return (GV_ERR_PLEXORG);
188130389Sle	}
189190507Slulf	return (0);
190190507Slulf}
191130389Sle
/*
 * Prepare a normal plex request: clone the caller's bio into a
 * subdisk-sized sub-request and queue it on p->bqueue.  Returns the
 * number of bytes covered by the sub-request, or -1 when the request
 * failed or was delayed (in which case it has already been delivered,
 * destroyed or queued on p->rqueue).
 */
static int
gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
    off_t bcount,  caddr_t addr)
{
	struct gv_sd *s;
	struct bio *cbp;
	off_t real_len, real_off;
	int i, err, sdno;

	s = NULL;
	sdno = -1;
	real_len = real_off = 0;

	err = ENXIO;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		goto bad;

	err = gv_plex_offset(p, boff, bcount, &real_off,
	    &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
	/* If the request was blocked, put it into wait. */
	if (err == GV_ERR_ISBUSY) {
		bioq_disksort(p->rqueue, bp);
		return (-1); /* "Fail", and delay request. */
	}
	if (err) {
		err = ENXIO;
		goto bad;
	}
	err = ENXIO;

	/* Find the right subdisk. */
	i = 0;
	LIST_FOREACH(s, &p->subdisks, in_plex) {
		if (i == sdno)
			break;
		i++;
	}

	/* Subdisk not found. */
	if (s == NULL || s->drive_sc == NULL)
		goto bad;

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;
	case GV_SD_DOWN:
		/* Down subdisks only accept nothing; warn about internal
		 * (administrative) requests that would need a stale sd. */
		if (bp->bio_pflags & GV_BIO_INTERNAL)
			G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
			    " order to perform administrative requests");
		goto bad;
	case GV_SD_STALE:
		/* A stale subdisk is only writable by a sync request, which
		 * pushes it into the initializing state below. */
		if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
			G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
			    "regular requests");
			goto bad;
		}

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;
	case GV_SD_INITIALIZING:
		/* Reads are not served while initializing; writes are. */
		if (bp->bio_cmd == BIO_READ)
			goto bad;
		break;
	default:
		/* All other subdisk states mean it's not accessible. */
		goto bad;
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		err = ENOMEM;
		goto bad;
	}
	cbp->bio_offset = real_off + s->drive_offset;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;

	/* Store the sub-requests now and let others issue them. */
	bioq_insert_tail(p->bqueue, cbp);
	return (real_len);
bad:
	G_VINUM_LOGREQ(0, bp, "plex request failed.");
	/* Building the sub-request failed. If internal BIO, do not deliver. */
	if (bp->bio_pflags & GV_BIO_INTERNAL) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		g_destroy_bio(bp);
		/* A failed internal request aborts any ongoing sync/rebuild/
		 * grow on this plex. */
		p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
		    GV_PLEX_GROWING);
		return (-1);
	}
	g_io_deliver(bp, err);
	return (-1);
}
296130389Sle
297190507Slulf/*
298190507Slulf * Handle a completed request to a striped or concatenated plex.
299190507Slulf */
300190507Slulfvoid
301190507Slulfgv_plex_normal_done(struct gv_plex *p, struct bio *bp)
302130389Sle{
303190507Slulf	struct bio *pbp;
304130389Sle
305190507Slulf	pbp = bp->bio_parent;
306190507Slulf	if (pbp->bio_error == 0)
307190507Slulf		pbp->bio_error = bp->bio_error;
308190507Slulf	g_destroy_bio(bp);
309190507Slulf	pbp->bio_inbed++;
310190507Slulf	if (pbp->bio_children == pbp->bio_inbed) {
311190507Slulf		/* Just set it to length since multiple plexes will
312190507Slulf		 * screw things up. */
313190507Slulf		pbp->bio_completed = pbp->bio_length;
314191856Slulf		if (pbp->bio_pflags & GV_BIO_SYNCREQ)
315190507Slulf			gv_sync_complete(p, pbp);
316191856Slulf		else if (pbp->bio_pflags & GV_BIO_GROW)
317190507Slulf			gv_grow_complete(p, pbp);
318190507Slulf		else
319190507Slulf			g_io_deliver(pbp, pbp->bio_error);
320135426Sle	}
321135426Sle}
322135426Sle
/*
 * Handle a completed request to a RAID-5 plex.  Sub-requests are
 * accounted against their gv_raid5_packet (bio_caller2); read data and
 * write data are XORed into the packet's parity buffer, and when the
 * packet drains, parity is verified/written and any bios held on the
 * waiting queue are re-posted.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	off_t completed;
	int i;

	completed = 0;
	sc = p->vinumconf;
	wp = bp->bio_caller2;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		/* A read without a packet is a plain degraded-free read. */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/* Drop this sub-request from the packet and XOR its data in
		 * (reconstruction of a missing block from the others). */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			for (i = 0; i < wp->length; i++)
				wp->data[i] ^= bp->bio_data[i];
			break;
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			/* Last sub-request of the packet has finished. */
			completed = wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		/* XXX can this ever happen? */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			cbp = wp->parity;
			if (cbp != NULL) {
				/* Fold the freshly written data into the
				 * pending parity buffer. */
				for (i = 0; i < wp->length; i++)
					cbp->bio_data[i] ^= bp->bio_data[i];
			}
			break;
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				completed = wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += completed;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		/* Hand it over for checking or delivery. */
		if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_CHECK)) {
			gv_parity_complete(p, pbp);
		} else if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_REBUILD)) {
			gv_rebuild_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_INIT) {
			gv_init_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
			gv_sync_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_GROW) {
			gv_grow_complete(p, pbp);
		} else {
			g_io_deliver(pbp, pbp->bio_error);
		}
	}

	/*
	 * Clean up what we allocated.  NOTE(review): this deliberately tests
	 * bio_cflags (set on the sub-bio), not bio_pflags as elsewhere —
	 * presumably the RAID5 code marks malloc'd sub-buffers there; confirm
	 * against gv_raid5_start before "fixing".
	 */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}
448138110Sle
/*
 * Parity-check phase of a RAID5 packet: first issue the delayed read of
 * the on-disk parity (wp->waiting), then compare it against the computed
 * parity (wp->parity); on mismatch flag EAGAIN on the parent and, if a
 * rewrite was requested (GV_BIO_PARITY), write the corrected parity out.
 * Returns 1 when the packet is finished, 0 while sub-requests are still
 * in flight.
 */
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	struct gv_sd *s;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		/* Phase 1: issue the pending read of the on-disk parity. */
		pbp = wp->waiting;
		wp->waiting = NULL;
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		/* Phase 2: the parity read has returned; compare it. */
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
				s = pbp->bio_caller1;
				g_io_request(pbp, s->drive_sc->consumer);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}

	}

	return (finished);
}
503138110Sle
504190507Slulfstatic int
505190507Slulfgv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
506135426Sle{
507135426Sle	struct bio *cbp, *pbp;
508190507Slulf	struct gv_sd *s;
509190507Slulf	int finished, i;
510135426Sle
511190507Slulf	finished = 1;
512135426Sle
513190507Slulf	if (wp->waiting != NULL) {
514190507Slulf		pbp = wp->waiting;
515190507Slulf		wp->waiting = NULL;
516190507Slulf		cbp = wp->parity;
517190507Slulf		for (i = 0; i < wp->length; i++)
518190507Slulf			cbp->bio_data[i] ^= pbp->bio_data[i];
519190507Slulf		s = pbp->bio_caller1;
520190507Slulf		g_io_request(pbp, s->drive_sc->consumer);
521190507Slulf		finished = 0;
522135426Sle
523190507Slulf	} else if (wp->parity != NULL) {
524190507Slulf		cbp = wp->parity;
525190507Slulf		wp->parity = NULL;
526190507Slulf		s = cbp->bio_caller1;
527190507Slulf		g_io_request(cbp, s->drive_sc->consumer);
528190507Slulf		finished = 0;
529190507Slulf	}
530130389Sle
531190507Slulf	return (finished);
532190507Slulf}
533135426Sle
534190507Slulf/* Flush the queue with delayed requests. */
535190507Slulfstatic void
536190507Slulfgv_plex_flush(struct gv_plex *p)
537190507Slulf{
538190507Slulf	struct gv_softc *sc;
539190507Slulf	struct bio *bp;
540135426Sle
541190507Slulf	sc = p->vinumconf;
542190507Slulf	bp = bioq_takefirst(p->rqueue);
543190507Slulf	while (bp != NULL) {
544190507Slulf		gv_plex_start(p, bp);
545190507Slulf		bp = bioq_takefirst(p->rqueue);
546190507Slulf	}
547190507Slulf}
548135426Sle
/*
 * Hand a bio over to the vinum worker: queue it on the softc's down
 * queue under the queue mutex and wake the worker thread sleeping on sc.
 */
static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{

	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(bp != NULL, ("NULL bp"));
	mtx_lock(&sc->bqueue_mtx);
	bioq_disksort(sc->bqueue_down, bp);
	wakeup(sc);
	mtx_unlock(&sc->bqueue_mtx);
}
560191856Slulf
561190507Slulfint
562190507Slulfgv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
563190507Slulf    off_t length, int type, caddr_t data)
564190507Slulf{
565190507Slulf	struct gv_softc *sc;
566190507Slulf	struct bio *bp;
567138110Sle
568190507Slulf	KASSERT(from != NULL, ("NULL from"));
569190507Slulf	KASSERT(to != NULL, ("NULL to"));
570190507Slulf	sc = from->vinumconf;
571190507Slulf	KASSERT(sc != NULL, ("NULL sc"));
572130389Sle
573190507Slulf	bp = g_new_bio();
574190507Slulf	if (bp == NULL) {
575190507Slulf		G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
576190507Slulf		    " %jd; out of memory", from->name, offset);
577190507Slulf		return (ENOMEM);
578135426Sle	}
579190507Slulf	bp->bio_length = length;
580190507Slulf	bp->bio_done = gv_done;
581191856Slulf	bp->bio_pflags |= GV_BIO_SYNCREQ;
582190507Slulf	bp->bio_offset = offset;
583190507Slulf	bp->bio_caller1 = from;
584190507Slulf	bp->bio_caller2 = to;
585190507Slulf	bp->bio_cmd = type;
586190507Slulf	if (data == NULL)
587190507Slulf		data = g_malloc(length, M_WAITOK);
588191856Slulf	bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
589190507Slulf	bp->bio_data = data;
590135426Sle
591190507Slulf	/* Send down next. */
592191849Slulf	gv_post_bio(sc, bp);
593190507Slulf	//gv_plex_start(from, bp);
594190507Slulf	return (0);
595190507Slulf}
596135426Sle
/*
 * Handle a finished plex sync bio.  The sync ping-pongs: a completed
 * read is turned into a write, a completed write frees its buffer and
 * either finishes the sync (subdisks up, flags cleared, config saved)
 * or issues the read of the next chunk.  When all plexes of the volume
 * are synced, the extra access reference taken for syncing is dropped.
 * Returns 0 when everything is synced, -1 while syncing continues.
 */
int
gv_sync_complete(struct gv_plex *to, struct bio *bp)
{
	struct gv_plex *from, *p;
	struct gv_sd *s;
	struct gv_volume *v;
	struct gv_softc *sc;
	off_t offset;
	int err;

	g_topology_assert_not();

	err = 0;
	KASSERT(to != NULL, ("NULL to"));
	KASSERT(bp != NULL, ("NULL bp"));
	from = bp->bio_caller2;
	KASSERT(from != NULL, ("NULL from"));
	v = to->vol_sc;
	KASSERT(v != NULL, ("NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
	    	    BIO_WRITE, bp->bio_data);
	/* If it was a write, read the next one. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		/* The buffer was allocated by the read side; free it now. */
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		to->synced += bp->bio_length;
		/* If we're finished, clean up. */
		if (bp->bio_offset + bp->bio_length >= from->size) {
			G_VINUM_DEBUG(1, "syncing of %s from %s completed",
			    to->name, from->name);
			/* Update our state. */
			LIST_FOREACH(s, &to->subdisks, in_plex)
				gv_set_sd_state(s, GV_SD_UP, 0);
			gv_update_plex_state(to);
			to->flags &= ~GV_PLEX_SYNCING;
			to->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		} else {
			/* Issue the read of the next chunk, clamped to the
			 * end of the source plex. */
			offset = bp->bio_offset + bp->bio_length;
			err = gv_sync_request(from, to, offset,
			    MIN(bp->bio_length, from->size - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);
	/* Clean up if there was an error. */
	if (err) {
		to->flags &= ~GV_PLEX_SYNCING;
		G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
	}

	/* Check if all plexes are synced, and lower refcounts. */
	g_topology_lock();
	LIST_FOREACH(p, &v->plexes, in_volume) {
		if (p->flags & GV_PLEX_SYNCING) {
			g_topology_unlock();
			return (-1);
		}
	}
	/* If we came here, all plexes are synced, and we're free. */
	gv_access(v->provider, -1, -1, 0);
	g_topology_unlock();
	G_VINUM_DEBUG(1, "plex sync completed");
	gv_volume_flush(v);
	return (0);
}
671135426Sle
672190507Slulf/*
673190507Slulf * Create a new bio struct for the next grow request.
674190507Slulf */
675190507Slulfint
676190507Slulfgv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
677190507Slulf    caddr_t data)
678135426Sle{
679190507Slulf	struct gv_softc *sc;
680190507Slulf	struct bio *bp;
681135426Sle
682190507Slulf	KASSERT(p != NULL, ("gv_grow_request: NULL p"));
683190507Slulf	sc = p->vinumconf;
684190507Slulf	KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
685135426Sle
686190507Slulf	bp = g_new_bio();
687190507Slulf	if (bp == NULL) {
688190507Slulf		G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
689190507Slulf		    "out of memory", p->name);
690190507Slulf		return (ENOMEM);
691190507Slulf	}
692135426Sle
693190507Slulf	bp->bio_cmd = type;
694190507Slulf	bp->bio_done = gv_done;
695190507Slulf	bp->bio_error = 0;
696190507Slulf	bp->bio_caller1 = p;
697190507Slulf	bp->bio_offset = offset;
698190507Slulf	bp->bio_length = length;
699191856Slulf	bp->bio_pflags |= GV_BIO_GROW;
700190507Slulf	if (data == NULL)
701190507Slulf		data = g_malloc(length, M_WAITOK);
702191856Slulf	bp->bio_pflags |= GV_BIO_MALLOC;
703190507Slulf	bp->bio_data = data;
704135426Sle
705191849Slulf	gv_post_bio(sc, bp);
706190507Slulf	//gv_plex_start(p, bp);
707190507Slulf	return (0);
708190507Slulf}
709135426Sle
/*
 * Finish handling of a bio to a growing plex.  Like the sync path, this
 * ping-pongs read->write->read over the plex's original extent; when the
 * whole original size has been rewritten, the grow is finalized: flags
 * cleared, sizes recomputed, access reference dropped, config saved and
 * delayed requests flushed.
 */
void
gv_grow_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	struct gv_volume *v;
	off_t origsize, offset;
	int sdcount, err;

	v = p->vol_sc;
	KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
	err = 0;

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		p->synced += bp->bio_length;
		err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read next. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		/* The buffer was allocated by the read side; free it now. */
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);

		/* Find the real size of the plex (before the grow), i.e. the
		 * extent of data that has to be re-striped. */
		sdcount = gv_sdcount(p, 1);
		s = LIST_FIRST(&p->subdisks);
		KASSERT(s != NULL, ("NULL s"));
		origsize = (s->size * (sdcount - 1));
		if (bp->bio_offset + bp->bio_length >= origsize) {
			G_VINUM_DEBUG(1, "growing of %s completed", p->name);
			p->flags &= ~GV_PLEX_GROWING;
			LIST_FOREACH(s, &p->subdisks, in_plex) {
				s->flags &= ~GV_SD_GROW;
				gv_set_sd_state(s, GV_SD_UP, 0);
			}
			p->size = gv_plex_size(p);
			gv_update_vol_size(v, gv_vol_size(v));
			gv_set_plex_state(p, GV_PLEX_UP, 0);
			g_topology_lock();
			gv_access(v->provider, -1, -1, 0);
			g_topology_unlock();
			p->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			/* Issue delayed requests. */
			gv_plex_flush(p);
		} else {
			/* Read the next chunk, clamped to the original size. */
			offset = bp->bio_offset + bp->bio_length;
			err = gv_grow_request(p, offset,
			   MIN(bp->bio_length, origsize - offset),
			   BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);

	if (err) {
		p->flags &= ~GV_PLEX_GROWING;
		G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
	}
}
774130389Sle
775190507Slulf
776190507Slulf/*
777190507Slulf * Create an initialization BIO and send it off to the consumer. Assume that
778190507Slulf * we're given initialization data as parameter.
779190507Slulf */
780190507Slulfvoid
781190507Slulfgv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
782130389Sle{
783190507Slulf	struct gv_drive *d;
784190507Slulf	struct g_consumer *cp;
785190507Slulf	struct bio *bp, *cbp;
786130389Sle
787190507Slulf	KASSERT(s != NULL, ("gv_init_request: NULL s"));
788190507Slulf	d = s->drive_sc;
789190507Slulf	KASSERT(d != NULL, ("gv_init_request: NULL d"));
790190507Slulf	cp = d->consumer;
791190507Slulf	KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
792130389Sle
793190507Slulf	bp = g_new_bio();
794190507Slulf	if (bp == NULL) {
795190507Slulf		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
796190507Slulf		    " (drive offset %jd); out of memory", s->name,
797190507Slulf		    (intmax_t)s->initialized, (intmax_t)start);
798190507Slulf		return; /* XXX: Error codes. */
799186517Slulf	}
800190507Slulf	bp->bio_cmd = BIO_WRITE;
801190507Slulf	bp->bio_data = data;
802190507Slulf	bp->bio_done = gv_done;
803190507Slulf	bp->bio_error = 0;
804190507Slulf	bp->bio_length = length;
805191856Slulf	bp->bio_pflags |= GV_BIO_INIT;
806190507Slulf	bp->bio_offset = start;
807190507Slulf	bp->bio_caller1 = s;
808186517Slulf
809190507Slulf	/* Then ofcourse, we have to clone it. */
810190507Slulf	cbp = g_clone_bio(bp);
811190507Slulf	if (cbp == NULL) {
812190507Slulf		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
813190507Slulf		    " (drive offset %jd); out of memory", s->name,
814190507Slulf		    (intmax_t)s->initialized, (intmax_t)start);
815190507Slulf		return; /* XXX: Error codes. */
816130389Sle	}
817190507Slulf	cbp->bio_done = gv_done;
818190507Slulf	cbp->bio_caller1 = s;
819190507Slulf	/* Send it off to the consumer. */
820190507Slulf	g_io_request(cbp, cp);
821130389Sle}
822130389Sle
/*
 * Handle a finished initialization BIO.  While the subdisk still has
 * uninitialized space, account the completed length and issue the next
 * write; once past the end, free the buffer, drop the write reference
 * on the drive and set the subdisk UP (or STALE on error).
 */
void
gv_init_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_drive *d;
	struct g_consumer *cp;
	struct gv_sd *s;
	off_t start, length;
	caddr_t data;
	int error;

	s = bp->bio_caller1;
	start = bp->bio_offset;
	length = bp->bio_length;
	error = bp->bio_error;
	data = bp->bio_data;

	KASSERT(s != NULL, ("gv_init_complete: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_complete: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));

	g_destroy_bio(bp);

	/*
	 * First we need to find out if it was okay, and abort if it's not.
	 * Then we need to free previous buffers, find out the correct subdisk,
	 * as well as getting the correct starting point and length of the BIO.
	 */
	if (start >= s->drive_offset + s->size) {
		/* Free the data we initialized. */
		if (data != NULL)
			g_free(data);
		g_topology_assert_not();
		g_topology_lock();
		/* Drop the write reference taken when init started. */
		g_access(cp, 0, -1, 0);
		g_topology_unlock();
		if (error) {
			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
			    GV_SETSTATE_CONFIG);
		} else {
			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
			s->initialized = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
			    "successfully", s->name);
		}
		return;
	}
	/* Not done yet: advance and write the next chunk, reusing 'data'. */
	s->initialized += length;
	start += length;
	gv_init_request(s, start, data, length);
}
882130389Sle
883190507Slulf/*
884190507Slulf * Create a new bio struct for the next parity rebuild. Used both by internal
885190507Slulf * rebuild of degraded plexes as well as user initiated rebuilds/checks.
886190507Slulf */
887190507Slulfvoid
888190507Slulfgv_parity_request(struct gv_plex *p, int flags, off_t offset)
889190507Slulf{
890190507Slulf	struct gv_softc *sc;
891190507Slulf	struct bio *bp;
892190507Slulf
893190507Slulf	KASSERT(p != NULL, ("gv_parity_request: NULL p"));
894190507Slulf	sc = p->vinumconf;
895190507Slulf	KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
896190507Slulf
897190507Slulf	bp = g_new_bio();
898190507Slulf	if (bp == NULL) {
899190507Slulf		G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
900190507Slulf		    "out of memory", p->name);
901190507Slulf		return;
902149140Sle	}
903130389Sle
904190507Slulf	bp->bio_cmd = BIO_WRITE;
905190507Slulf	bp->bio_done = gv_done;
906190507Slulf	bp->bio_error = 0;
907190507Slulf	bp->bio_length = p->stripesize;
908190507Slulf	bp->bio_caller1 = p;
909190507Slulf
910130389Sle	/*
911190507Slulf	 * Check if it's a rebuild of a degraded plex or a user request of
912190507Slulf	 * parity rebuild.
913130389Sle	 */
914190507Slulf	if (flags & GV_BIO_REBUILD)
915190507Slulf		bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
916190507Slulf	else if (flags & GV_BIO_CHECK)
917190507Slulf		bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
918190507Slulf	else {
919190507Slulf		G_VINUM_DEBUG(0, "invalid flags given in rebuild");
920190507Slulf		return;
921190507Slulf	}
922130389Sle
923191856Slulf	bp->bio_pflags = flags;
924191856Slulf	bp->bio_pflags |= GV_BIO_MALLOC;
925130389Sle
926190507Slulf	/* We still have more parity to build. */
927190507Slulf	bp->bio_offset = offset;
928191849Slulf	gv_post_bio(sc, bp);
929190507Slulf	//gv_plex_start(p, bp); /* Send it down to the plex. */
930190507Slulf}
931130389Sle
932190507Slulf/*
933190507Slulf * Handle a finished parity write.
934190507Slulf */
935190507Slulfvoid
936190507Slulfgv_parity_complete(struct gv_plex *p, struct bio *bp)
937190507Slulf{
938190507Slulf	struct gv_softc *sc;
939190507Slulf	int error, flags;
940130389Sle
941190507Slulf	error = bp->bio_error;
942191856Slulf	flags = bp->bio_pflags;
943190507Slulf	flags &= ~GV_BIO_MALLOC;
944132940Sle
945190507Slulf	sc = p->vinumconf;
946190507Slulf	KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
947140475Sle
948190507Slulf	/* Clean up what we allocated. */
949191856Slulf	if (bp->bio_pflags & GV_BIO_MALLOC)
950190507Slulf		g_free(bp->bio_data);
951190507Slulf	g_destroy_bio(bp);
952130389Sle
953190507Slulf	if (error == EAGAIN) {
954190507Slulf		G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
955190507Slulf		    (intmax_t)p->synced);
956190507Slulf	}
957190507Slulf
958190507Slulf	/* Any error is fatal, except EAGAIN when we're rebuilding. */
959190507Slulf	if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
960190507Slulf		/* Make sure we don't have the lock. */
961190507Slulf		g_topology_assert_not();
962190507Slulf		g_topology_lock();
963190507Slulf		gv_access(p->vol_sc->provider, -1, -1, 0);
964190507Slulf		g_topology_unlock();
965190507Slulf		G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
966190507Slulf		    "errno %d", p->name, (intmax_t)p->synced, error);
967190507Slulf		return;
968130389Sle	} else {
969190507Slulf		p->synced += p->stripesize;
970190507Slulf	}
971130389Sle
972190507Slulf	if (p->synced >= p->size) {
973190507Slulf		/* Make sure we don't have the lock. */
974190507Slulf		g_topology_assert_not();
975190507Slulf		g_topology_lock();
976190507Slulf		gv_access(p->vol_sc->provider, -1, -1, 0);
977190507Slulf		g_topology_unlock();
978190507Slulf		/* We're finished. */
979190507Slulf		G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
980190507Slulf		p->synced = 0;
981190507Slulf		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
982190507Slulf		return;
983190507Slulf	}
984130389Sle
985190507Slulf	/* Send down next. It will determine if we need to itself. */
986190507Slulf	gv_parity_request(p, flags, p->synced);
987130389Sle}
988130389Sle
989190507Slulf/*
990190507Slulf * Handle a finished plex rebuild bio.
991190507Slulf */
992190507Slulfvoid
993190507Slulfgv_rebuild_complete(struct gv_plex *p, struct bio *bp)
994130389Sle{
995190507Slulf	struct gv_softc *sc;
996190507Slulf	struct gv_sd *s;
997190507Slulf	int error, flags;
998190507Slulf	off_t offset;
999130389Sle
1000190507Slulf	error = bp->bio_error;
1001191856Slulf	flags = bp->bio_pflags;
1002190507Slulf	offset = bp->bio_offset;
1003190507Slulf	flags &= ~GV_BIO_MALLOC;
1004190507Slulf	sc = p->vinumconf;
1005190507Slulf	KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
1006130389Sle
1007190507Slulf	/* Clean up what we allocated. */
1008191856Slulf	if (bp->bio_pflags & GV_BIO_MALLOC)
1009190507Slulf		g_free(bp->bio_data);
1010190507Slulf	g_destroy_bio(bp);
1011130389Sle
1012190507Slulf	if (error) {
1013190507Slulf		g_topology_assert_not();
1014190507Slulf		g_topology_lock();
1015190507Slulf		gv_access(p->vol_sc->provider, -1, -1, 0);
1016190507Slulf		g_topology_unlock();
1017190507Slulf
1018190507Slulf		G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
1019190507Slulf		    p->name, (intmax_t)offset, error);
1020190507Slulf		p->flags &= ~GV_PLEX_REBUILDING;
1021190507Slulf		p->synced = 0;
1022190507Slulf		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1023190507Slulf		return;
1024190507Slulf	}
1025130389Sle
1026190507Slulf	offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
1027190507Slulf	if (offset >= p->size) {
1028190507Slulf		/* We're finished. */
1029190507Slulf		g_topology_assert_not();
1030190507Slulf		g_topology_lock();
1031190507Slulf		gv_access(p->vol_sc->provider, -1, -1, 0);
1032190507Slulf		g_topology_unlock();
1033190507Slulf
1034190507Slulf		G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
1035190507Slulf		gv_save_config(p->vinumconf);
1036190507Slulf		p->flags &= ~GV_PLEX_REBUILDING;
1037190507Slulf		p->synced = 0;
1038190507Slulf		/* Try to up all subdisks. */
1039190507Slulf		LIST_FOREACH(s, &p->subdisks, in_plex)
1040190507Slulf			gv_update_sd_state(s);
1041190507Slulf		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
1042190507Slulf		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1043190507Slulf		return;
1044190507Slulf	}
1045190507Slulf
1046190507Slulf	/* Send down next. It will determine if we need to itself. */
1047190507Slulf	gv_parity_request(p, flags, offset);
1048130389Sle}
1049