geom_vinum_plex.c (186517) vs. geom_vinum_plex.c (190507)
1/*-
1/*-
2 * Copyright (c) 2004 Lukas Ertl
2 * Copyright (c) 2004, 2007 Lukas Ertl
3 * Copyright (c) 2007, 2009 Ulf Lilleengen
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright

--- 9 unchanged lines hidden ---

20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright

--- 9 unchanged lines hidden ---

21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 186517 2008-12-27 14:32:39Z lulf $");
29__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 190507 2009-03-28 17:20:08Z lulf $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
30
31#include <sys/param.h>
32#include <sys/bio.h>
32#include <sys/kernel.h>
33#include <sys/kthread.h>
34#include <sys/libkern.h>
35#include <sys/lock.h>
36#include <sys/malloc.h>
33#include <sys/lock.h>
34#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/mutex.h>
39#include <sys/systm.h>
40
41#include <geom/geom.h>
42#include <geom/vinum/geom_vinum_var.h>
43#include <geom/vinum/geom_vinum_raid5.h>
44#include <geom/vinum/geom_vinum.h>
45
35#include <sys/systm.h>
36
37#include <geom/geom.h>
38#include <geom/vinum/geom_vinum_var.h>
39#include <geom/vinum/geom_vinum_raid5.h>
40#include <geom/vinum/geom_vinum.h>
41
46static void gv_plex_completed_request(struct gv_plex *, struct bio *);
47static void gv_plex_normal_request(struct gv_plex *, struct bio *);
48static void gv_plex_worker(void *);
49static int gv_check_parity(struct gv_plex *, struct bio *,
50 struct gv_raid5_packet *);
51static int gv_normal_parity(struct gv_plex *, struct bio *,
52 struct gv_raid5_packet *);
53
54/* XXX: is this the place to catch dying subdisks? */
55static void
56gv_plex_orphan(struct g_consumer *cp)
42static int gv_check_parity(struct gv_plex *, struct bio *,
43 struct gv_raid5_packet *);
44static int gv_normal_parity(struct gv_plex *, struct bio *,
45 struct gv_raid5_packet *);
46static void gv_plex_flush(struct gv_plex *);
47static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
48 int *, int);
49static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
50 off_t, caddr_t);
51void
52gv_plex_start(struct gv_plex *p, struct bio *bp)
57{
53{
58 struct g_geom *gp;
59 struct gv_plex *p;
60 int error;
54 struct bio *cbp;
55 struct gv_sd *s;
56 struct gv_raid5_packet *wp;
57 caddr_t addr;
58 off_t bcount, boff, len;
61
59
62 g_topology_assert();
63 gp = cp->geom;
64 g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);
60 bcount = bp->bio_length;
61 addr = bp->bio_data;
62 boff = bp->bio_offset;
65
63
66 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
67 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
68 error = cp->provider->error;
69 if (error == 0)
70 error = ENXIO;
71 g_detach(cp);
72 g_destroy_consumer(cp);
73 if (!LIST_EMPTY(&gp->consumer))
74 return;
64 /* Walk over the whole length of the request, we might split it up. */
65 while (bcount > 0) {
66 wp = NULL;
75
67
76 p = gp->softc;
77 if (p != NULL) {
78 gv_kill_plex_thread(p);
79 p->geom = NULL;
80 p->provider = NULL;
81 p->consumer = NULL;
82 }
83 gp->softc = NULL;
84 g_wither_geom(gp, error);
85}
68 /*
69 * RAID5 plexes need special treatment, as a single request
70 * might involve several read/write sub-requests.
71 */
72 if (p->org == GV_PLEX_RAID5) {
73 wp = gv_raid5_start(p, bp, addr, boff, bcount);
74 if (wp == NULL)
75 return;
76
77 len = wp->length;
86
78
87void
88gv_plex_done(struct bio *bp)
89{
90 struct gv_plex *p;
79 if (TAILQ_EMPTY(&wp->bits))
80 g_free(wp);
81 else if (wp->lockbase != -1)
82 TAILQ_INSERT_TAIL(&p->packets, wp, list);
91
83
92 p = bp->bio_from->geom->softc;
93 bp->bio_cflags |= GV_BIO_DONE;
94 mtx_lock(&p->bqueue_mtx);
95 bioq_insert_tail(p->bqueue, bp);
96 wakeup(p);
97 mtx_unlock(&p->bqueue_mtx);
84 /*
85 * Requests to concatenated and striped plexes go straight
86 * through.
87 */
88 } else {
89 len = gv_plex_normal_request(p, bp, boff, bcount, addr);
90 }
91 if (len < 0)
92 return;
93
94 bcount -= len;
95 addr += len;
96 boff += len;
97 }
98
99 /*
100 * Fire off all sub-requests. We get the correct consumer (== drive)
101 * to send each request to via the subdisk that was stored in
102 * cbp->bio_caller1.
103 */
104 cbp = bioq_takefirst(p->bqueue);
105 while (cbp != NULL) {
106 /*
107 * RAID5 sub-requests need to come in correct order, otherwise
108 * we trip over the parity, as it might be overwritten by
109 * another sub-request. We abuse cbp->bio_caller2 to mark
110 * potential overlap situations.
111 */
112 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
113 /* Park the bio on the waiting queue. */
114 cbp->bio_cflags |= GV_BIO_ONHOLD;
115 bioq_disksort(p->wqueue, cbp);
116 } else {
117 s = cbp->bio_caller1;
118 g_io_request(cbp, s->drive_sc->consumer);
119 }
120 cbp = bioq_takefirst(p->bqueue);
121 }
98}
99
122}
123
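/*
 * Overview (a hedged reading of the code above, not taken from the original
 * source): gv_plex_start() maps the bio in pieces.  Each pass through the
 * loop covers at most one contiguous chunk -- the remainder of a subdisk for
 * a concatenated plex, the remainder of a stripe for a striped plex, or one
 * RAID-5 packet -- collects the resulting sub-bios on p->bqueue and advances
 * addr/boff/bcount by the mapped length.  Only after the whole request has
 * been mapped are the sub-bios issued; a RAID-5 sub-request that would touch
 * a stripe still in flight is parked on p->wqueue instead.
 */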
100/* Find the correct subdisk to send the bio to and build a bio to send. */
101static int
124static int
102gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
125gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
126 off_t *real_len, int *sdno, int growing)
103{
127{
104 struct g_geom *gp;
105 struct gv_sd *s;
128 struct gv_sd *s;
106 struct bio *cbp, *pbp;
107 int i, sdno;
108 off_t len_left, real_len, real_off;
109 off_t stripeend, stripeno, stripestart;
129 int i, sdcount;
130 off_t len_left, stripeend, stripeno, stripestart;
110
131
111 if (p == NULL || LIST_EMPTY(&p->subdisks))
112 return (ENXIO);
113
114 s = NULL;
115 gp = bp->bio_to->geom;
116
117 /*
118 * We only handle concatenated and striped plexes here. RAID5 plexes
119 * are handled in build_raid5_request().
120 */
121 switch (p->org) {
122 case GV_PLEX_CONCAT:
123 /*
124 * Find the subdisk where this request starts. The subdisks in
125 * this list must be ordered by plex_offset.
126 */
132 switch (p->org) {
133 case GV_PLEX_CONCAT:
134 /*
135 * Find the subdisk where this request starts. The subdisks in
136 * this list must be ordered by plex_offset.
137 */
138 i = 0;
127 LIST_FOREACH(s, &p->subdisks, in_plex) {
128 if (s->plex_offset <= boff &&
139 LIST_FOREACH(s, &p->subdisks, in_plex) {
140 if (s->plex_offset <= boff &&
129 s->plex_offset + s->size > boff)
141 s->plex_offset + s->size > boff) {
142 *sdno = i;
130 break;
143 break;
144 }
145 i++;
131 }
146 }
132 /* Subdisk not found. */
133 if (s == NULL)
134 return (ENXIO);
147 if (s == NULL || s->drive_sc == NULL)
148 return (GV_ERR_NOTFOUND);
135
136 /* Calculate corresponding offsets on disk. */
149
150 /* Calculate corresponding offsets on disk. */
137 real_off = boff - s->plex_offset;
138 len_left = s->size - real_off;
139 real_len = (bcount > len_left) ? len_left : bcount;
151 *real_off = boff - s->plex_offset;
152 len_left = s->size - (*real_off);
153 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
154 *real_len = (bcount > len_left) ? len_left : bcount;
140 break;
141
142 case GV_PLEX_STRIPED:
143 /* The number of the stripe where the request starts. */
144 stripeno = boff / p->stripesize;
155 break;
156
157 case GV_PLEX_STRIPED:
158 /* The number of the stripe where the request starts. */
159 stripeno = boff / p->stripesize;
160 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
145
161
146 /* The number of the subdisk where the stripe resides. */
147 sdno = stripeno % p->sdcount;
162 /* Take growing subdisks into account when calculating. */
163 sdcount = gv_sdcount(p, (boff >= p->synced));
148
164
149 /* Find the right subdisk. */
150 i = 0;
151 LIST_FOREACH(s, &p->subdisks, in_plex) {
152 if (i == sdno)
153 break;
154 i++;
155 }
165 if (!(boff + bcount <= p->synced) &&
166 (p->flags & GV_PLEX_GROWING) &&
167 !growing)
168 return (GV_ERR_ISBUSY);
169 *sdno = stripeno % sdcount;
156
170
157 /* Subdisk not found. */
158 if (s == NULL)
159 return (ENXIO);
160
161 /* The offset of the stripe from the start of the subdisk. */
162 stripestart = (stripeno / p->sdcount) *
171 KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0"));
172 stripestart = (stripeno / sdcount) *
163 p->stripesize;
173 p->stripesize;
164
165 /* The offset at the end of the stripe. */
174 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
166 stripeend = stripestart + p->stripesize;
175 stripeend = stripestart + p->stripesize;
167
168 /* The offset of the request on this subdisk. */
169 real_off = boff - (stripeno * p->stripesize) +
176 *real_off = boff - (stripeno * p->stripesize) +
170 stripestart;
177 stripestart;
178 len_left = stripeend - *real_off;
179 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
171
180
172 /* The length left in this stripe. */
173 len_left = stripeend - real_off;
174
175 real_len = (bcount <= len_left) ? bcount : len_left;
181 *real_len = (bcount <= len_left) ? bcount : len_left;
176 break;
177
178 default:
182 break;
183
184 default:
179 return (EINVAL);
185 return (GV_ERR_PLEXORG);
180 }
186 }
187 return (0);
188}
181
189
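/*
 * A minimal userland sketch of the striped mapping computed above; the
 * helper name stripe_map() and the numbers in the example are illustrative
 * only and not part of gvinum.
 */
#include <sys/types.h>

static void
stripe_map(off_t boff, off_t stripesize, int sdcount, int *sdno,
    off_t *real_off)
{
	off_t stripeno, stripestart;

	stripeno = boff / stripesize;		/* stripe containing boff */
	stripestart = (stripeno / sdcount) * stripesize; /* its offset inside the subdisk */
	*sdno = stripeno % sdcount;		/* round-robin subdisk choice */
	*real_off = boff - (stripeno * stripesize) + stripestart;
}

/*
 * Example: with stripesize = 64 KB and sdcount = 3, boff = 200 KB yields
 * stripeno = 3, sdno = 0, stripestart = 64 KB and real_off = 72 KB; the
 * 56 KB left in that stripe (stripeend - real_off) bound this sub-request.
 */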
190/*
191 * Prepare a normal plex request.
192 */
193static int
194gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
195 off_t bcount, caddr_t addr)
196{
197 struct gv_sd *s;
198 struct bio *cbp;
199 off_t real_len, real_off;
200 int i, err, sdno;
201
202 s = NULL;
203 sdno = -1;
204 real_len = real_off = 0;
205
206 err = ENXIO;
207
208 if (p == NULL || LIST_EMPTY(&p->subdisks))
209 goto bad;
210
211 err = gv_plex_offset(p, boff, bcount, &real_off,
212 &real_len, &sdno, (bp->bio_pflags & GV_BIO_SYNCREQ));
213 /* If the request was blocked, put it into wait. */
214 if (err == GV_ERR_ISBUSY) {
215 bioq_disksort(p->rqueue, bp);
216 return (-1); /* "Fail", and delay request. */
217 }
218 if (err) {
219 err = ENXIO;
220 goto bad;
221 }
222 err = ENXIO;
223
224 /* Find the right subdisk. */
225 i = 0;
226 LIST_FOREACH(s, &p->subdisks, in_plex) {
227 if (i == sdno)
228 break;
229 i++;
230 }
231
232 /* Subdisk not found. */
233 if (s == NULL || s->drive_sc == NULL)
234 goto bad;
235
182 /* Now check if we can handle the request on this subdisk. */
183 switch (s->state) {
184 case GV_SD_UP:
185 /* If the subdisk is up, just continue. */
186 break;
236 /* Now check if we can handle the request on this subdisk. */
237 switch (s->state) {
238 case GV_SD_UP:
239 /* If the subdisk is up, just continue. */
240 break;
187
241 case GV_SD_DOWN:
242 if (bp->bio_cflags & GV_BIO_INTERNAL)
243 G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
244 " order to perform administrative requests");
245 goto bad;
188 case GV_SD_STALE:
246 case GV_SD_STALE:
189 if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
190 return (ENXIO);
247 if (!(bp->bio_cflags & GV_BIO_SYNCREQ)) {
248 G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
249 "regular requests");
250 goto bad;
251 }
191
192 G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
193 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
194 break;
252
253 G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
254 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
255 break;
195
196 case GV_SD_INITIALIZING:
197 if (bp->bio_cmd == BIO_READ)
256 case GV_SD_INITIALIZING:
257 if (bp->bio_cmd == BIO_READ)
198 return (ENXIO);
258 goto bad;
199 break;
259 break;
200
201 default:
202 /* All other subdisk states mean it's not accessible. */
260 default:
261 /* All other subdisk states mean it's not accessible. */
203 return (ENXIO);
262 goto bad;
204 }
205
206 /* Clone the bio and adjust the offsets and sizes. */
207 cbp = g_clone_bio(bp);
263 }
264
265 /* Clone the bio and adjust the offsets and sizes. */
266 cbp = g_clone_bio(bp);
208 if (cbp == NULL)
209 return (ENOMEM);
210 cbp->bio_offset = real_off;
267 if (cbp == NULL) {
268 err = ENOMEM;
269 goto bad;
270 }
271 cbp->bio_offset = real_off + s->drive_offset;
211 cbp->bio_length = real_len;
212 cbp->bio_data = addr;
272 cbp->bio_length = real_len;
273 cbp->bio_data = addr;
213 cbp->bio_done = g_std_done;
214 cbp->bio_caller2 = s->consumer;
215 if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
274 cbp->bio_done = gv_done;
275 cbp->bio_caller1 = s;
276 if ((bp->bio_cflags & GV_BIO_SYNCREQ))
216 cbp->bio_cflags |= GV_BIO_SYNCREQ;
277 cbp->bio_cflags |= GV_BIO_SYNCREQ;
217 cbp->bio_done = gv_plex_done;
218 }
219
278
220 if (bp->bio_driver1 == NULL) {
221 bp->bio_driver1 = cbp;
222 } else {
223 pbp = bp->bio_driver1;
224 while (pbp->bio_caller1 != NULL)
225 pbp = pbp->bio_caller1;
226 pbp->bio_caller1 = cbp;
279 /* Store the sub-requests now and let others issue them. */
280 bioq_insert_tail(p->bqueue, cbp);
281 return (real_len);
282bad:
283 G_VINUM_LOGREQ(0, bp, "plex request failed.");
284 /* Building the sub-request failed. If internal BIO, do not deliver. */
285 if (bp->bio_cflags & GV_BIO_INTERNAL) {
286 if (bp->bio_cflags & GV_BIO_MALLOC)
287 g_free(bp->bio_data);
288 g_destroy_bio(bp);
289 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
290 GV_PLEX_GROWING);
291 return (-1);
227 }
292 }
228
229 return (0);
293 g_io_deliver(bp, err);
294 return (-1);
230}
231
295}
296
232static void
233gv_plex_start(struct bio *bp)
297/*
298 * Handle a completed request to a striped or concatenated plex.
299 */
300void
301gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
234{
302{
235 struct gv_plex *p;
303 struct bio *pbp;
236
304
237 switch(bp->bio_cmd) {
238 case BIO_READ:
239 case BIO_WRITE:
240 case BIO_DELETE:
241 break;
242 case BIO_GETATTR:
243 default:
244 g_io_deliver(bp, EOPNOTSUPP);
245 return;
305 pbp = bp->bio_parent;
306 if (pbp->bio_error == 0)
307 pbp->bio_error = bp->bio_error;
308 g_destroy_bio(bp);
309 pbp->bio_inbed++;
310 if (pbp->bio_children == pbp->bio_inbed) {
311 /* Just set it to length since multiple plexes will
312 * screw things up. */
313 pbp->bio_completed = pbp->bio_length;
314 if (pbp->bio_cflags & GV_BIO_SYNCREQ)
315 gv_sync_complete(p, pbp);
316 else if (pbp->bio_pflags & GV_BIO_SYNCREQ)
317 gv_grow_complete(p, pbp);
318 else
319 g_io_deliver(pbp, pbp->bio_error);
246 }
320 }
247
248 /*
249 * We cannot handle this request if too many of our subdisks are
250 * inaccessible.
251 */
252 p = bp->bio_to->geom->softc;
253 if ((p->state < GV_PLEX_DEGRADED) &&
254 !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
255 g_io_deliver(bp, ENXIO);
256 return;
257 }
258
259 mtx_lock(&p->bqueue_mtx);
260 bioq_disksort(p->bqueue, bp);
261 wakeup(p);
262 mtx_unlock(&p->bqueue_mtx);
263}
264
321}
322
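/*
 * A note on the two flag words checked above (hedged, inferred from the code
 * in this file): GV_BIO_SYNCREQ in bio_cflags marks a plex synchronisation
 * request (set by gv_sync_request()), whereas GV_BIO_SYNCREQ in bio_pflags
 * marks a grow request (set by gv_grow_request(), which itself calls this a
 * misuse of pflags).  Ordinary requests carry neither bit and are simply
 * delivered with g_io_deliver().
 */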
265static void
266gv_plex_worker(void *arg)
323/*
324 * Handle a completed request to a RAID-5 plex.
325 */
326void
327gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
267{
328{
268 struct bio *bp;
269 struct gv_plex *p;
270 struct gv_sd *s;
329 struct gv_softc *sc;
330 struct bio *cbp, *pbp;
331 struct gv_bioq *bq, *bq2;
332 struct gv_raid5_packet *wp;
333 off_t completed;
334 int i;
271
335
272 p = arg;
273 KASSERT(p != NULL, ("NULL p"));
336 completed = 0;
337 sc = p->vinumconf;
338 wp = bp->bio_caller2;
274
339
275 mtx_lock(&p->bqueue_mtx);
276 for (;;) {
277 /* We were signaled to exit. */
278 if (p->flags & GV_PLEX_THREAD_DIE)
340 switch (bp->bio_parent->bio_cmd) {
341 case BIO_READ:
342 if (wp == NULL) {
343 completed = bp->bio_completed;
279 break;
344 break;
280
281 /* Take the first BIO from our queue. */
282 bp = bioq_takefirst(p->bqueue);
283 if (bp == NULL) {
284 msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
285 continue;
286 }
345 }
287 mtx_unlock(&p->bqueue_mtx);
288
346
289 /* A completed request. */
290 if (bp->bio_cflags & GV_BIO_DONE) {
291 if (bp->bio_cflags & GV_BIO_SYNCREQ ||
292 bp->bio_cflags & GV_BIO_REBUILD) {
293 s = bp->bio_to->private;
294 if (bp->bio_error == 0)
295 s->initialized += bp->bio_length;
296 if (s->initialized >= s->size) {
297 g_topology_lock();
298 gv_set_sd_state(s, GV_SD_UP,
299 GV_SETSTATE_CONFIG);
300 g_topology_unlock();
301 s->initialized = 0;
347 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
348 if (bq->bp != bp)
349 continue;
350 TAILQ_REMOVE(&wp->bits, bq, queue);
351 g_free(bq);
352 for (i = 0; i < wp->length; i++)
353 wp->data[i] ^= bp->bio_data[i];
354 break;
355 }
356 if (TAILQ_EMPTY(&wp->bits)) {
357 completed = wp->length;
358 if (wp->lockbase != -1) {
359 TAILQ_REMOVE(&p->packets, wp, list);
360 /* Bring the waiting bios back into the game. */
361 pbp = bioq_takefirst(p->wqueue);
362 while (pbp != NULL) {
363 mtx_lock(&sc->queue_mtx);
364 bioq_disksort(sc->bqueue, pbp);
365 mtx_unlock(&sc->queue_mtx);
366 pbp = bioq_takefirst(p->wqueue);
302 }
303 }
367 }
368 }
369 g_free(wp);
370 }
304
371
305 if (bp->bio_cflags & GV_BIO_SYNCREQ)
306 g_std_done(bp);
307 else
308 gv_plex_completed_request(p, bp);
309 /*
310 * A sub-request that was hold back because it interfered with
311 * another sub-request.
312 */
313 } else if (bp->bio_cflags & GV_BIO_ONHOLD) {
314 /* Is it still locked out? */
315 if (gv_stripe_active(p, bp)) {
316 /* Park the bio on the waiting queue. */
317 mtx_lock(&p->bqueue_mtx);
318 bioq_disksort(p->wqueue, bp);
319 mtx_unlock(&p->bqueue_mtx);
320 } else {
321 bp->bio_cflags &= ~GV_BIO_ONHOLD;
322 g_io_request(bp, bp->bio_caller2);
323 }
372 break;
324
373
325 /* A normal request to this plex. */
326 } else
327 gv_plex_normal_request(p, bp);
374 case BIO_WRITE:
375 /* XXX can this ever happen? */
376 if (wp == NULL) {
377 completed = bp->bio_completed;
378 break;
379 }
328
380
329 mtx_lock(&p->bqueue_mtx);
330 }
331 mtx_unlock(&p->bqueue_mtx);
332 p->flags |= GV_PLEX_THREAD_DEAD;
333 wakeup(p);
381 /* Check if we need to handle parity data. */
382 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
383 if (bq->bp != bp)
384 continue;
385 TAILQ_REMOVE(&wp->bits, bq, queue);
386 g_free(bq);
387 cbp = wp->parity;
388 if (cbp != NULL) {
389 for (i = 0; i < wp->length; i++)
390 cbp->bio_data[i] ^= bp->bio_data[i];
391 }
392 break;
393 }
334
394
335 kproc_exit(ENXIO);
336}
395 /* Handle parity data. */
396 if (TAILQ_EMPTY(&wp->bits)) {
397 if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
398 i = gv_check_parity(p, bp, wp);
399 else
400 i = gv_normal_parity(p, bp, wp);
337
401
338static int
339gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
340{
341 struct bio *cbp, *pbp;
342 int finished, i;
402 /* All of our sub-requests have finished. */
403 if (i) {
404 completed = wp->length;
405 TAILQ_REMOVE(&p->packets, wp, list);
406 /* Bring the waiting bios back into the game. */
407 pbp = bioq_takefirst(p->wqueue);
408 while (pbp != NULL) {
409 mtx_lock(&sc->queue_mtx);
410 bioq_disksort(sc->bqueue, pbp);
411 mtx_unlock(&sc->queue_mtx);
412 pbp = bioq_takefirst(p->wqueue);
413 }
414 g_free(wp);
415 }
416 }
343
417
344 finished = 1;
418 break;
419 }
345
420
346 if (wp->waiting != NULL) {
347 pbp = wp->waiting;
348 wp->waiting = NULL;
349 cbp = wp->parity;
350 for (i = 0; i < wp->length; i++)
351 cbp->bio_data[i] ^= pbp->bio_data[i];
352 g_io_request(pbp, pbp->bio_caller2);
353 finished = 0;
421 pbp = bp->bio_parent;
422 if (pbp->bio_error == 0)
423 pbp->bio_error = bp->bio_error;
424 pbp->bio_completed += completed;
354
425
355 } else if (wp->parity != NULL) {
356 cbp = wp->parity;
357 wp->parity = NULL;
358 g_io_request(cbp, cbp->bio_caller2);
359 finished = 0;
426 /* When the original request is finished, we deliver it. */
427 pbp->bio_inbed++;
428 if (pbp->bio_inbed == pbp->bio_children) {
429 /* Hand it over for checking or delivery. */
430 if (pbp->bio_cmd == BIO_WRITE &&
431 (pbp->bio_cflags & GV_BIO_CHECK)) {
432 gv_parity_complete(p, pbp);
433 } else if (pbp->bio_cmd == BIO_WRITE &&
434 (pbp->bio_cflags & GV_BIO_REBUILD)) {
435 gv_rebuild_complete(p, pbp);
436 } else if (pbp->bio_cflags & GV_BIO_INIT) {
437 gv_init_complete(p, pbp);
438 } else if (pbp->bio_cflags & GV_BIO_SYNCREQ) {
439 gv_sync_complete(p, pbp);
440 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
441 gv_grow_complete(p, pbp);
442 } else {
443 g_io_deliver(pbp, pbp->bio_error);
444 }
360 }
361
445 }
446
362 return (finished);
447 /* Clean up what we allocated. */
448 if (bp->bio_cflags & GV_BIO_MALLOC)
449 g_free(bp->bio_data);
450 g_destroy_bio(bp);
363}
364
365static int
366gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
367{
368 struct bio *pbp;
451}
452
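/*
 * The degraded-read path above rebuilds a missing block by XOR-ing every
 * completed sub-read into wp->data (the buffer itself is set up by
 * gv_raid5_start(), which is not part of this file).  A self-contained
 * sketch of that property; xor_reconstruct() is illustrative only, not a
 * gvinum function.
 */
#include <stddef.h>

static void
xor_reconstruct(unsigned char *dst, const unsigned char *const *blocks,
    size_t nblocks, size_t len)
{
	size_t b, i;

	/* dst initially holds the parity; XOR in the surviving data blocks. */
	for (b = 0; b < nblocks; b++)
		for (i = 0; i < len; i++)
			dst[i] ^= blocks[b][i];
	/* Since parity = d0 ^ d1 ^ ... ^ dn, dst now equals the missing block. */
}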
453static int
454gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
455{
456 struct bio *pbp;
457 struct gv_sd *s;
369 int err, finished, i;
370
371 err = 0;
372 finished = 1;
373
374 if (wp->waiting != NULL) {
375 pbp = wp->waiting;
376 wp->waiting = NULL;
458 int err, finished, i;
459
460 err = 0;
461 finished = 1;
462
463 if (wp->waiting != NULL) {
464 pbp = wp->waiting;
465 wp->waiting = NULL;
377 g_io_request(pbp, pbp->bio_caller2);
466 s = pbp->bio_caller1;
467 g_io_request(pbp, s->drive_sc->consumer);
378 finished = 0;
379
380 } else if (wp->parity != NULL) {
381 pbp = wp->parity;
382 wp->parity = NULL;
383
384 /* Check if the parity is correct. */
385 for (i = 0; i < wp->length; i++) {

--- 4 unchanged lines hidden ---

390 }
391
392 /* The parity is not correct... */
393 if (err) {
394 bp->bio_parent->bio_error = EAGAIN;
395
396 /* ... but we rebuild it. */
397 if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
468 finished = 0;
469
470 } else if (wp->parity != NULL) {
471 pbp = wp->parity;
472 wp->parity = NULL;
473
474 /* Check if the parity is correct. */
475 for (i = 0; i < wp->length; i++) {

--- 4 unchanged lines hidden ---

480 }
481
482 /* The parity is not correct... */
483 if (err) {
484 bp->bio_parent->bio_error = EAGAIN;
485
486 /* ... but we rebuild it. */
487 if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
398 g_io_request(pbp, pbp->bio_caller2);
488 s = pbp->bio_caller1;
489 g_io_request(pbp, s->drive_sc->consumer);
399 finished = 0;
400 }
401 }
402
403 /*
404 * Clean up the BIO we would have used for rebuilding the
405 * parity.
406 */
407 if (finished) {
408 bp->bio_parent->bio_inbed++;
409 g_destroy_bio(pbp);
410 }
411
412 }
413
414 return (finished);
415}
416
490 finished = 0;
491 }
492 }
493
494 /*
495 * Clean up the BIO we would have used for rebuilding the
496 * parity.
497 */
498 if (finished) {
499 bp->bio_parent->bio_inbed++;
500 g_destroy_bio(pbp);
501 }
502
503 }
504
505 return (finished);
506}
507
417void
418gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
508static int
509gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
419{
420 struct bio *cbp, *pbp;
510{
511 struct bio *cbp, *pbp;
421 struct gv_bioq *bq, *bq2;
422 struct gv_raid5_packet *wp;
423 int i;
512 struct gv_sd *s;
513 int finished, i;
424
514
425 wp = bp->bio_driver1;
515 finished = 1;
426
516
427 switch (bp->bio_parent->bio_cmd) {
428 case BIO_READ:
429 if (wp == NULL)
430 break;
517 if (wp->waiting != NULL) {
518 pbp = wp->waiting;
519 wp->waiting = NULL;
520 cbp = wp->parity;
521 for (i = 0; i < wp->length; i++)
522 cbp->bio_data[i] ^= pbp->bio_data[i];
523 s = pbp->bio_caller1;
524 g_io_request(pbp, s->drive_sc->consumer);
525 finished = 0;
431
526
432 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
433 if (bq->bp == bp) {
434 TAILQ_REMOVE(&wp->bits, bq, queue);
435 g_free(bq);
436 for (i = 0; i < wp->length; i++)
437 wp->data[i] ^= bp->bio_data[i];
438 break;
439 }
440 }
441 if (TAILQ_EMPTY(&wp->bits)) {
442 bp->bio_parent->bio_completed += wp->length;
443 if (wp->lockbase != -1) {
444 TAILQ_REMOVE(&p->packets, wp, list);
445 /* Bring the waiting bios back into the game. */
446 mtx_lock(&p->bqueue_mtx);
447 pbp = bioq_takefirst(p->wqueue);
448 while (pbp != NULL) {
449 bioq_disksort(p->bqueue, pbp);
450 pbp = bioq_takefirst(p->wqueue);
451 }
452 mtx_unlock(&p->bqueue_mtx);
453 }
454 g_free(wp);
455 }
527 } else if (wp->parity != NULL) {
528 cbp = wp->parity;
529 wp->parity = NULL;
530 s = cbp->bio_caller1;
531 g_io_request(cbp, s->drive_sc->consumer);
532 finished = 0;
533 }
456
534
457 break;
535 return (finished);
536}
458
537
459 case BIO_WRITE:
460 if (wp == NULL)
461 break;
538/* Flush the queue with delayed requests. */
539static void
540gv_plex_flush(struct gv_plex *p)
541{
542 struct gv_softc *sc;
543 struct bio *bp;
462
544
463 /* Check if we need to handle parity data. */
464 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
465 if (bq->bp == bp) {
466 TAILQ_REMOVE(&wp->bits, bq, queue);
467 g_free(bq);
468 cbp = wp->parity;
469 if (cbp != NULL) {
470 for (i = 0; i < wp->length; i++)
471 cbp->bio_data[i] ^=
472 bp->bio_data[i];
473 }
474 break;
475 }
476 }
545 sc = p->vinumconf;
546 bp = bioq_takefirst(p->rqueue);
547 while (bp != NULL) {
548 gv_plex_start(p, bp);
549 bp = bioq_takefirst(p->rqueue);
550 }
551}
477
552
478 /* Handle parity data. */
479 if (TAILQ_EMPTY(&wp->bits)) {
480 if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
481 i = gv_check_parity(p, bp, wp);
482 else
483 i = gv_normal_parity(p, bp, wp);
553int
554gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
555 off_t length, int type, caddr_t data)
556{
557 struct gv_softc *sc;
558 struct bio *bp;
484
559
485 /* All of our sub-requests have finished. */
486 if (i) {
487 bp->bio_parent->bio_completed += wp->length;
488 TAILQ_REMOVE(&p->packets, wp, list);
489 /* Bring the waiting bios back into the game. */
490 mtx_lock(&p->bqueue_mtx);
491 pbp = bioq_takefirst(p->wqueue);
492 while (pbp != NULL) {
493 bioq_disksort(p->bqueue, pbp);
494 pbp = bioq_takefirst(p->wqueue);
495 }
496 mtx_unlock(&p->bqueue_mtx);
497 g_free(wp);
498 }
499 }
560 KASSERT(from != NULL, ("NULL from"));
561 KASSERT(to != NULL, ("NULL to"));
562 sc = from->vinumconf;
563 KASSERT(sc != NULL, ("NULL sc"));
500
564
501 break;
565 bp = g_new_bio();
566 if (bp == NULL) {
567 G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
568 " %jd; out of memory", from->name, offset);
569 return (ENOMEM);
502 }
570 }
571 bp->bio_length = length;
572 bp->bio_done = gv_done;
573 bp->bio_cflags |= GV_BIO_SYNCREQ;
574 bp->bio_offset = offset;
575 bp->bio_caller1 = from;
576 bp->bio_caller2 = to;
577 bp->bio_cmd = type;
578 if (data == NULL)
579 data = g_malloc(length, M_WAITOK);
580 bp->bio_cflags |= GV_BIO_MALLOC; /* Free on the next run. */
581 bp->bio_data = data;
503
582
504 pbp = bp->bio_parent;
505 if (pbp->bio_error == 0)
506 pbp->bio_error = bp->bio_error;
507
508 /* When the original request is finished, we deliver it. */
509 pbp->bio_inbed++;
510 if (pbp->bio_inbed == pbp->bio_children)
511 g_io_deliver(pbp, pbp->bio_error);
512
513 /* Clean up what we allocated. */
514 if (bp->bio_cflags & GV_BIO_MALLOC)
515 g_free(bp->bio_data);
516 g_destroy_bio(bp);
583 /* Send down next. */
584 mtx_lock(&sc->queue_mtx);
585 bioq_disksort(sc->bqueue, bp);
586 mtx_unlock(&sc->queue_mtx);
587 //gv_plex_start(from, bp);
588 return (0);
517}
518
589}
590
519void
520gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
591/*
592 * Handle a finished plex sync bio.
593 */
594int
595gv_sync_complete(struct gv_plex *to, struct bio *bp)
521{
596{
522 struct bio *cbp, *pbp;
523 struct gv_bioq *bq, *bq2;
524 struct gv_raid5_packet *wp, *wp2;
525 caddr_t addr;
526 off_t bcount, boff;
597 struct gv_plex *from, *p;
598 struct gv_sd *s;
599 struct gv_volume *v;
600 struct gv_softc *sc;
601 off_t offset;
527 int err;
528
602 int err;
603
529 bcount = bp->bio_length;
530 addr = bp->bio_data;
531 boff = bp->bio_offset;
604 g_topology_assert_not();
532
605
533 /* Walk over the whole length of the request, we might split it up. */
534 while (bcount > 0) {
535 wp = NULL;
606 err = 0;
607 KASSERT(to != NULL, ("NULL to"));
608 KASSERT(bp != NULL, ("NULL bp"));
609 from = bp->bio_caller2;
610 KASSERT(from != NULL, ("NULL from"));
611 v = to->vol_sc;
612 KASSERT(v != NULL, ("NULL v"));
613 sc = v->vinumconf;
614 KASSERT(sc != NULL, ("NULL sc"));
536
615
537 /*
538 * RAID5 plexes need special treatment, as a single write
539 * request involves several read/write sub-requests.
540 */
541 if (p->org == GV_PLEX_RAID5) {
542 wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
543 wp->bio = bp;
544 TAILQ_INIT(&wp->bits);
616 /* If it was a read, write it. */
617 if (bp->bio_cmd == BIO_READ) {
618 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
619 BIO_WRITE, bp->bio_data);
620 /* If it was a write, read the next one. */
621 } else if (bp->bio_cmd == BIO_WRITE) {
622 if (bp->bio_cflags & GV_BIO_MALLOC)
623 g_free(bp->bio_data);
624 to->synced += bp->bio_length;
625 /* If we're finished, clean up. */
626 if (bp->bio_offset + bp->bio_length >= from->size) {
627 G_VINUM_DEBUG(1, "syncing of %s from %s completed",
628 to->name, from->name);
629 /* Update our state. */
630 LIST_FOREACH(s, &to->subdisks, in_plex)
631 gv_set_sd_state(s, GV_SD_UP, 0);
632 gv_update_plex_state(to);
633 to->flags &= ~GV_PLEX_SYNCING;
634 to->synced = 0;
635 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
636 } else {
637 offset = bp->bio_offset + bp->bio_length;
638 err = gv_sync_request(from, to, offset,
639 MIN(bp->bio_length, from->size - offset),
640 BIO_READ, NULL);
641 }
642 }
643 g_destroy_bio(bp);
644 /* Clean up if there was an error. */
645 if (err) {
646 to->flags &= ~GV_PLEX_SYNCING;
647 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
648 }
545
649
546 if (bp->bio_cflags & GV_BIO_REBUILD)
547 err = gv_rebuild_raid5(p, wp, bp, addr,
548 boff, bcount);
549 else if (bp->bio_cflags & GV_BIO_CHECK)
550 err = gv_check_raid5(p, wp, bp, addr,
551 boff, bcount);
552 else
553 err = gv_build_raid5_req(p, wp, bp, addr,
554 boff, bcount);
650 /* Check if all plexes are synced, and lower refcounts. */
651 g_topology_lock();
652 LIST_FOREACH(p, &v->plexes, in_volume) {
653 if (p->flags & GV_PLEX_SYNCING) {
654 g_topology_unlock();
655 return (-1);
656 }
657 }
658 /* If we came here, all plexes are synced, and we're free. */
659 gv_access(v->provider, -1, -1, 0);
660 g_topology_unlock();
661 G_VINUM_DEBUG(1, "plex sync completed");
662 gv_volume_flush(v);
663 return (0);
664}
555
665
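/*
 * The sync machinery above forms a read/write ping-pong (hedged summary):
 * gv_sync_request() issues a BIO_READ from the source plex,
 * gv_sync_complete() turns the filled buffer into a BIO_WRITE to the target
 * plex, and the completed write schedules the next BIO_READ at
 * offset + length, roughly
 *
 *	for (off = 0; off < from->size; off += chunk)
 *		read chunk bytes from 'from' at off, then write them to 'to';
 *
 * where 'chunk' is whatever length the initial caller passed in.  Only when
 * every plex of the volume has left GV_PLEX_SYNCING is the write access
 * dropped and the delayed queue flushed.
 */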
556 /*
557 * Building the sub-request failed, we probably need to
558 * clean up a lot.
559 */
560 if (err) {
561 G_VINUM_LOGREQ(0, bp, "plex request failed.");
562 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
563 TAILQ_REMOVE(&wp->bits, bq, queue);
564 g_free(bq);
565 }
566 if (wp->waiting != NULL) {
567 if (wp->waiting->bio_cflags &
568 GV_BIO_MALLOC)
569 g_free(wp->waiting->bio_data);
570 g_destroy_bio(wp->waiting);
571 }
572 if (wp->parity != NULL) {
573 if (wp->parity->bio_cflags &
574 GV_BIO_MALLOC)
575 g_free(wp->parity->bio_data);
576 g_destroy_bio(wp->parity);
577 }
578 g_free(wp);
666/*
667 * Create a new bio struct for the next grow request.
668 */
669int
670gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
671 caddr_t data)
672{
673 struct gv_softc *sc;
674 struct bio *bp;
579
675
580 TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
581 if (wp->bio == bp) {
582 TAILQ_REMOVE(&p->packets, wp,
583 list);
584 TAILQ_FOREACH_SAFE(bq,
585 &wp->bits, queue, bq2) {
586 TAILQ_REMOVE(&wp->bits,
587 bq, queue);
588 g_free(bq);
589 }
590 g_free(wp);
591 }
592 }
676 KASSERT(p != NULL, ("gv_grow_request: NULL p"));
677 sc = p->vinumconf;
678 KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
593
679
594 cbp = bp->bio_driver1;
595 while (cbp != NULL) {
596 pbp = cbp->bio_caller1;
597 if (cbp->bio_cflags & GV_BIO_MALLOC)
598 g_free(cbp->bio_data);
599 g_destroy_bio(cbp);
600 cbp = pbp;
601 }
680 bp = g_new_bio();
681 if (bp == NULL) {
682 G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
683 "out of memory", p->name);
684 return (ENOMEM);
685 }
602
686
603 g_io_deliver(bp, err);
604 return;
605 }
606
607 if (TAILQ_EMPTY(&wp->bits))
608 g_free(wp);
609 else if (wp->lockbase != -1)
610 TAILQ_INSERT_TAIL(&p->packets, wp, list);
687 bp->bio_cmd = type;
688 bp->bio_done = gv_done;
689 bp->bio_error = 0;
690 bp->bio_caller1 = p;
691 bp->bio_offset = offset;
692 bp->bio_length = length;
693 bp->bio_pflags |= GV_BIO_SYNCREQ; /* XXX: misuse of pflags AND syncreq.*/
694 if (data == NULL)
695 data = g_malloc(length, M_WAITOK);
696 bp->bio_cflags |= GV_BIO_MALLOC;
697 bp->bio_data = data;
611
698
612 /*
613 * Requests to concatenated and striped plexes go straight
614 * through.
615 */
616 } else {
617 err = gv_plexbuffer(p, bp, addr, boff, bcount);
699 mtx_lock(&sc->queue_mtx);
700 bioq_disksort(sc->bqueue, bp);
701 mtx_unlock(&sc->queue_mtx);
702 //gv_plex_start(p, bp);
703 return (0);
704}
618
705
619 /* Building the sub-request failed. */
620 if (err) {
621 G_VINUM_LOGREQ(0, bp, "plex request failed.");
622 cbp = bp->bio_driver1;
623 while (cbp != NULL) {
624 pbp = cbp->bio_caller1;
625 g_destroy_bio(cbp);
626 cbp = pbp;
627 }
628 g_io_deliver(bp, err);
629 return;
706/*
707 * Finish handling of a bio to a growing plex.
708 */
709void
710gv_grow_complete(struct gv_plex *p, struct bio *bp)
711{
712 struct gv_softc *sc;
713 struct gv_sd *s;
714 struct gv_volume *v;
715 off_t origsize, offset;
716 int sdcount, err;
717
718 v = p->vol_sc;
719 KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
720 sc = v->vinumconf;
721 KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
722 err = 0;
723
724 /* If it was a read, write it. */
725 if (bp->bio_cmd == BIO_READ) {
726 p->synced += bp->bio_length;
727 err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
728 BIO_WRITE, bp->bio_data);
729 /* If it was a write, read next. */
730 } else if (bp->bio_cmd == BIO_WRITE) {
731 if (bp->bio_cflags & GV_BIO_MALLOC)
732 g_free(bp->bio_data);
733
734 /* Find the real size of the plex. */
735 sdcount = gv_sdcount(p, 1);
736 s = LIST_FIRST(&p->subdisks);
737 KASSERT(s != NULL, ("NULL s"));
738 origsize = (s->size * (sdcount - 1));
739 if (bp->bio_offset + bp->bio_length >= origsize) {
740 G_VINUM_DEBUG(1, "growing of %s completed", p->name);
741 p->flags &= ~GV_PLEX_GROWING;
742 LIST_FOREACH(s, &p->subdisks, in_plex) {
743 s->flags &= ~GV_SD_GROW;
744 gv_set_sd_state(s, GV_SD_UP, 0);
630 }
745 }
746 p->size = gv_plex_size(p);
747 gv_update_vol_size(v, gv_vol_size(v));
748 gv_set_plex_state(p, GV_PLEX_UP, 0);
749 g_topology_lock();
750 gv_access(v->provider, -1, -1, 0);
751 g_topology_unlock();
752 p->synced = 0;
753 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
754 /* Issue delayed requests. */
755 gv_plex_flush(p);
756 } else {
757 offset = bp->bio_offset + bp->bio_length;
758 err = gv_grow_request(p, offset,
759 MIN(bp->bio_length, origsize - offset),
760 BIO_READ, NULL);
631 }
761 }
632
633 /* Abuse bio_caller1 as linked list. */
634 pbp = bp->bio_driver1;
635 while (pbp->bio_caller1 != NULL)
636 pbp = pbp->bio_caller1;
637 bcount -= pbp->bio_length;
638 addr += pbp->bio_length;
639 boff += pbp->bio_length;
640 }
762 }
763 g_destroy_bio(bp);
641
764
642 /* Fire off all sub-requests. */
643 pbp = bp->bio_driver1;
644 while (pbp != NULL) {
645 /*
646 * RAID5 sub-requests need to come in correct order, otherwise
647 * we trip over the parity, as it might be overwritten by
648 * another sub-request.
649 */
650 if (pbp->bio_driver1 != NULL &&
651 gv_stripe_active(p, pbp)) {
652 /* Park the bio on the waiting queue. */
653 pbp->bio_cflags |= GV_BIO_ONHOLD;
654 mtx_lock(&p->bqueue_mtx);
655 bioq_disksort(p->wqueue, pbp);
656 mtx_unlock(&p->bqueue_mtx);
657 } else
658 g_io_request(pbp, pbp->bio_caller2);
659 pbp = pbp->bio_caller1;
765 if (err) {
766 p->flags &= ~GV_PLEX_GROWING;
767 G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
660 }
661}
662
768 }
769}
770
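/*
 * Hedged reading of the size check above, assuming gv_sdcount(p, 1) also
 * counts the subdisk(s) being added: origsize = s->size * (sdcount - 1) is
 * the plex size before the grow.  E.g. growing a striped plex from three to
 * four equally sized subdisks gives sdcount = 4, so the grow has copied
 * everything once the offset passes three subdisks' worth of data, and only
 * then are GV_PLEX_GROWING/GV_SD_GROW cleared and the plex size recomputed.
 */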
663static int
664gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
771
772/*
773 * Create an initialization BIO and send it off to the consumer. Assume that
774 * we're given initialization data as parameter.
775 */
776void
777gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
665{
778{
666 struct gv_plex *p;
667 struct g_geom *gp;
668 struct g_consumer *cp, *cp2;
669 int error;
779 struct gv_drive *d;
780 struct g_consumer *cp;
781 struct bio *bp, *cbp;
670
782
671 gp = pp->geom;
672 p = gp->softc;
673 KASSERT(p != NULL, ("NULL p"));
783 KASSERT(s != NULL, ("gv_init_request: NULL s"));
784 d = s->drive_sc;
785 KASSERT(d != NULL, ("gv_init_request: NULL d"));
786 cp = d->consumer;
787 KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
674
788
675 if (p->org == GV_PLEX_RAID5) {
676 if (dw > 0 && dr == 0)
677 dr = 1;
678 else if (dw < 0 && dr == 0)
679 dr = -1;
789 bp = g_new_bio();
790 if (bp == NULL) {
791 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
792 " (drive offset %jd); out of memory", s->name,
793 (intmax_t)s->initialized, (intmax_t)start);
794 return; /* XXX: Error codes. */
680 }
795 }
796 bp->bio_cmd = BIO_WRITE;
797 bp->bio_data = data;
798 bp->bio_done = gv_done;
799 bp->bio_error = 0;
800 bp->bio_length = length;
801 bp->bio_cflags |= GV_BIO_INIT;
802 bp->bio_offset = start;
803 bp->bio_caller1 = s;
681
804
682 LIST_FOREACH(cp, &gp->consumer, consumer) {
683 error = g_access(cp, dr, dw, de);
684 if (error) {
685 LIST_FOREACH(cp2, &gp->consumer, consumer) {
686 if (cp == cp2)
687 break;
688 g_access(cp2, -dr, -dw, -de);
689 }
690 return (error);
691 }
805 /* Then, of course, we have to clone it. */
806 cbp = g_clone_bio(bp);
807 if (cbp == NULL) {
808 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
809 " (drive offset %jd); out of memory", s->name,
810 (intmax_t)s->initialized, (intmax_t)start);
811 return; /* XXX: Error codes. */
692 }
812 }
693 return (0);
813 cbp->bio_done = gv_done;
814 cbp->bio_caller1 = s;
815 /* Send it off to the consumer. */
816 g_io_request(cbp, cp);
694}
695
817}
818
696static struct g_geom *
697gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
819/*
820 * Handle a finished initialization BIO.
821 */
822void
823gv_init_complete(struct gv_plex *p, struct bio *bp)
698{
824{
699 struct g_geom *gp;
700 struct g_consumer *cp, *cp2;
701 struct g_provider *pp2;
702 struct gv_plex *p;
703 struct gv_sd *s;
704 struct gv_softc *sc;
825 struct gv_softc *sc;
826 struct gv_drive *d;
827 struct g_consumer *cp;
828 struct gv_sd *s;
829 off_t start, length;
830 caddr_t data;
705 int error;
706
831 int error;
832
707 g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
708 g_topology_assert();
833 s = bp->bio_caller1;
834 start = bp->bio_offset;
835 length = bp->bio_length;
836 error = bp->bio_error;
837 data = bp->bio_data;
709
838
710 /* We only want to attach to subdisks. */
711 if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
712 return (NULL);
839 KASSERT(s != NULL, ("gv_init_complete: NULL s"));
840 d = s->drive_sc;
841 KASSERT(d != NULL, ("gv_init_complete: NULL d"));
842 cp = d->consumer;
843 KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
844 sc = p->vinumconf;
845 KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));
713
846
714 /* Find the VINUM class and its associated geom. */
715 gp = find_vinum_geom();
716 if (gp == NULL)
717 return (NULL);
718 sc = gp->softc;
719 KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));
847 g_destroy_bio(bp);
720
848
721 /* Find out which subdisk the offered provider corresponds to. */
722 s = pp->private;
723 KASSERT(s != NULL, ("gv_plex_taste: NULL s"));
849 /*
850 * First we need to find out if it was okay, and abort if it's not.
851 * Then we need to free previous buffers, find out the correct subdisk,
852 * as well as getting the correct starting point and length of the BIO.
853 */
854 if (start >= s->drive_offset + s->size) {
855 /* Free the data we initialized. */
856 if (data != NULL)
857 g_free(data);
858 g_topology_assert_not();
859 g_topology_lock();
860 g_access(cp, 0, -1, 0);
861 g_topology_unlock();
862 if (error) {
863 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
864 GV_SETSTATE_CONFIG);
865 } else {
866 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
867 s->initialized = 0;
868 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
869 G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
870 "successfully", s->name);
871 }
872 return;
873 }
874 s->initialized += length;
875 start += length;
876 gv_init_request(s, start, data, length);
877}
724
878
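/*
 * The initialisation path above loops the same way as the sync path (hedged
 * summary): every completed BIO_WRITE of 'length' bytes advances the start
 * offset by 'length' and re-issues gv_init_request() with the same buffer,
 * until the offset passes s->drive_offset + s->size; only then is the write
 * access on the consumer released and the subdisk marked GV_SD_UP (or
 * GV_SD_STALE if a write failed).
 */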
725 /* Now find the correct plex where this subdisk belongs to. */
726 p = gv_find_plex(sc, s->plex);
727 if (p == NULL) {
728 G_VINUM_DEBUG(0, "%s: NULL p for '%s'", __func__, s->name);
729 return (NULL);
879/*
880 * Create a new bio struct for the next parity rebuild. Used both by internal
881 * rebuild of degraded plexes as well as user initiated rebuilds/checks.
882 */
883void
884gv_parity_request(struct gv_plex *p, int flags, off_t offset)
885{
886 struct gv_softc *sc;
887 struct bio *bp;
888
889 KASSERT(p != NULL, ("gv_parity_request: NULL p"));
890 sc = p->vinumconf;
891 KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
892
893 bp = g_new_bio();
894 if (bp == NULL) {
895 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
896 "out of memory", p->name);
897 return;
730 }
731
898 }
899
900 bp->bio_cmd = BIO_WRITE;
901 bp->bio_done = gv_done;
902 bp->bio_error = 0;
903 bp->bio_length = p->stripesize;
904 bp->bio_caller1 = p;
905
732 /*
906 /*
733 * Add this subdisk to this plex. Since we trust the on-disk
734 * configuration, we don't check the given value (should we?).
735 * XXX: shouldn't be done here
907 * Check if it's a rebuild of a degraded plex or a user request of
908 * parity rebuild.
736 */
909 */
737 gv_sd_to_plex(p, s, 0);
910 if (flags & GV_BIO_REBUILD)
911 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
912 else if (flags & GV_BIO_CHECK)
913 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
914 else {
915 G_VINUM_DEBUG(0, "invalid flags given in rebuild");
916 return;
917 }
738
918
739 /* Now check if there's already a geom for this plex. */
740 gp = p->geom;
919 bp->bio_cflags = flags;
920 bp->bio_cflags |= GV_BIO_MALLOC;
741
921
742 /* Yes, there is already a geom, so we just add the consumer. */
743 if (gp != NULL) {
744 cp2 = LIST_FIRST(&gp->consumer);
745 /* Need to attach a new consumer to this subdisk. */
746 cp = g_new_consumer(gp);
747 error = g_attach(cp, pp);
748 if (error) {
749 G_VINUM_DEBUG(0, "unable to attach consumer to %s",
750 pp->name);
751 g_destroy_consumer(cp);
752 return (NULL);
753 }
754 /* Adjust the access counts of the new consumer. */
755 if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
756 error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
757 if (error) {
758 G_VINUM_DEBUG(0, "unable to set access counts"
759 " for consumer on %s", pp->name);
760 g_detach(cp);
761 g_destroy_consumer(cp);
762 return (NULL);
763 }
764 }
765 s->consumer = cp;
922 /* We still have more parity to build. */
923 bp->bio_offset = offset;
924 mtx_lock(&sc->queue_mtx);
925 bioq_disksort(sc->bqueue, bp);
926 mtx_unlock(&sc->queue_mtx);
927 //gv_plex_start(p, bp); /* Send it down to the plex. */
928}
766
929
767 /* Adjust the size of the providers this plex has. */
768 LIST_FOREACH(pp2, &gp->provider, provider)
769 pp2->mediasize = p->size;
930/*
931 * Handle a finished parity write.
932 */
933void
934gv_parity_complete(struct gv_plex *p, struct bio *bp)
935{
936 struct gv_softc *sc;
937 int error, flags;
770
938
771 /* Update the size of the volume this plex is attached to. */
772 if (p->vol_sc != NULL)
773 gv_update_vol_size(p->vol_sc, p->size);
939 error = bp->bio_error;
940 flags = bp->bio_cflags;
941 flags &= ~GV_BIO_MALLOC;
774
942
775 /*
776 * If necessary, create bio queues, queue mutex and a worker
777 * thread.
778 */
779 if (p->bqueue == NULL) {
780 p->bqueue = g_malloc(sizeof(struct bio_queue_head),
781 M_WAITOK | M_ZERO);
782 bioq_init(p->bqueue);
783 }
784 if (p->wqueue == NULL) {
785 p->wqueue = g_malloc(sizeof(struct bio_queue_head),
786 M_WAITOK | M_ZERO);
787 bioq_init(p->wqueue);
788 }
789 if (mtx_initialized(&p->bqueue_mtx) == 0)
790 mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
791 if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
792 kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
793 p->name);
794 p->flags |= GV_PLEX_THREAD_ACTIVE;
795 }
943 sc = p->vinumconf;
944 KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
796
945
797 return (NULL);
946 /* Clean up what we allocated. */
947 if (bp->bio_cflags & GV_BIO_MALLOC)
948 g_free(bp->bio_data);
949 g_destroy_bio(bp);
798
950
799 /* We need to create a new geom. */
951 if (error == EAGAIN) {
952 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
953 (intmax_t)p->synced);
954 }
955
956 /* Any error is fatal, except EAGAIN when we're rebuilding. */
957 if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
958 /* Make sure we don't have the lock. */
959 g_topology_assert_not();
960 g_topology_lock();
961 gv_access(p->vol_sc->provider, -1, -1, 0);
962 g_topology_unlock();
963 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
964 "errno %d", p->name, (intmax_t)p->synced, error);
965 return;
800 } else {
966 } else {
801 gp = g_new_geomf(mp, "%s", p->name);
802 gp->start = gv_plex_start;
803 gp->orphan = gv_plex_orphan;
804 gp->access = gv_plex_access;
805 gp->softc = p;
806 p->geom = gp;
967 p->synced += p->stripesize;
968 }
807
969
808 TAILQ_INIT(&p->packets);
809 p->bqueue = g_malloc(sizeof(struct bio_queue_head),
810 M_WAITOK | M_ZERO);
811 bioq_init(p->bqueue);
812 p->wqueue = g_malloc(sizeof(struct bio_queue_head),
813 M_WAITOK | M_ZERO);
814 bioq_init(p->wqueue);
815 mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
816 kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
817 p->name);
818 p->flags |= GV_PLEX_THREAD_ACTIVE;
819
820 /* Attach a consumer to this provider. */
821 cp = g_new_consumer(gp);
822 g_attach(cp, pp);
823 s->consumer = cp;
824
825 /* Create a provider for the outside world. */
826 pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
827 pp2->mediasize = p->size;
828 pp2->sectorsize = pp->sectorsize;
829 p->provider = pp2;
830 g_error_provider(pp2, 0);
831 return (gp);
970 if (p->synced >= p->size) {
971 /* Make sure we don't have the lock. */
972 g_topology_assert_not();
973 g_topology_lock();
974 gv_access(p->vol_sc->provider, -1, -1, 0);
975 g_topology_unlock();
976 /* We're finished. */
977 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
978 p->synced = 0;
979 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
980 return;
832 }
981 }
982
983 /* Send down the next one; it decides by itself whether more are needed. */
984 gv_parity_request(p, flags, p->synced);
833}
834
985}
986
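/*
 * Hedged summary of the parity pass above: each successfully completed
 * parity write advances p->synced by p->stripesize and re-issues
 * gv_parity_request() at that offset, so a check or rebuild covers the plex
 * one stripe at a time and ends once p->synced reaches p->size.  EAGAIN
 * flags a stripe whose parity was found to be wrong; it aborts the pass only
 * when GV_BIO_PARITY (rewrite the bad parity) is not set.
 */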
835static int
836gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
837 struct g_geom *gp)
987/*
988 * Handle a finished plex rebuild bio.
989 */
990void
991gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
838{
992{
839 struct gv_plex *p;
993 struct gv_softc *sc;
994 struct gv_sd *s;
995 int error, flags;
996 off_t offset;
840
997
841 g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
842 g_topology_assert();
998 error = bp->bio_error;
999 flags = bp->bio_cflags;
1000 offset = bp->bio_offset;
1001 flags &= ~GV_BIO_MALLOC;
1002 sc = p->vinumconf;
1003 KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
843
1004
844 p = gp->softc;
1005 /* Clean up what we allocated. */
1006 if (bp->bio_cflags & GV_BIO_MALLOC)
1007 g_free(bp->bio_data);
1008 g_destroy_bio(bp);
845
1009
846 KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));
1010 if (error) {
1011 g_topology_assert_not();
1012 g_topology_lock();
1013 gv_access(p->vol_sc->provider, -1, -1, 0);
1014 g_topology_unlock();
1015
1016 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
1017 p->name, (intmax_t)offset, error);
1018 p->flags &= ~GV_PLEX_REBUILDING;
1019 p->synced = 0;
1020 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1021 return;
1022 }
847
1023
848 /*
849 * If this is a RAID5 plex, check if its worker thread is still active
850 * and signal it to self destruct.
851 */
852 gv_kill_plex_thread(p);
853 /* g_free(sc); */
854 g_wither_geom(gp, ENXIO);
855 return (0);
856}
1024 offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
1025 if (offset >= p->size) {
1026 /* We're finished. */
1027 g_topology_assert_not();
1028 g_topology_lock();
1029 gv_access(p->vol_sc->provider, -1, -1, 0);
1030 g_topology_unlock();
1031
1032 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
1033 gv_save_config(p->vinumconf);
1034 p->flags &= ~GV_PLEX_REBUILDING;
1035 p->synced = 0;
1036 /* Try to up all subdisks. */
1037 LIST_FOREACH(s, &p->subdisks, in_plex)
1038 gv_update_sd_state(s);
1039 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
1040 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1041 return;
1042 }
857
1043
858#define VINUMPLEX_CLASS_NAME "VINUMPLEX"
859
860static struct g_class g_vinum_plex_class = {
861 .name = VINUMPLEX_CLASS_NAME,
862 .version = G_VERSION,
863 .taste = gv_plex_taste,
864 .destroy_geom = gv_plex_destroy_geom,
865};
866
867DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);
1044 /* Send down the next one; it decides by itself whether more are needed. */
1045 gv_parity_request(p, flags, offset);
1046}
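/*
 * Hedged note on the stride used above: a rebuild advances the offset by
 * p->stripesize * (gv_sdcount(p, 1) - 1) per completed request, i.e. one
 * full stripe of user data (the parity column excluded), and terminates once
 * that offset reaches p->size; any error clears GV_PLEX_REBUILDING and
 * flushes the delayed requests so the plex does not stay wedged.
 */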