geom_vinum_plex.c (186517) | geom_vinum_plex.c (190507) |
---|---|
1/*- | 1/*- |
2 * Copyright (c) 2004 Lukas Ertl | 2 * Copyright (c) 2004, 2007 Lukas Ertl 3 * Copyright (c) 2007, 2009 Ulf Lilleengen |
3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright --- 9 unchanged lines hidden (view full) --- 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> | 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright --- 9 unchanged lines hidden (view full) --- 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> |
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 186517 2008-12-27 14:32:39Z lulf $"); | 29__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 190507 2009-03-28 17:20:08Z lulf $"); |
29 30#include <sys/param.h> 31#include <sys/bio.h> | 30 31#include <sys/param.h> 32#include <sys/bio.h> |
32#include <sys/kernel.h> 33#include <sys/kthread.h> 34#include <sys/libkern.h> | |
35#include <sys/lock.h> 36#include <sys/malloc.h> | 33#include <sys/lock.h> 34#include <sys/malloc.h> |
37#include <sys/module.h> 38#include <sys/mutex.h> | |
39#include <sys/systm.h> 40 41#include <geom/geom.h> 42#include <geom/vinum/geom_vinum_var.h> 43#include <geom/vinum/geom_vinum_raid5.h> 44#include <geom/vinum/geom_vinum.h> 45 | 35#include <sys/systm.h> 36 37#include <geom/geom.h> 38#include <geom/vinum/geom_vinum_var.h> 39#include <geom/vinum/geom_vinum_raid5.h> 40#include <geom/vinum/geom_vinum.h> 41 |
46static void gv_plex_completed_request(struct gv_plex *, struct bio *); 47static void gv_plex_normal_request(struct gv_plex *, struct bio *); 48static void gv_plex_worker(void *); 49static int gv_check_parity(struct gv_plex *, struct bio *, 50 struct gv_raid5_packet *); 51static int gv_normal_parity(struct gv_plex *, struct bio *, 52 struct gv_raid5_packet *); 53 54/* XXX: is this the place to catch dying subdisks? */ 55static void 56gv_plex_orphan(struct g_consumer *cp) | 42static int gv_check_parity(struct gv_plex *, struct bio *, 43 struct gv_raid5_packet *); 44static int gv_normal_parity(struct gv_plex *, struct bio *, 45 struct gv_raid5_packet *); 46static void gv_plex_flush(struct gv_plex *); 47static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *, 48 int *, int); 49static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t, 50 off_t, caddr_t); 51void 52gv_plex_start(struct gv_plex *p, struct bio *bp) |
57{ | 53{ |
58 struct g_geom *gp; 59 struct gv_plex *p; 60 int error; | 54 struct bio *cbp; 55 struct gv_sd *s; 56 struct gv_raid5_packet *wp; 57 caddr_t addr; 58 off_t bcount, boff, len; |
61 | 59 |
62 g_topology_assert(); 63 gp = cp->geom; 64 g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name); | 60 bcount = bp->bio_length; 61 addr = bp->bio_data; 62 boff = bp->bio_offset; |
65 | 63 |
66 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 67 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 68 error = cp->provider->error; 69 if (error == 0) 70 error = ENXIO; 71 g_detach(cp); 72 g_destroy_consumer(cp); 73 if (!LIST_EMPTY(&gp->consumer)) 74 return; | 64 /* Walk over the whole length of the request, we might split it up. */ 65 while (bcount > 0) { 66 wp = NULL; |
75 | 67 |
76 p = gp->softc; 77 if (p != NULL) { 78 gv_kill_plex_thread(p); 79 p->geom = NULL; 80 p->provider = NULL; 81 p->consumer = NULL; 82 } 83 gp->softc = NULL; 84 g_wither_geom(gp, error); 85} | 68 /* 69 * RAID5 plexes need special treatment, as a single request 70 * might involve several read/write sub-requests. 71 */ 72 if (p->org == GV_PLEX_RAID5) { 73 wp = gv_raid5_start(p, bp, addr, boff, bcount); 74 if (wp == NULL) 75 return; 76 77 len = wp->length; |
86 | 78 |
87void 88gv_plex_done(struct bio *bp) 89{ 90 struct gv_plex *p; | 79 if (TAILQ_EMPTY(&wp->bits)) 80 g_free(wp); 81 else if (wp->lockbase != -1) 82 TAILQ_INSERT_TAIL(&p->packets, wp, list); |
91 | 83 |
92 p = bp->bio_from->geom->softc; 93 bp->bio_cflags |= GV_BIO_DONE; 94 mtx_lock(&p->bqueue_mtx); 95 bioq_insert_tail(p->bqueue, bp); 96 wakeup(p); 97 mtx_unlock(&p->bqueue_mtx); | 84 /* 85 * Requests to concatenated and striped plexes go straight 86 * through. 87 */ 88 } else { 89 len = gv_plex_normal_request(p, bp, boff, bcount, addr); 90 } 91 if (len < 0) 92 return; 93 94 bcount -= len; 95 addr += len; 96 boff += len; 97 } 98 99 /* 100 * Fire off all sub-requests. We get the correct consumer (== drive) 101 * to send each request to via the subdisk that was stored in 102 * cbp->bio_caller1. 103 */ 104 cbp = bioq_takefirst(p->bqueue); 105 while (cbp != NULL) { 106 /* 107 * RAID5 sub-requests need to come in correct order, otherwise 108 * we trip over the parity, as it might be overwritten by 109 * another sub-request. We abuse cbp->bio_caller2 to mark 110 * potential overlap situations. 111 */ 112 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) { 113 /* Park the bio on the waiting queue. */ 114 cbp->bio_cflags |= GV_BIO_ONHOLD; 115 bioq_disksort(p->wqueue, cbp); 116 } else { 117 s = cbp->bio_caller1; 118 g_io_request(cbp, s->drive_sc->consumer); 119 } 120 cbp = bioq_takefirst(p->bqueue); 121 } |
98} 99 | 122} 123 |
100/* Find the correct subdisk to send the bio to and build a bio to send. */ | |
101static int | 124static int |
102gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount) | 125gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off, 126 off_t *real_len, int *sdno, int growing) |
103{ | 127{ |
104 struct g_geom *gp; | |
105 struct gv_sd *s; | 128 struct gv_sd *s; |
106 struct bio *cbp, *pbp; 107 int i, sdno; 108 off_t len_left, real_len, real_off; 109 off_t stripeend, stripeno, stripestart; | 129 int i, sdcount; 130 off_t len_left, stripeend, stripeno, stripestart; |
110 | 131 |
111 if (p == NULL || LIST_EMPTY(&p->subdisks)) 112 return (ENXIO); 113 114 s = NULL; 115 gp = bp->bio_to->geom; 116 117 /* 118 * We only handle concatenated and striped plexes here. RAID5 plexes 119 * are handled in build_raid5_request(). 120 */ | |
121 switch (p->org) { 122 case GV_PLEX_CONCAT: 123 /* 124 * Find the subdisk where this request starts. The subdisks in 125 * this list must be ordered by plex_offset. 126 */ | 132 switch (p->org) { 133 case GV_PLEX_CONCAT: 134 /* 135 * Find the subdisk where this request starts. The subdisks in 136 * this list must be ordered by plex_offset. 137 */ |
138 i = 0; |
|
127 LIST_FOREACH(s, &p->subdisks, in_plex) { 128 if (s->plex_offset <= boff && | 139 LIST_FOREACH(s, &p->subdisks, in_plex) { 140 if (s->plex_offset <= boff && |
129 s->plex_offset + s->size > boff) | 141 s->plex_offset + s->size > boff) { 142 *sdno = i; |
130 break; | 143 break; |
144 } 145 i++; |
|
131 } | 146 } |
132 /* Subdisk not found. */ 133 if (s == NULL) 134 return (ENXIO); | 147 if (s == NULL || s->drive_sc == NULL) 148 return (GV_ERR_NOTFOUND); |
135 136 /* Calculate corresponding offsets on disk. */ | 149 150 /* Calculate corresponding offsets on disk. */ |
137 real_off = boff - s->plex_offset; 138 len_left = s->size - real_off; 139 real_len = (bcount > len_left) ? len_left : bcount; | 151 *real_off = boff - s->plex_offset; 152 len_left = s->size - (*real_off); 153 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); 154 *real_len = (bcount > len_left) ? len_left : bcount; |
140 break; 141 142 case GV_PLEX_STRIPED: 143 /* The number of the stripe where the request starts. */ 144 stripeno = boff / p->stripesize; | 155 break; 156 157 case GV_PLEX_STRIPED: 158 /* The number of the stripe where the request starts. */ 159 stripeno = boff / p->stripesize; |
160 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0")); |
|
145 | 161 |
146 /* The number of the subdisk where the stripe resides. */ 147 sdno = stripeno % p->sdcount; | 162 /* Take growing subdisks into account when calculating. */ 163 sdcount = gv_sdcount(p, (boff >= p->synced)); |
148 | 164 |
149 /* Find the right subdisk. */ 150 i = 0; 151 LIST_FOREACH(s, &p->subdisks, in_plex) { 152 if (i == sdno) 153 break; 154 i++; 155 } | 165 if (!(boff + bcount <= p->synced) && 166 (p->flags & GV_PLEX_GROWING) && 167 !growing) 168 return (GV_ERR_ISBUSY); 169 *sdno = stripeno % sdcount; |
156 | 170 |
157 /* Subdisk not found. */ 158 if (s == NULL) 159 return (ENXIO); 160 161 /* The offset of the stripe from the start of the subdisk. */ 162 stripestart = (stripeno / p->sdcount) * | 171 KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0")); 172 stripestart = (stripeno / sdcount) * |
163 p->stripesize; | 173 p->stripesize; |
164 165 /* The offset at the end of the stripe. */ | 174 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0")); |
166 stripeend = stripestart + p->stripesize; | 175 stripeend = stripestart + p->stripesize; |
167 168 /* The offset of the request on this subdisk. */ 169 real_off = boff - (stripeno * p->stripesize) + | 176 *real_off = boff - (stripeno * p->stripesize) + |
170 stripestart; | 177 stripestart; |
178 len_left = stripeend - *real_off; 179 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); |
|
171 | 180 |
172 /* The length left in this stripe. */ 173 len_left = stripeend - real_off; 174 175 real_len = (bcount <= len_left) ? bcount : len_left; | 181 *real_len = (bcount <= len_left) ? bcount : len_left; |
176 break; 177 178 default: | 182 break; 183 184 default: |
179 return (EINVAL); | 185 return (GV_ERR_PLEXORG); |
180 } | 186 } |
187 return (0); 188} |
|
181 | 189 |
190/* 191 * Prepare a normal plex request. 192 */ 193static int 194gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff, 195 off_t bcount, caddr_t addr) 196{ 197 struct gv_sd *s; 198 struct bio *cbp; 199 off_t real_len, real_off; 200 int i, err, sdno; 201 202 s = NULL; 203 sdno = -1; 204 real_len = real_off = 0; 205 206 err = ENXIO; 207 208 if (p == NULL || LIST_EMPTY(&p->subdisks)) 209 goto bad; 210 211 err = gv_plex_offset(p, boff, bcount, &real_off, 212 &real_len, &sdno, (bp->bio_pflags & GV_BIO_SYNCREQ)); 213 /* If the request was blocked, put it into wait. */ 214 if (err == GV_ERR_ISBUSY) { 215 bioq_disksort(p->rqueue, bp); 216 return (-1); /* "Fail", and delay request. */ 217 } 218 if (err) { 219 err = ENXIO; 220 goto bad; 221 } 222 err = ENXIO; 223 224 /* Find the right subdisk. */ 225 i = 0; 226 LIST_FOREACH(s, &p->subdisks, in_plex) { 227 if (i == sdno) 228 break; 229 i++; 230 } 231 232 /* Subdisk not found. */ 233 if (s == NULL || s->drive_sc == NULL) 234 goto bad; 235 |
|
182 /* Now check if we can handle the request on this subdisk. */ 183 switch (s->state) { 184 case GV_SD_UP: 185 /* If the subdisk is up, just continue. */ 186 break; | 236 /* Now check if we can handle the request on this subdisk. */ 237 switch (s->state) { 238 case GV_SD_UP: 239 /* If the subdisk is up, just continue. */ 240 break; |
187 | 241 case GV_SD_DOWN: 242 if (bp->bio_cflags & GV_BIO_INTERNAL) 243 G_VINUM_DEBUG(0, "subdisk must be in the stale state in" 244 " order to perform administrative requests"); 245 goto bad; |
188 case GV_SD_STALE: | 246 case GV_SD_STALE: |
189 if (!(bp->bio_cflags & GV_BIO_SYNCREQ)) 190 return (ENXIO); | 247 if (!(bp->bio_cflags & GV_BIO_SYNCREQ)) { 248 G_VINUM_DEBUG(0, "subdisk stale, unable to perform " 249 "regular requests"); 250 goto bad; 251 } |
191 192 G_VINUM_DEBUG(1, "sd %s is initializing", s->name); 193 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); 194 break; | 252 253 G_VINUM_DEBUG(1, "sd %s is initializing", s->name); 254 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); 255 break; |
195 | |
196 case GV_SD_INITIALIZING: 197 if (bp->bio_cmd == BIO_READ) | 256 case GV_SD_INITIALIZING: 257 if (bp->bio_cmd == BIO_READ) |
198 return (ENXIO); | 258 goto bad; |
199 break; | 259 break; |
200 | |
201 default: 202 /* All other subdisk states mean it's not accessible. */ | 260 default: 261 /* All other subdisk states mean it's not accessible. */ |
203 return (ENXIO); | 262 goto bad; |
204 } 205 206 /* Clone the bio and adjust the offsets and sizes. */ 207 cbp = g_clone_bio(bp); | 263 } 264 265 /* Clone the bio and adjust the offsets and sizes. */ 266 cbp = g_clone_bio(bp); |
208 if (cbp == NULL) 209 return (ENOMEM); 210 cbp->bio_offset = real_off; | 267 if (cbp == NULL) { 268 err = ENOMEM; 269 goto bad; 270 } 271 cbp->bio_offset = real_off + s->drive_offset; |
211 cbp->bio_length = real_len; 212 cbp->bio_data = addr; | 272 cbp->bio_length = real_len; 273 cbp->bio_data = addr; |
213 cbp->bio_done = g_std_done; 214 cbp->bio_caller2 = s->consumer; 215 if ((bp->bio_cflags & GV_BIO_SYNCREQ)) { | 274 cbp->bio_done = gv_done; 275 cbp->bio_caller1 = s; 276 if ((bp->bio_cflags & GV_BIO_SYNCREQ)) |
216 cbp->bio_cflags |= GV_BIO_SYNCREQ; | 277 cbp->bio_cflags |= GV_BIO_SYNCREQ; |
217 cbp->bio_done = gv_plex_done; 218 } | |
219 | 278 |
220 if (bp->bio_driver1 == NULL) { 221 bp->bio_driver1 = cbp; 222 } else { 223 pbp = bp->bio_driver1; 224 while (pbp->bio_caller1 != NULL) 225 pbp = pbp->bio_caller1; 226 pbp->bio_caller1 = cbp; | 279 /* Store the sub-requests now and let others issue them. */ 280 bioq_insert_tail(p->bqueue, cbp); 281 return (real_len); 282bad: 283 G_VINUM_LOGREQ(0, bp, "plex request failed."); 284 /* Building the sub-request failed. If internal BIO, do not deliver. */ 285 if (bp->bio_cflags & GV_BIO_INTERNAL) { 286 if (bp->bio_cflags & GV_BIO_MALLOC) 287 g_free(bp->bio_data); 288 g_destroy_bio(bp); 289 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING | 290 GV_PLEX_GROWING); 291 return (-1); |
227 } | 292 } |
228 229 return (0); | 293 g_io_deliver(bp, err); 294 return (-1); |
230} 231 | 295} 296 |
232static void 233gv_plex_start(struct bio *bp) | 297/* 298 * Handle a completed request to a striped or concatenated plex. 299 */ 300void 301gv_plex_normal_done(struct gv_plex *p, struct bio *bp) |
234{ | 302{ |
235 struct gv_plex *p; | 303 struct bio *pbp; |
236 | 304 |
237 switch(bp->bio_cmd) { 238 case BIO_READ: 239 case BIO_WRITE: 240 case BIO_DELETE: 241 break; 242 case BIO_GETATTR: 243 default: 244 g_io_deliver(bp, EOPNOTSUPP); 245 return; | 305 pbp = bp->bio_parent; 306 if (pbp->bio_error == 0) 307 pbp->bio_error = bp->bio_error; 308 g_destroy_bio(bp); 309 pbp->bio_inbed++; 310 if (pbp->bio_children == pbp->bio_inbed) { 311 /* Just set it to length since multiple plexes will 312 * screw things up. */ 313 pbp->bio_completed = pbp->bio_length; 314 if (pbp->bio_cflags & GV_BIO_SYNCREQ) 315 gv_sync_complete(p, pbp); 316 else if (pbp->bio_pflags & GV_BIO_SYNCREQ) 317 gv_grow_complete(p, pbp); 318 else 319 g_io_deliver(pbp, pbp->bio_error); |
246 } | 320 } |
247 248 /* 249 * We cannot handle this request if too many of our subdisks are 250 * inaccessible. 251 */ 252 p = bp->bio_to->geom->softc; 253 if ((p->state < GV_PLEX_DEGRADED) && 254 !(bp->bio_cflags & GV_BIO_SYNCREQ)) { 255 g_io_deliver(bp, ENXIO); 256 return; 257 } 258 259 mtx_lock(&p->bqueue_mtx); 260 bioq_disksort(p->bqueue, bp); 261 wakeup(p); 262 mtx_unlock(&p->bqueue_mtx); | |
263} 264 | 321} 322 |
265static void 266gv_plex_worker(void *arg) | 323/* 324 * Handle a completed request to a RAID-5 plex. 325 */ 326void 327gv_plex_raid5_done(struct gv_plex *p, struct bio *bp) |
267{ | 328{ |
268 struct bio *bp; 269 struct gv_plex *p; 270 struct gv_sd *s; | 329 struct gv_softc *sc; 330 struct bio *cbp, *pbp; 331 struct gv_bioq *bq, *bq2; 332 struct gv_raid5_packet *wp; 333 off_t completed; 334 int i; |
271 | 335 |
272 p = arg; 273 KASSERT(p != NULL, ("NULL p")); | 336 completed = 0; 337 sc = p->vinumconf; 338 wp = bp->bio_caller2; |
274 | 339 |
275 mtx_lock(&p->bqueue_mtx); 276 for (;;) { 277 /* We were signaled to exit. */ 278 if (p->flags & GV_PLEX_THREAD_DIE) | 340 switch (bp->bio_parent->bio_cmd) { 341 case BIO_READ: 342 if (wp == NULL) { 343 completed = bp->bio_completed; |
279 break; | 344 break; |
280 281 /* Take the first BIO from our queue. */ 282 bp = bioq_takefirst(p->bqueue); 283 if (bp == NULL) { 284 msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10); 285 continue; | |
286 } | 345 } |
287 mtx_unlock(&p->bqueue_mtx); | |
288 | 346 |
289 /* A completed request. */ 290 if (bp->bio_cflags & GV_BIO_DONE) { 291 if (bp->bio_cflags & GV_BIO_SYNCREQ || 292 bp->bio_cflags & GV_BIO_REBUILD) { 293 s = bp->bio_to->private; 294 if (bp->bio_error == 0) 295 s->initialized += bp->bio_length; 296 if (s->initialized >= s->size) { 297 g_topology_lock(); 298 gv_set_sd_state(s, GV_SD_UP, 299 GV_SETSTATE_CONFIG); 300 g_topology_unlock(); 301 s->initialized = 0; | 347 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 348 if (bq->bp != bp) 349 continue; 350 TAILQ_REMOVE(&wp->bits, bq, queue); 351 g_free(bq); 352 for (i = 0; i < wp->length; i++) 353 wp->data[i] ^= bp->bio_data[i]; 354 break; 355 } 356 if (TAILQ_EMPTY(&wp->bits)) { 357 completed = wp->length; 358 if (wp->lockbase != -1) { 359 TAILQ_REMOVE(&p->packets, wp, list); 360 /* Bring the waiting bios back into the game. */ 361 pbp = bioq_takefirst(p->wqueue); 362 while (pbp != NULL) { 363 mtx_lock(&sc->queue_mtx); 364 bioq_disksort(sc->bqueue, pbp); 365 mtx_unlock(&sc->queue_mtx); 366 pbp = bioq_takefirst(p->wqueue); |
302 } 303 } | 367 } 368 } |
369 g_free(wp); 370 } |
|
304 | 371 |
305 if (bp->bio_cflags & GV_BIO_SYNCREQ) 306 g_std_done(bp); 307 else 308 gv_plex_completed_request(p, bp); 309 /* 310 * A sub-request that was hold back because it interfered with 311 * another sub-request. 312 */ 313 } else if (bp->bio_cflags & GV_BIO_ONHOLD) { 314 /* Is it still locked out? */ 315 if (gv_stripe_active(p, bp)) { 316 /* Park the bio on the waiting queue. */ 317 mtx_lock(&p->bqueue_mtx); 318 bioq_disksort(p->wqueue, bp); 319 mtx_unlock(&p->bqueue_mtx); 320 } else { 321 bp->bio_cflags &= ~GV_BIO_ONHOLD; 322 g_io_request(bp, bp->bio_caller2); 323 } | 372 break; |
324 | 373 |
325 /* A normal request to this plex. */ 326 } else 327 gv_plex_normal_request(p, bp); | 374 case BIO_WRITE: 375 /* XXX can this ever happen? */ 376 if (wp == NULL) { 377 completed = bp->bio_completed; 378 break; 379 } |
328 | 380 |
329 mtx_lock(&p->bqueue_mtx); 330 } 331 mtx_unlock(&p->bqueue_mtx); 332 p->flags |= GV_PLEX_THREAD_DEAD; 333 wakeup(p); | 381 /* Check if we need to handle parity data. */ 382 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 383 if (bq->bp != bp) 384 continue; 385 TAILQ_REMOVE(&wp->bits, bq, queue); 386 g_free(bq); 387 cbp = wp->parity; 388 if (cbp != NULL) { 389 for (i = 0; i < wp->length; i++) 390 cbp->bio_data[i] ^= bp->bio_data[i]; 391 } 392 break; 393 } |
334 | 394 |
335 kproc_exit(ENXIO); 336} | 395 /* Handle parity data. */ 396 if (TAILQ_EMPTY(&wp->bits)) { 397 if (bp->bio_parent->bio_cflags & GV_BIO_CHECK) 398 i = gv_check_parity(p, bp, wp); 399 else 400 i = gv_normal_parity(p, bp, wp); |
337 | 401 |
338static int 339gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) 340{ 341 struct bio *cbp, *pbp; 342 int finished, i; | 402 /* All of our sub-requests have finished. */ 403 if (i) { 404 completed = wp->length; 405 TAILQ_REMOVE(&p->packets, wp, list); 406 /* Bring the waiting bios back into the game. */ 407 pbp = bioq_takefirst(p->wqueue); 408 while (pbp != NULL) { 409 mtx_lock(&sc->queue_mtx); 410 bioq_disksort(sc->bqueue, pbp); 411 mtx_unlock(&sc->queue_mtx); 412 pbp = bioq_takefirst(p->wqueue); 413 } 414 g_free(wp); 415 } 416 } |
343 | 417 |
344 finished = 1; | 418 break; 419 } |
345 | 420 |
346 if (wp->waiting != NULL) { 347 pbp = wp->waiting; 348 wp->waiting = NULL; 349 cbp = wp->parity; 350 for (i = 0; i < wp->length; i++) 351 cbp->bio_data[i] ^= pbp->bio_data[i]; 352 g_io_request(pbp, pbp->bio_caller2); 353 finished = 0; | 421 pbp = bp->bio_parent; 422 if (pbp->bio_error == 0) 423 pbp->bio_error = bp->bio_error; 424 pbp->bio_completed += completed; |
354 | 425 |
355 } else if (wp->parity != NULL) { 356 cbp = wp->parity; 357 wp->parity = NULL; 358 g_io_request(cbp, cbp->bio_caller2); 359 finished = 0; | 426 /* When the original request is finished, we deliver it. */ 427 pbp->bio_inbed++; 428 if (pbp->bio_inbed == pbp->bio_children) { 429 /* Hand it over for checking or delivery. */ 430 if (pbp->bio_cmd == BIO_WRITE && 431 (pbp->bio_cflags & GV_BIO_CHECK)) { 432 gv_parity_complete(p, pbp); 433 } else if (pbp->bio_cmd == BIO_WRITE && 434 (pbp->bio_cflags & GV_BIO_REBUILD)) { 435 gv_rebuild_complete(p, pbp); 436 } else if (pbp->bio_cflags & GV_BIO_INIT) { 437 gv_init_complete(p, pbp); 438 } else if (pbp->bio_cflags & GV_BIO_SYNCREQ) { 439 gv_sync_complete(p, pbp); 440 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) { 441 gv_grow_complete(p, pbp); 442 } else { 443 g_io_deliver(pbp, pbp->bio_error); 444 } |
360 } 361 | 445 } 446 |
362 return (finished); | 447 /* Clean up what we allocated. */ 448 if (bp->bio_cflags & GV_BIO_MALLOC) 449 g_free(bp->bio_data); 450 g_destroy_bio(bp); |
363} 364 365static int 366gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) 367{ 368 struct bio *pbp; | 451} 452 453static int 454gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) 455{ 456 struct bio *pbp; |
457 struct gv_sd *s; |
|
369 int err, finished, i; 370 371 err = 0; 372 finished = 1; 373 374 if (wp->waiting != NULL) { 375 pbp = wp->waiting; 376 wp->waiting = NULL; | 458 int err, finished, i; 459 460 err = 0; 461 finished = 1; 462 463 if (wp->waiting != NULL) { 464 pbp = wp->waiting; 465 wp->waiting = NULL; |
377 g_io_request(pbp, pbp->bio_caller2); | 466 s = pbp->bio_caller1; 467 g_io_request(pbp, s->drive_sc->consumer); |
378 finished = 0; 379 380 } else if (wp->parity != NULL) { 381 pbp = wp->parity; 382 wp->parity = NULL; 383 384 /* Check if the parity is correct. */ 385 for (i = 0; i < wp->length; i++) { --- 4 unchanged lines hidden (view full) --- 390 } 391 392 /* The parity is not correct... */ 393 if (err) { 394 bp->bio_parent->bio_error = EAGAIN; 395 396 /* ... but we rebuild it. */ 397 if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) { | 468 finished = 0; 469 470 } else if (wp->parity != NULL) { 471 pbp = wp->parity; 472 wp->parity = NULL; 473 474 /* Check if the parity is correct. */ 475 for (i = 0; i < wp->length; i++) { --- 4 unchanged lines hidden (view full) --- 480 } 481 482 /* The parity is not correct... */ 483 if (err) { 484 bp->bio_parent->bio_error = EAGAIN; 485 486 /* ... but we rebuild it. */ 487 if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) { |
398 g_io_request(pbp, pbp->bio_caller2); | 488 s = pbp->bio_caller1; 489 g_io_request(pbp, s->drive_sc->consumer); |
399 finished = 0; 400 } 401 } 402 403 /* 404 * Clean up the BIO we would have used for rebuilding the 405 * parity. 406 */ 407 if (finished) { 408 bp->bio_parent->bio_inbed++; 409 g_destroy_bio(pbp); 410 } 411 412 } 413 414 return (finished); 415} 416 | 490 finished = 0; 491 } 492 } 493 494 /* 495 * Clean up the BIO we would have used for rebuilding the 496 * parity. 497 */ 498 if (finished) { 499 bp->bio_parent->bio_inbed++; 500 g_destroy_bio(pbp); 501 } 502 503 } 504 505 return (finished); 506} 507 |
417void 418gv_plex_completed_request(struct gv_plex *p, struct bio *bp) | 508static int 509gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) |
419{ 420 struct bio *cbp, *pbp; | 510{ 511 struct bio *cbp, *pbp; |
421 struct gv_bioq *bq, *bq2; 422 struct gv_raid5_packet *wp; 423 int i; | 512 struct gv_sd *s; 513 int finished, i; |
424 | 514 |
425 wp = bp->bio_driver1; | 515 finished = 1; |
426 | 516 |
427 switch (bp->bio_parent->bio_cmd) { 428 case BIO_READ: 429 if (wp == NULL) 430 break; | 517 if (wp->waiting != NULL) { 518 pbp = wp->waiting; 519 wp->waiting = NULL; 520 cbp = wp->parity; 521 for (i = 0; i < wp->length; i++) 522 cbp->bio_data[i] ^= pbp->bio_data[i]; 523 s = pbp->bio_caller1; 524 g_io_request(pbp, s->drive_sc->consumer); 525 finished = 0; |
431 | 526 |
432 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 433 if (bq->bp == bp) { 434 TAILQ_REMOVE(&wp->bits, bq, queue); 435 g_free(bq); 436 for (i = 0; i < wp->length; i++) 437 wp->data[i] ^= bp->bio_data[i]; 438 break; 439 } 440 } 441 if (TAILQ_EMPTY(&wp->bits)) { 442 bp->bio_parent->bio_completed += wp->length; 443 if (wp->lockbase != -1) { 444 TAILQ_REMOVE(&p->packets, wp, list); 445 /* Bring the waiting bios back into the game. */ 446 mtx_lock(&p->bqueue_mtx); 447 pbp = bioq_takefirst(p->wqueue); 448 while (pbp != NULL) { 449 bioq_disksort(p->bqueue, pbp); 450 pbp = bioq_takefirst(p->wqueue); 451 } 452 mtx_unlock(&p->bqueue_mtx); 453 } 454 g_free(wp); 455 } | 527 } else if (wp->parity != NULL) { 528 cbp = wp->parity; 529 wp->parity = NULL; 530 s = cbp->bio_caller1; 531 g_io_request(cbp, s->drive_sc->consumer); 532 finished = 0; 533 } |
456 | 534 |
457 break; | 535 return (finished); 536} |
458 | 537 |
459 case BIO_WRITE: 460 if (wp == NULL) 461 break; | 538/* Flush the queue with delayed requests. */ 539static void 540gv_plex_flush(struct gv_plex *p) 541{ 542 struct gv_softc *sc; 543 struct bio *bp; |
462 | 544 |
463 /* Check if we need to handle parity data. */ 464 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 465 if (bq->bp == bp) { 466 TAILQ_REMOVE(&wp->bits, bq, queue); 467 g_free(bq); 468 cbp = wp->parity; 469 if (cbp != NULL) { 470 for (i = 0; i < wp->length; i++) 471 cbp->bio_data[i] ^= 472 bp->bio_data[i]; 473 } 474 break; 475 } 476 } | 545 sc = p->vinumconf; 546 bp = bioq_takefirst(p->rqueue); 547 while (bp != NULL) { 548 gv_plex_start(p, bp); 549 bp = bioq_takefirst(p->rqueue); 550 } 551} |
477 | 552 |
478 /* Handle parity data. */ 479 if (TAILQ_EMPTY(&wp->bits)) { 480 if (bp->bio_parent->bio_cflags & GV_BIO_CHECK) 481 i = gv_check_parity(p, bp, wp); 482 else 483 i = gv_normal_parity(p, bp, wp); | 553int 554gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset, 555 off_t length, int type, caddr_t data) 556{ 557 struct gv_softc *sc; 558 struct bio *bp; |
484 | 559 |
485 /* All of our sub-requests have finished. */ 486 if (i) { 487 bp->bio_parent->bio_completed += wp->length; 488 TAILQ_REMOVE(&p->packets, wp, list); 489 /* Bring the waiting bios back into the game. */ 490 mtx_lock(&p->bqueue_mtx); 491 pbp = bioq_takefirst(p->wqueue); 492 while (pbp != NULL) { 493 bioq_disksort(p->bqueue, pbp); 494 pbp = bioq_takefirst(p->wqueue); 495 } 496 mtx_unlock(&p->bqueue_mtx); 497 g_free(wp); 498 } 499 } | 560 KASSERT(from != NULL, ("NULL from")); 561 KASSERT(to != NULL, ("NULL to")); 562 sc = from->vinumconf; 563 KASSERT(sc != NULL, ("NULL sc")); |
500 | 564 |
501 break; | 565 bp = g_new_bio(); 566 if (bp == NULL) { 567 G_VINUM_DEBUG(0, "sync from '%s' failed at offset " 568 " %jd; out of memory", from->name, offset); 569 return (ENOMEM); |
502 } | 570 } |
571 bp->bio_length = length; 572 bp->bio_done = gv_done; 573 bp->bio_cflags |= GV_BIO_SYNCREQ; 574 bp->bio_offset = offset; 575 bp->bio_caller1 = from; 576 bp->bio_caller2 = to; 577 bp->bio_cmd = type; 578 if (data == NULL) 579 data = g_malloc(length, M_WAITOK); 580 bp->bio_cflags |= GV_BIO_MALLOC; /* Free on the next run. */ 581 bp->bio_data = data; |
|
503 | 582 |
504 pbp = bp->bio_parent; 505 if (pbp->bio_error == 0) 506 pbp->bio_error = bp->bio_error; 507 508 /* When the original request is finished, we deliver it. */ 509 pbp->bio_inbed++; 510 if (pbp->bio_inbed == pbp->bio_children) 511 g_io_deliver(pbp, pbp->bio_error); 512 513 /* Clean up what we allocated. */ 514 if (bp->bio_cflags & GV_BIO_MALLOC) 515 g_free(bp->bio_data); 516 g_destroy_bio(bp); | 583 /* Send down next. */ 584 mtx_lock(&sc->queue_mtx); 585 bioq_disksort(sc->bqueue, bp); 586 mtx_unlock(&sc->queue_mtx); 587 //gv_plex_start(from, bp); 588 return (0); |
517} 518 | 589} 590 |
519void 520gv_plex_normal_request(struct gv_plex *p, struct bio *bp) | 591/* 592 * Handle a finished plex sync bio. 593 */ 594int 595gv_sync_complete(struct gv_plex *to, struct bio *bp) |
521{ | 596{ |
522 struct bio *cbp, *pbp; 523 struct gv_bioq *bq, *bq2; 524 struct gv_raid5_packet *wp, *wp2; 525 caddr_t addr; 526 off_t bcount, boff; | 597 struct gv_plex *from, *p; 598 struct gv_sd *s; 599 struct gv_volume *v; 600 struct gv_softc *sc; 601 off_t offset; |
527 int err; 528 | 602 int err; 603 |
529 bcount = bp->bio_length; 530 addr = bp->bio_data; 531 boff = bp->bio_offset; | 604 g_topology_assert_not(); |
532 | 605 |
533 /* Walk over the whole length of the request, we might split it up. */ 534 while (bcount > 0) { 535 wp = NULL; | 606 err = 0; 607 KASSERT(to != NULL, ("NULL to")); 608 KASSERT(bp != NULL, ("NULL bp")); 609 from = bp->bio_caller2; 610 KASSERT(from != NULL, ("NULL from")); 611 v = to->vol_sc; 612 KASSERT(v != NULL, ("NULL v")); 613 sc = v->vinumconf; 614 KASSERT(sc != NULL, ("NULL sc")); |
536 | 615 |
537 /* 538 * RAID5 plexes need special treatment, as a single write 539 * request involves several read/write sub-requests. 540 */ 541 if (p->org == GV_PLEX_RAID5) { 542 wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO); 543 wp->bio = bp; 544 TAILQ_INIT(&wp->bits); | 616 /* If it was a read, write it. */ 617 if (bp->bio_cmd == BIO_READ) { 618 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length, 619 BIO_WRITE, bp->bio_data); 620 /* If it was a write, read the next one. */ 621 } else if (bp->bio_cmd == BIO_WRITE) { 622 if (bp->bio_cflags & GV_BIO_MALLOC) 623 g_free(bp->bio_data); 624 to->synced += bp->bio_length; 625 /* If we're finished, clean up. */ 626 if (bp->bio_offset + bp->bio_length >= from->size) { 627 G_VINUM_DEBUG(1, "syncing of %s from %s completed", 628 to->name, from->name); 629 /* Update our state. */ 630 LIST_FOREACH(s, &to->subdisks, in_plex) 631 gv_set_sd_state(s, GV_SD_UP, 0); 632 gv_update_plex_state(to); 633 to->flags &= ~GV_PLEX_SYNCING; 634 to->synced = 0; 635 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 636 } else { 637 offset = bp->bio_offset + bp->bio_length; 638 err = gv_sync_request(from, to, offset, 639 MIN(bp->bio_length, from->size - offset), 640 BIO_READ, NULL); 641 } 642 } 643 g_destroy_bio(bp); 644 /* Clean up if there was an error. */ 645 if (err) { 646 to->flags &= ~GV_PLEX_SYNCING; 647 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err); 648 } |
545 | 649 |
546 if (bp->bio_cflags & GV_BIO_REBUILD) 547 err = gv_rebuild_raid5(p, wp, bp, addr, 548 boff, bcount); 549 else if (bp->bio_cflags & GV_BIO_CHECK) 550 err = gv_check_raid5(p, wp, bp, addr, 551 boff, bcount); 552 else 553 err = gv_build_raid5_req(p, wp, bp, addr, 554 boff, bcount); | 650 /* Check if all plexes are synced, and lower refcounts. */ 651 g_topology_lock(); 652 LIST_FOREACH(p, &v->plexes, in_volume) { 653 if (p->flags & GV_PLEX_SYNCING) { 654 g_topology_unlock(); 655 return (-1); 656 } 657 } 658 /* If we came here, all plexes are synced, and we're free. */ 659 gv_access(v->provider, -1, -1, 0); 660 g_topology_unlock(); 661 G_VINUM_DEBUG(1, "plex sync completed"); 662 gv_volume_flush(v); 663 return (0); 664} |
555 | 665 |
556 /* 557 * Building the sub-request failed, we probably need to 558 * clean up a lot. 559 */ 560 if (err) { 561 G_VINUM_LOGREQ(0, bp, "plex request failed."); 562 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 563 TAILQ_REMOVE(&wp->bits, bq, queue); 564 g_free(bq); 565 } 566 if (wp->waiting != NULL) { 567 if (wp->waiting->bio_cflags & 568 GV_BIO_MALLOC) 569 g_free(wp->waiting->bio_data); 570 g_destroy_bio(wp->waiting); 571 } 572 if (wp->parity != NULL) { 573 if (wp->parity->bio_cflags & 574 GV_BIO_MALLOC) 575 g_free(wp->parity->bio_data); 576 g_destroy_bio(wp->parity); 577 } 578 g_free(wp); | 666/* 667 * Create a new bio struct for the next grow request. 668 */ 669int 670gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type, 671 caddr_t data) 672{ 673 struct gv_softc *sc; 674 struct bio *bp; |
579 | 675 |
580 TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) { 581 if (wp->bio == bp) { 582 TAILQ_REMOVE(&p->packets, wp, 583 list); 584 TAILQ_FOREACH_SAFE(bq, 585 &wp->bits, queue, bq2) { 586 TAILQ_REMOVE(&wp->bits, 587 bq, queue); 588 g_free(bq); 589 } 590 g_free(wp); 591 } 592 } | 676 KASSERT(p != NULL, ("gv_grow_request: NULL p")); 677 sc = p->vinumconf; 678 KASSERT(sc != NULL, ("gv_grow_request: NULL sc")); |
593 | 679 |
594 cbp = bp->bio_driver1; 595 while (cbp != NULL) { 596 pbp = cbp->bio_caller1; 597 if (cbp->bio_cflags & GV_BIO_MALLOC) 598 g_free(cbp->bio_data); 599 g_destroy_bio(cbp); 600 cbp = pbp; 601 } | 680 bp = g_new_bio(); 681 if (bp == NULL) { 682 G_VINUM_DEBUG(0, "grow of %s failed creating bio: " 683 "out of memory", p->name); 684 return (ENOMEM); 685 } |
602 | 686 |
603 g_io_deliver(bp, err); 604 return; 605 } 606 607 if (TAILQ_EMPTY(&wp->bits)) 608 g_free(wp); 609 else if (wp->lockbase != -1) 610 TAILQ_INSERT_TAIL(&p->packets, wp, list); | 687 bp->bio_cmd = type; 688 bp->bio_done = gv_done; 689 bp->bio_error = 0; 690 bp->bio_caller1 = p; 691 bp->bio_offset = offset; 692 bp->bio_length = length; 693 bp->bio_pflags |= GV_BIO_SYNCREQ; /* XXX: misuse of pflags AND syncreq.*/ 694 if (data == NULL) 695 data = g_malloc(length, M_WAITOK); 696 bp->bio_cflags |= GV_BIO_MALLOC; 697 bp->bio_data = data; |
611 | 698 |
612 /* 613 * Requests to concatenated and striped plexes go straight 614 * through. 615 */ 616 } else { 617 err = gv_plexbuffer(p, bp, addr, boff, bcount); | 699 mtx_lock(&sc->queue_mtx); 700 bioq_disksort(sc->bqueue, bp); 701 mtx_unlock(&sc->queue_mtx); 702 //gv_plex_start(p, bp); 703 return (0); 704} |
618 | 705 |
619 /* Building the sub-request failed. */ 620 if (err) { 621 G_VINUM_LOGREQ(0, bp, "plex request failed."); 622 cbp = bp->bio_driver1; 623 while (cbp != NULL) { 624 pbp = cbp->bio_caller1; 625 g_destroy_bio(cbp); 626 cbp = pbp; 627 } 628 g_io_deliver(bp, err); 629 return; | 706/* 707 * Finish handling of a bio to a growing plex. 708 */ 709void 710gv_grow_complete(struct gv_plex *p, struct bio *bp) 711{ 712 struct gv_softc *sc; 713 struct gv_sd *s; 714 struct gv_volume *v; 715 off_t origsize, offset; 716 int sdcount, err; 717 718 v = p->vol_sc; 719 KASSERT(v != NULL, ("gv_grow_complete: NULL v")); 720 sc = v->vinumconf; 721 KASSERT(sc != NULL, ("gv_grow_complete: NULL sc")); 722 err = 0; 723 724 /* If it was a read, write it. */ 725 if (bp->bio_cmd == BIO_READ) { 726 p->synced += bp->bio_length; 727 err = gv_grow_request(p, bp->bio_offset, bp->bio_length, 728 BIO_WRITE, bp->bio_data); 729 /* If it was a write, read next. */ 730 } else if (bp->bio_cmd == BIO_WRITE) { 731 if (bp->bio_cflags & GV_BIO_MALLOC) 732 g_free(bp->bio_data); 733 734 /* Find the real size of the plex. */ 735 sdcount = gv_sdcount(p, 1); 736 s = LIST_FIRST(&p->subdisks); 737 KASSERT(s != NULL, ("NULL s")); 738 origsize = (s->size * (sdcount - 1)); 739 if (bp->bio_offset + bp->bio_length >= origsize) { 740 G_VINUM_DEBUG(1, "growing of %s completed", p->name); 741 p->flags &= ~GV_PLEX_GROWING; 742 LIST_FOREACH(s, &p->subdisks, in_plex) { 743 s->flags &= ~GV_SD_GROW; 744 gv_set_sd_state(s, GV_SD_UP, 0); |
630 } | 745 } |
746 p->size = gv_plex_size(p); 747 gv_update_vol_size(v, gv_vol_size(v)); 748 gv_set_plex_state(p, GV_PLEX_UP, 0); 749 g_topology_lock(); 750 gv_access(v->provider, -1, -1, 0); 751 g_topology_unlock(); 752 p->synced = 0; 753 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 754 /* Issue delayed requests. */ 755 gv_plex_flush(p); 756 } else { 757 offset = bp->bio_offset + bp->bio_length; 758 err = gv_grow_request(p, offset, 759 MIN(bp->bio_length, origsize - offset), 760 BIO_READ, NULL); |
|
631 } | 761 } |
632 633 /* Abuse bio_caller1 as linked list. */ 634 pbp = bp->bio_driver1; 635 while (pbp->bio_caller1 != NULL) 636 pbp = pbp->bio_caller1; 637 bcount -= pbp->bio_length; 638 addr += pbp->bio_length; 639 boff += pbp->bio_length; | |
640 } | 762 } |
763 g_destroy_bio(bp); |
|
641 | 764 |
642 /* Fire off all sub-requests. */ 643 pbp = bp->bio_driver1; 644 while (pbp != NULL) { 645 /* 646 * RAID5 sub-requests need to come in correct order, otherwise 647 * we trip over the parity, as it might be overwritten by 648 * another sub-request. 649 */ 650 if (pbp->bio_driver1 != NULL && 651 gv_stripe_active(p, pbp)) { 652 /* Park the bio on the waiting queue. */ 653 pbp->bio_cflags |= GV_BIO_ONHOLD; 654 mtx_lock(&p->bqueue_mtx); 655 bioq_disksort(p->wqueue, pbp); 656 mtx_unlock(&p->bqueue_mtx); 657 } else 658 g_io_request(pbp, pbp->bio_caller2); 659 pbp = pbp->bio_caller1; | 765 if (err) { 766 p->flags &= ~GV_PLEX_GROWING; 767 G_VINUM_DEBUG(0, "error growing plex: error code %d", err); |
660 } 661} 662 | 768 } 769} 770 |
663static int 664gv_plex_access(struct g_provider *pp, int dr, int dw, int de) | 771 772/* 773 * Create an initialization BIO and send it off to the consumer. Assume that 774 * we're given initialization data as parameter. 775 */ 776void 777gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length) |
665{ | 778{ |
666 struct gv_plex *p; 667 struct g_geom *gp; 668 struct g_consumer *cp, *cp2; 669 int error; | 779 struct gv_drive *d; 780 struct g_consumer *cp; 781 struct bio *bp, *cbp; |
670 | 782 |
671 gp = pp->geom; 672 p = gp->softc; 673 KASSERT(p != NULL, ("NULL p")); | 783 KASSERT(s != NULL, ("gv_init_request: NULL s")); 784 d = s->drive_sc; 785 KASSERT(d != NULL, ("gv_init_request: NULL d")); 786 cp = d->consumer; 787 KASSERT(cp != NULL, ("gv_init_request: NULL cp")); |
674 | 788 |
675 if (p->org == GV_PLEX_RAID5) { 676 if (dw > 0 && dr == 0) 677 dr = 1; 678 else if (dw < 0 && dr == 0) 679 dr = -1; | 789 bp = g_new_bio(); 790 if (bp == NULL) { 791 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" 792 " (drive offset %jd); out of memory", s->name, 793 (intmax_t)s->initialized, (intmax_t)start); 794 return; /* XXX: Error codes. */ |
680 } | 795 } |
796 bp->bio_cmd = BIO_WRITE; 797 bp->bio_data = data; 798 bp->bio_done = gv_done; 799 bp->bio_error = 0; 800 bp->bio_length = length; 801 bp->bio_cflags |= GV_BIO_INIT; 802 bp->bio_offset = start; 803 bp->bio_caller1 = s; |
|
681 | 804 |
682 LIST_FOREACH(cp, &gp->consumer, consumer) { 683 error = g_access(cp, dr, dw, de); 684 if (error) { 685 LIST_FOREACH(cp2, &gp->consumer, consumer) { 686 if (cp == cp2) 687 break; 688 g_access(cp2, -dr, -dw, -de); 689 } 690 return (error); 691 } | 805 /* Then of course, we have to clone it. */ 806 cbp = g_clone_bio(bp); 807 if (cbp == NULL) { 808 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" 809 " (drive offset %jd); out of memory", s->name, 810 (intmax_t)s->initialized, (intmax_t)start); 811 return; /* XXX: Error codes. */ |
692 } | 812 } |
693 return (0); | 813 cbp->bio_done = gv_done; 814 cbp->bio_caller1 = s; 815 /* Send it off to the consumer. */ 816 g_io_request(cbp, cp); |
694} 695 | 817} 818 |
696static struct g_geom * 697gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) | 819/* 820 * Handle a finished initialization BIO. 821 */ 822void 823gv_init_complete(struct gv_plex *p, struct bio *bp) |
698{ | 824{ |
699 struct g_geom *gp; 700 struct g_consumer *cp, *cp2; 701 struct g_provider *pp2; 702 struct gv_plex *p; 703 struct gv_sd *s; | |
704 struct gv_softc *sc; | 825 struct gv_softc *sc; |
826 struct gv_drive *d; 827 struct g_consumer *cp; 828 struct gv_sd *s; 829 off_t start, length; 830 caddr_t data; |
|
705 int error; 706 | 831 int error; 832 |
707 g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name); 708 g_topology_assert(); | 833 s = bp->bio_caller1; 834 start = bp->bio_offset; 835 length = bp->bio_length; 836 error = bp->bio_error; 837 data = bp->bio_data; |
709 | 838 |
710 /* We only want to attach to subdisks. */ 711 if (strcmp(pp->geom->class->name, "VINUMDRIVE")) 712 return (NULL); | 839 KASSERT(s != NULL, ("gv_init_complete: NULL s")); 840 d = s->drive_sc; 841 KASSERT(d != NULL, ("gv_init_complete: NULL d")); 842 cp = d->consumer; 843 KASSERT(cp != NULL, ("gv_init_complete: NULL cp")); 844 sc = p->vinumconf; 845 KASSERT(sc != NULL, ("gv_init_complete: NULL sc")); |
713 | 846 |
714 /* Find the VINUM class and its associated geom. */ 715 gp = find_vinum_geom(); 716 if (gp == NULL) 717 return (NULL); 718 sc = gp->softc; 719 KASSERT(sc != NULL, ("gv_plex_taste: NULL sc")); | 847 g_destroy_bio(bp); |
720 | 848 |
721 /* Find out which subdisk the offered provider corresponds to. */ 722 s = pp->private; 723 KASSERT(s != NULL, ("gv_plex_taste: NULL s")); | 849 /* 850 * First we need to find out if it was okay, and abort if it's not. 851 * Then we need to free previous buffers, find out the correct subdisk, 852 * as well as getting the correct starting point and length of the BIO. 853 */ 854 if (start >= s->drive_offset + s->size) { 855 /* Free the data we initialized. */ 856 if (data != NULL) 857 g_free(data); 858 g_topology_assert_not(); 859 g_topology_lock(); 860 g_access(cp, 0, -1, 0); 861 g_topology_unlock(); 862 if (error) { 863 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE | 864 GV_SETSTATE_CONFIG); 865 } else { 866 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); 867 s->initialized = 0; 868 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 869 G_VINUM_DEBUG(1, "subdisk '%s' init: finished " 870 "successfully", s->name); 871 } 872 return; 873 } 874 s->initialized += length; 875 start += length; 876 gv_init_request(s, start, data, length); 877} |
724 | 878 |
725 /* Now find the correct plex where this subdisk belongs to. */ 726 p = gv_find_plex(sc, s->plex); 727 if (p == NULL) { 728 G_VINUM_DEBUG(0, "%s: NULL p for '%s'", __func__, s->name); 729 return (NULL); | 879/* 880 * Create a new bio struct for the next parity rebuild. Used both by internal 881 * rebuild of degraded plexes as well as user initiated rebuilds/checks. 882 */ 883void 884gv_parity_request(struct gv_plex *p, int flags, off_t offset) 885{ 886 struct gv_softc *sc; 887 struct bio *bp; 888 889 KASSERT(p != NULL, ("gv_parity_request: NULL p")); 890 sc = p->vinumconf; 891 KASSERT(sc != NULL, ("gv_parity_request: NULL sc")); 892 893 bp = g_new_bio(); 894 if (bp == NULL) { 895 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: " 896 "out of memory", p->name); 897 return; |
730 } 731 | 898 } 899 |
900 bp->bio_cmd = BIO_WRITE; 901 bp->bio_done = gv_done; 902 bp->bio_error = 0; 903 bp->bio_length = p->stripesize; 904 bp->bio_caller1 = p; 905 |
|
732 /* | 906 /* |
733 * Add this subdisk to this plex. Since we trust the on-disk 734 * configuration, we don't check the given value (should we?). 735 * XXX: shouldn't be done here | 907 * Check if it's a rebuild of a degraded plex or a user request of 908 * parity rebuild. |
736 */ | 909 */ |
737 gv_sd_to_plex(p, s, 0); | 910 if (flags & GV_BIO_REBUILD) 911 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK); 912 else if (flags & GV_BIO_CHECK) 913 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); 914 else { 915 G_VINUM_DEBUG(0, "invalid flags given in rebuild"); 916 return; 917 } |
738 | 918 |
739 /* Now check if there's already a geom for this plex. */ 740 gp = p->geom; | 919 bp->bio_cflags = flags; 920 bp->bio_cflags |= GV_BIO_MALLOC; |
741 | 921 |
742 /* Yes, there is already a geom, so we just add the consumer. */ 743 if (gp != NULL) { 744 cp2 = LIST_FIRST(&gp->consumer); 745 /* Need to attach a new consumer to this subdisk. */ 746 cp = g_new_consumer(gp); 747 error = g_attach(cp, pp); 748 if (error) { 749 G_VINUM_DEBUG(0, "unable to attach consumer to %s", 750 pp->name); 751 g_destroy_consumer(cp); 752 return (NULL); 753 } 754 /* Adjust the access counts of the new consumer. */ 755 if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) { 756 error = g_access(cp, cp2->acr, cp2->acw, cp2->ace); 757 if (error) { 758 G_VINUM_DEBUG(0, "unable to set access counts" 759 " for consumer on %s", pp->name); 760 g_detach(cp); 761 g_destroy_consumer(cp); 762 return (NULL); 763 } 764 } 765 s->consumer = cp; | 922 /* We still have more parity to build. */ 923 bp->bio_offset = offset; 924 mtx_lock(&sc->queue_mtx); 925 bioq_disksort(sc->bqueue, bp); 926 mtx_unlock(&sc->queue_mtx); 927 //gv_plex_start(p, bp); /* Send it down to the plex. */ 928} |
766 | 929 |
767 /* Adjust the size of the providers this plex has. */ 768 LIST_FOREACH(pp2, &gp->provider, provider) 769 pp2->mediasize = p->size; | 930/* 931 * Handle a finished parity write. 932 */ 933void 934gv_parity_complete(struct gv_plex *p, struct bio *bp) 935{ 936 struct gv_softc *sc; 937 int error, flags; |
770 | 938 |
771 /* Update the size of the volume this plex is attached to. */ 772 if (p->vol_sc != NULL) 773 gv_update_vol_size(p->vol_sc, p->size); | 939 error = bp->bio_error; 940 flags = bp->bio_cflags; 941 flags &= ~GV_BIO_MALLOC; |
774 | 942 |
775 /* 776 * If necessary, create bio queues, queue mutex and a worker 777 * thread. 778 */ 779 if (p->bqueue == NULL) { 780 p->bqueue = g_malloc(sizeof(struct bio_queue_head), 781 M_WAITOK | M_ZERO); 782 bioq_init(p->bqueue); 783 } 784 if (p->wqueue == NULL) { 785 p->wqueue = g_malloc(sizeof(struct bio_queue_head), 786 M_WAITOK | M_ZERO); 787 bioq_init(p->wqueue); 788 } 789 if (mtx_initialized(&p->bqueue_mtx) == 0) 790 mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF); 791 if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) { 792 kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s", 793 p->name); 794 p->flags |= GV_PLEX_THREAD_ACTIVE; 795 } | 943 sc = p->vinumconf; 944 KASSERT(sc != NULL, ("gv_parity_complete: NULL sc")); |
796 | 945 |
797 return (NULL); | 946 /* Clean up what we allocated. */ 947 if (bp->bio_cflags & GV_BIO_MALLOC) 948 g_free(bp->bio_data); 949 g_destroy_bio(bp); |
798 | 950 |
799 /* We need to create a new geom. */ | 951 if (error == EAGAIN) { 952 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx", 953 (intmax_t)p->synced); 954 } 955 956 /* Any error is fatal, except EAGAIN when we're rebuilding. */ 957 if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) { 958 /* Make sure we don't have the lock. */ 959 g_topology_assert_not(); 960 g_topology_lock(); 961 gv_access(p->vol_sc->provider, -1, -1, 0); 962 g_topology_unlock(); 963 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx " 964 "errno %d", p->name, (intmax_t)p->synced, error); 965 return; |
800 } else { | 966 } else { |
801 gp = g_new_geomf(mp, "%s", p->name); 802 gp->start = gv_plex_start; 803 gp->orphan = gv_plex_orphan; 804 gp->access = gv_plex_access; 805 gp->softc = p; 806 p->geom = gp; | 967 p->synced += p->stripesize; 968 } |
807 | 969 |
808 TAILQ_INIT(&p->packets); 809 p->bqueue = g_malloc(sizeof(struct bio_queue_head), 810 M_WAITOK | M_ZERO); 811 bioq_init(p->bqueue); 812 p->wqueue = g_malloc(sizeof(struct bio_queue_head), 813 M_WAITOK | M_ZERO); 814 bioq_init(p->wqueue); 815 mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF); 816 kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s", 817 p->name); 818 p->flags |= GV_PLEX_THREAD_ACTIVE; 819 820 /* Attach a consumer to this provider. */ 821 cp = g_new_consumer(gp); 822 g_attach(cp, pp); 823 s->consumer = cp; 824 825 /* Create a provider for the outside world. */ 826 pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name); 827 pp2->mediasize = p->size; 828 pp2->sectorsize = pp->sectorsize; 829 p->provider = pp2; 830 g_error_provider(pp2, 0); 831 return (gp); | 970 if (p->synced >= p->size) { 971 /* Make sure we don't have the lock. */ 972 g_topology_assert_not(); 973 g_topology_lock(); 974 gv_access(p->vol_sc->provider, -1, -1, 0); 975 g_topology_unlock(); 976 /* We're finished. */ 977 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name); 978 p->synced = 0; 979 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 980 return; |
832 } | 981 } |
982 983 /* Send down next. It will determine if we need to itself. */ 984 gv_parity_request(p, flags, p->synced); |
|
833} 834 | 985} 986 |
835static int 836gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp, 837 struct g_geom *gp) | 987/* 988 * Handle a finished plex rebuild bio. 989 */ 990void 991gv_rebuild_complete(struct gv_plex *p, struct bio *bp) |
838{ | 992{ |
839 struct gv_plex *p; | 993 struct gv_softc *sc; 994 struct gv_sd *s; 995 int error, flags; 996 off_t offset; |
840 | 997 |
841 g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name); 842 g_topology_assert(); | 998 error = bp->bio_error; 999 flags = bp->bio_cflags; 1000 offset = bp->bio_offset; 1001 flags &= ~GV_BIO_MALLOC; 1002 sc = p->vinumconf; 1003 KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc")); |
843 | 1004 |
844 p = gp->softc; | 1005 /* Clean up what we allocated. */ 1006 if (bp->bio_cflags & GV_BIO_MALLOC) 1007 g_free(bp->bio_data); 1008 g_destroy_bio(bp); |
845 | 1009 |
846 KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name)); | 1010 if (error) { 1011 g_topology_assert_not(); 1012 g_topology_lock(); 1013 gv_access(p->vol_sc->provider, -1, -1, 0); 1014 g_topology_unlock(); 1015 1016 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d", 1017 p->name, (intmax_t)offset, error); 1018 p->flags &= ~GV_PLEX_REBUILDING; 1019 p->synced = 0; 1020 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ 1021 return; 1022 } |
847 | 1023 |
848 /* 849 * If this is a RAID5 plex, check if its worker thread is still active 850 * and signal it to self destruct. 851 */ 852 gv_kill_plex_thread(p); 853 /* g_free(sc); */ 854 g_wither_geom(gp, ENXIO); 855 return (0); 856} | 1024 offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); 1025 if (offset >= p->size) { 1026 /* We're finished. */ 1027 g_topology_assert_not(); 1028 g_topology_lock(); 1029 gv_access(p->vol_sc->provider, -1, -1, 0); 1030 g_topology_unlock(); 1031 1032 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name); 1033 gv_save_config(p->vinumconf); 1034 p->flags &= ~GV_PLEX_REBUILDING; 1035 p->synced = 0; 1036 /* Try to up all subdisks. */ 1037 LIST_FOREACH(s, &p->subdisks, in_plex) 1038 gv_update_sd_state(s); 1039 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 1040 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ 1041 return; 1042 } |
857 | 1043 |
858#define VINUMPLEX_CLASS_NAME "VINUMPLEX" 859 860static struct g_class g_vinum_plex_class = { 861 .name = VINUMPLEX_CLASS_NAME, 862 .version = G_VERSION, 863 .taste = gv_plex_taste, 864 .destroy_geom = gv_plex_destroy_geom, 865}; 866 867DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex); | 1044 /* Send down next. It will determine if we need to itself. */ 1045 gv_parity_request(p, flags, offset); 1046} |