geom_vinum_plex.c (132940) geom_vinum_plex.c (133318)
1/*-
2 * Copyright (c) 2004 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 132940 2004-07-31 21:34:21Z le $");
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_plex.c 133318 2004-08-08 07:57:53Z phk $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/kernel.h>
33#include <sys/kthread.h>
34#include <sys/libkern.h>
35#include <sys/lock.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/mutex.h>
39#include <sys/systm.h>
40
41#include <geom/geom.h>
42#include <geom/vinum/geom_vinum_var.h>
43#include <geom/vinum/geom_vinum_raid5.h>
44#include <geom/vinum/geom_vinum.h>
45
46/* XXX: is this the place to catch dying subdisks? */
47static void
48gv_plex_orphan(struct g_consumer *cp)
49{
50 struct g_geom *gp;
51 struct gv_plex *p;
52 int error;
53
54 g_topology_assert();
55 gp = cp->geom;
56 g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);
57
58 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
59 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
60 error = cp->provider->error;
61 if (error == 0)
62 error = ENXIO;
63 g_detach(cp);
64 g_destroy_consumer(cp);
65 if (!LIST_EMPTY(&gp->consumer))
66 return;
67
68 p = gp->softc;
69 if (p != NULL) {
70 gv_kill_thread(p);
71 p->geom = NULL;
72 p->provider = NULL;
73 p->consumer = NULL;
74 }
75 gp->softc = NULL;
76 g_wither_geom(gp, error);
77}
78
79static void
80gv_plex_done(struct bio *bp)
81{
82 struct g_geom *gp;
83 struct gv_sd *s;
84
85 gp = bp->bio_to->geom;
86
87 s = bp->bio_caller1;
88 KASSERT(s != NULL, ("gv_plex_done: NULL s"));
89
90 if (bp->bio_error == 0)
91 s->initialized += bp->bio_length;
92
93 if (s->initialized >= s->size) {
94 gv_set_sd_state(s, GV_SD_UP, 0);
95 s->initialized = 0;
96 }
97
98 g_std_done(bp);
99}
100
101/* Find the correct subdisk to send the bio to and build a bio to send. */
102static int
103gv_plexbuffer(struct bio *bp, struct bio **bp2, struct g_consumer **cp,
104 caddr_t addr, long bcount, off_t boff)
105{
106 struct g_geom *gp;
107 struct gv_plex *p;
108 struct gv_sd *s;
109 struct bio *cbp;
110 int i, sdno;
111 off_t len_left, real_len, real_off, stripeend, stripeno, stripestart;
112
113 s = NULL;
114
115 gp = bp->bio_to->geom;
116 p = gp->softc;
117
118 if (p == NULL || LIST_EMPTY(&p->subdisks))
119 return (ENXIO);
120
121 /*
122 * We only handle concatenated and striped plexes here. RAID5 plexes
123 * are handled in build_raid5_request().
124 */
125 switch (p->org) {
126 case GV_PLEX_CONCAT:
127 /*
128 * Find the subdisk where this request starts. The subdisks in
129 * this list must be ordered by plex_offset.
130 */
131 LIST_FOREACH(s, &p->subdisks, in_plex) {
132 if (s->plex_offset <= boff &&
133 s->plex_offset + s->size > boff)
134 break;
135 }
136 /* Subdisk not found. */
137 if (s == NULL)
138 return (ENXIO);
139
140 /* Calculate corresponding offsets on disk. */
141 real_off = boff - s->plex_offset;
142 len_left = s->size - real_off;
143 real_len = (bcount > len_left) ? len_left : bcount;
144 break;
145
146 case GV_PLEX_STRIPED:
147 /* The number of the stripe where the request starts. */
148 stripeno = boff / p->stripesize;
149
150 /* The number of the subdisk where the stripe resides. */
151 sdno = stripeno % p->sdcount;
152
153 /* Find the right subdisk. */
154 i = 0;
155 LIST_FOREACH(s, &p->subdisks, in_plex) {
156 if (i == sdno)
157 break;
158 i++;
159 }
160
161 /* Subdisk not found. */
162 if (s == NULL)
163 return (ENXIO);
164
165 /* The offset of the stripe from the start of the subdisk. */
166 stripestart = (stripeno / p->sdcount) *
167 p->stripesize;
168
169 /* The offset at the end of the stripe. */
170 stripeend = stripestart + p->stripesize;
171
172 /* The offset of the request on this subdisk. */
173 real_off = boff - (stripeno * p->stripesize) +
174 stripestart;
175
176 /* The length left in this stripe. */
177 len_left = stripeend - real_off;
178
179 real_len = (bcount <= len_left) ? bcount : len_left;
180 break;
181
182 default:
183 return (EINVAL);
184 }
185
186 /* Now check if we can handle the request on this subdisk. */
187 switch (s->state) {
188 case GV_SD_UP:
189 /* If the subdisk is up, just continue. */
190 break;
191
192 case GV_SD_STALE:
193 if (bp->bio_caller1 != p)
194 return (ENXIO);
195
196 printf("FOO: setting sd %s to GV_SD_INITIALIZING\n", s->name);
197 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
198 break;
199
200 case GV_SD_INITIALIZING:
201 if (bp->bio_cmd == BIO_READ)
202 return (ENXIO);
203 break;
204
205 default:
206 /* All other subdisk states mean it's not accessible. */
207 return (ENXIO);
208 }
209
210 /* Clone the bio and adjust the offsets and sizes. */
211 cbp = g_clone_bio(bp);
212 if (cbp == NULL)
213 return (ENOMEM);
214 cbp->bio_offset = real_off;
215 cbp->bio_length = real_len;
216 cbp->bio_data = addr;
217 if (bp->bio_caller1 == p) {
218 cbp->bio_caller1 = s;
219 cbp->bio_done = gv_plex_done;
220 } else
221 cbp->bio_done = g_std_done;
222 *bp2 = cbp;
223 *cp = s->consumer;
224 return (0);
225}
226
227static void
228gv_plex_start(struct bio *bp)
229{
230 struct g_geom *gp;
231 struct g_consumer *cp;
232 struct gv_plex *p;
233 struct gv_raid5_packet *wp;
234 struct bio *bp2;
235 caddr_t addr;
236 off_t boff;
237 long bcount, rcount;
238 int err;
239
240 gp = bp->bio_to->geom;
241 p = gp->softc;
242
243 /*
244 * We cannot handle this request if too many of our subdisks are
245 * inaccessible.
246 */
247 if ((p->state < GV_PLEX_DEGRADED) && (bp->bio_caller1 != p)) {
248 g_io_deliver(bp, ENXIO); /* XXX: correct way? */
249 return;
250 }
251
252 switch(bp->bio_cmd) {
253 case BIO_READ:
254 case BIO_WRITE:
255 case BIO_DELETE:
256 /*
257 * We split up the request in smaller packets and hand them
258 * down to our subdisks.
259 */
260 wp = NULL;
261 addr = bp->bio_data;
262 boff = bp->bio_offset;
263 for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
264 /*
265 * RAID5 requests usually need to be split up in
266 * several subrequests.
267 */
268 if (p->org == GV_PLEX_RAID5) {
269 wp = gv_new_raid5_packet();
270 wp->bio = bp;
271 err = gv_build_raid5_req(wp, bp, addr, bcount,
272 boff);
273 } else
274 err = gv_plexbuffer(bp, &bp2, &cp, addr, bcount,
275 boff);
276
277 if (err) {
278 if (p->org == GV_PLEX_RAID5)
279 gv_free_raid5_packet(wp);
280 bp->bio_completed += bcount;
281 if (bp->bio_error == 0)
282 bp->bio_error = err;
283 if (bp->bio_completed == bp->bio_length)
284 g_io_deliver(bp, bp->bio_error);
285 return;
286 }
287
288 if (p->org != GV_PLEX_RAID5) {
289 rcount = bp2->bio_length;
290 g_io_request(bp2, cp);
291
292 /*
293 * RAID5 subrequests are queued on a worklist
294 * and picked up from the worker thread. This
295 * ensures correct order.
296 */
297 } else {
298 mtx_lock(&p->worklist_mtx);
299 TAILQ_INSERT_TAIL(&p->worklist, wp,
300 list);
301 mtx_unlock(&p->worklist_mtx);
302 wakeup(&p);
303 rcount = wp->length;
304 }
305
306 boff += rcount;
307 addr += rcount;
308 }
309 return;
310
311 default:
312 g_io_deliver(bp, EOPNOTSUPP);
313 return;
314 }
315}
316
317static int
318gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
319{
320 struct g_geom *gp;
321 struct g_consumer *cp, *cp2;
322 int error;
323
324 gp = pp->geom;
325
326 error = ENXIO;
327 LIST_FOREACH(cp, &gp->consumer, consumer) {
328 error = g_access(cp, dr, dw, de);
329 if (error) {
330 LIST_FOREACH(cp2, &gp->consumer, consumer) {
331 if (cp == cp2)
332 break;
333 g_access(cp2, -dr, -dw, -de);
334 }
335 return (error);
336 }
337 }
338 return (error);
339}
340
341static struct g_geom *
342gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
343{
344 struct g_geom *gp;
345 struct g_consumer *cp, *cp2;
346 struct g_provider *pp2;
347 struct gv_plex *p;
348 struct gv_sd *s;
349 struct gv_softc *sc;
350 int error;
351
352 g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
353 g_topology_assert();
354
355 /* We only want to attach to subdisks. */
356 if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
357 return (NULL);
358
359 /* Find the VINUM class and its associated geom. */
360 gp = find_vinum_geom();
361 if (gp == NULL)
362 return (NULL);
363 sc = gp->softc;
364 KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));
365
366 /* Find out which subdisk the offered provider corresponds to. */
367 s = pp->private;
368 KASSERT(s != NULL, ("gv_plex_taste: NULL s"));
369
370 /* Now find the correct plex where this subdisk belongs to. */
371 p = gv_find_plex(sc, s->plex);
372 KASSERT(p != NULL, ("gv_plex_taste: NULL p"));
373
374 /*
375 * Add this subdisk to this plex. Since we trust the on-disk
376 * configuration, we don't check the given value (should we?).
377 * XXX: shouldn't be done here
378 */
379 gv_sd_to_plex(p, s, 0);
380
381 /* Now check if there's already a geom for this plex. */
382 gp = p->geom;
383
384 /* Yes, there is already a geom, so we just add the consumer. */
385 if (gp != NULL) {
386 cp2 = LIST_FIRST(&gp->consumer);
387 /* Need to attach a new consumer to this subdisk. */
388 cp = g_new_consumer(gp);
389 error = g_attach(cp, pp);
390 if (error) {
391 printf("geom_vinum: couldn't attach consumer to %s\n",
392 pp->name);
393 g_destroy_consumer(cp);
394 return (NULL);
395 }
396 /* Adjust the access counts of the new consumer. */
397 if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
398 error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
399 if (error) {
400 printf("geom_vinum: couldn't set access counts"
401 " for consumer on %s\n", pp->name);
402 g_detach(cp);
403 g_destroy_consumer(cp);
404 return (NULL);
405 }
406 }
407 s->consumer = cp;
408
409 /* Adjust the size of the providers this plex has. */
410 LIST_FOREACH(pp2, &gp->provider, provider)
411 pp2->mediasize = p->size;
412
413 /* Update the size of the volume this plex is attached to. */
414 if (p->vol_sc != NULL)
415 gv_update_vol_size(p->vol_sc, p->size);
416
417 return (NULL);
418
419 /* We need to create a new geom. */
420 } else {
421 gp = g_new_geomf(mp, "%s", p->name);
422 gp->start = gv_plex_start;
423 gp->orphan = gv_plex_orphan;
424 gp->access = gv_plex_access;
425 gp->softc = p;
426 p->geom = gp;
427
428 /* RAID5 plexes need a 'worker' thread, where IO is handled. */
429 if (p->org == GV_PLEX_RAID5) {
430 TAILQ_INIT(&p->worklist);
431 mtx_init(&p->worklist_mtx, "gvinum_worklist", NULL,
432 MTX_DEF);
433 p->flags &= ~GV_PLEX_THREAD_DIE;
434 kthread_create(gv_raid5_worker, gp, NULL, 0, 0,
435 "gv_raid5");
436 p->flags |= GV_PLEX_THREAD_ACTIVE;
437 }
438
439 /* Attach a consumer to this provider. */
440 cp = g_new_consumer(gp);
441 g_attach(cp, pp);
442 s->consumer = cp;
443
444 /* Create a provider for the outside world. */
445 pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
446 pp2->mediasize = p->size;
447 pp2->sectorsize = pp->sectorsize;
448 p->provider = pp2;
449 g_error_provider(pp2, 0);
450 return (gp);
451 }
452}
453
454static int
455gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
456 struct g_geom *gp)
457{
458 struct gv_plex *p;
459
460 g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
461 g_topology_assert();
462
463 p = gp->softc;
464
465 KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));
466
467 /*
468 * If this is a RAID5 plex, check if its worker thread is still active
469 * and signal it to self destruct.
470 */
471 gv_kill_thread(p);
472 mtx_destroy(&p->worklist_mtx);
473 /* g_free(sc); */
474 g_wither_geom(gp, ENXIO);
475 return (0);
476}
477
478#define VINUMPLEX_CLASS_NAME "VINUMPLEX"
479
480static struct g_class g_vinum_plex_class = {
481 .name = VINUMPLEX_CLASS_NAME,
482 .version = G_VERSION,
483 .taste = gv_plex_taste,
484 .destroy_geom = gv_plex_destroy_geom,
485};
486
487DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);