Deleted Added
sdiff udiff text old ( 139671 ) new ( 139940 )
full compact
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/raid3/g_raid3.c 139940 2005-01-09 14:43:39Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/eventhandler.h>
41#include <vm/uma.h>
42#include <geom/geom.h>
43#include <sys/proc.h>
44#include <sys/kthread.h>
45#include <sys/sched.h>
46#include <geom/raid3/g_raid3.h>
47
48
/* Memory type for all GEOM_RAID3 allocations (events, metadata sectors). */
static MALLOC_DEFINE(M_RAID3, "raid3 data", "GEOM_RAID3 Data");

/* Tunables / sysctl knobs under kern.geom.raid3. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, "GEOM_RAID3 stuff");
u_int g_raid3_debug = 0;	/* Verbosity of G_RAID3_DEBUG() messages. */
TUNABLE_INT("kern.geom.raid3.debug", &g_raid3_debug);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RW, &g_raid3_debug, 0,
    "Debug level");
static u_int g_raid3_timeout = 4;
TUNABLE_INT("kern.geom.raid3.timeout", &g_raid3_timeout);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RW, &g_raid3_timeout,
    0, "Time to wait on all raid3 components");
static u_int g_raid3_idletime = 5;
TUNABLE_INT("kern.geom.raid3.idletime", &g_raid3_idletime);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RW,
    &g_raid3_idletime, 0, "Mark components as clean when idling");
static u_int g_raid3_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_raid3_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
static u_int g_raid3_syncs_per_sec = 1000;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_raid3_syncs_per_sec, 0,
    "Number of synchronizations requests per second");

/*
 * Item limits for the three UMA zones (sc_zone_64k/16k/4k) from which
 * per-component I/O buffers are allocated (see g_raid3_clone_bio()).
 */
static u_int g_raid3_n64k = 50;
TUNABLE_INT("kern.geom.raid3.n64k", &g_raid3_n64k);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RD, &g_raid3_n64k, 0,
    "Maximum number of 64kB allocations");
static u_int g_raid3_n16k = 200;
TUNABLE_INT("kern.geom.raid3.n16k", &g_raid3_n16k);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RD, &g_raid3_n16k, 0,
    "Maximum number of 16kB allocations");
static u_int g_raid3_n4k = 1200;
TUNABLE_INT("kern.geom.raid3.n4k", &g_raid3_n4k);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RD, &g_raid3_n4k, 0,
    "Maximum number of 4kB allocations");

/* Read-only statistics exported under kern.geom.raid3.stat. */
SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0,
    "GEOM_RAID3 statistics");
static u_int g_raid3_parity_mismatch = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD,
    &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode");
static u_int g_raid3_64k_requested = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_requested, CTLFLAG_RD,
    &g_raid3_64k_requested, 0, "Number of requested 64kB allocations");
static u_int g_raid3_64k_failed = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_failed, CTLFLAG_RD,
    &g_raid3_64k_failed, 0, "Number of failed 64kB allocations");
static u_int g_raid3_16k_requested = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_requested, CTLFLAG_RD,
    &g_raid3_16k_requested, 0, "Number of requested 16kB allocations");
static u_int g_raid3_16k_failed = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_failed, CTLFLAG_RD,
    &g_raid3_16k_failed, 0, "Number of failed 16kB allocations");
static u_int g_raid3_4k_requested = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_requested, CTLFLAG_RD,
    &g_raid3_4k_requested, 0, "Number of requested 4kB allocations");
static u_int g_raid3_4k_failed = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_failed, CTLFLAG_RD,
    &g_raid3_4k_failed, 0, "Number of failed 4kB allocations");

/* msleep() wrapper which logs going to sleep and waking up at debug level 4. */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

/* Registered event handler tag; set in g_raid3_init(). */
static eventhandler_tag g_raid3_ehtag = NULL;

static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_raid3_taste;
static void g_raid3_init(struct g_class *mp);
static void g_raid3_fini(struct g_class *mp);

/* GEOM class definition hooked into the GEOM framework. */
struct g_class g_raid3_class = {
	.name = G_RAID3_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_raid3_config,
	.taste = g_raid3_taste,
	.destroy_geom = g_raid3_destroy_geom,
	.init = g_raid3_init,
	.fini = g_raid3_fini
};

static void g_raid3_destroy_provider(struct g_raid3_softc *sc);
static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state);
static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force);
static void g_raid3_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type);
143
144static const char *
145g_raid3_disk_state2str(int state)
146{
147
148 switch (state) {
149 case G_RAID3_DISK_STATE_NODISK:
150 return ("NODISK");
151 case G_RAID3_DISK_STATE_NONE:
152 return ("NONE");
153 case G_RAID3_DISK_STATE_NEW:
154 return ("NEW");
155 case G_RAID3_DISK_STATE_ACTIVE:
156 return ("ACTIVE");
157 case G_RAID3_DISK_STATE_STALE:
158 return ("STALE");
159 case G_RAID3_DISK_STATE_SYNCHRONIZING:
160 return ("SYNCHRONIZING");
161 case G_RAID3_DISK_STATE_DISCONNECTED:
162 return ("DISCONNECTED");
163 default:
164 return ("INVALID");
165 }
166}
167
168static const char *
169g_raid3_device_state2str(int state)
170{
171
172 switch (state) {
173 case G_RAID3_DEVICE_STATE_STARTING:
174 return ("STARTING");
175 case G_RAID3_DEVICE_STATE_DEGRADED:
176 return ("DEGRADED");
177 case G_RAID3_DEVICE_STATE_COMPLETE:
178 return ("COMPLETE");
179 default:
180 return ("INVALID");
181 }
182}
183
184const char *
185g_raid3_get_diskname(struct g_raid3_disk *disk)
186{
187
188 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
189 return ("[unknown]");
190 return (disk->d_name);
191}
192
193#define g_raid3_xor(src1, src2, dst, size) \
194 _g_raid3_xor((uint64_t *)(src1), (uint64_t *)(src2), \
195 (uint64_t *)(dst), (size_t)size)
196static void
197_g_raid3_xor(uint64_t *src1, uint64_t *src2, uint64_t *dst, size_t size)
198{
199
200 KASSERT((size % 128) == 0, ("Invalid size: %zu.", size));
201 for (; size > 0; size -= 128) {
202 *dst++ = (*src1++) ^ (*src2++);
203 *dst++ = (*src1++) ^ (*src2++);
204 *dst++ = (*src1++) ^ (*src2++);
205 *dst++ = (*src1++) ^ (*src2++);
206 *dst++ = (*src1++) ^ (*src2++);
207 *dst++ = (*src1++) ^ (*src2++);
208 *dst++ = (*src1++) ^ (*src2++);
209 *dst++ = (*src1++) ^ (*src2++);
210 *dst++ = (*src1++) ^ (*src2++);
211 *dst++ = (*src1++) ^ (*src2++);
212 *dst++ = (*src1++) ^ (*src2++);
213 *dst++ = (*src1++) ^ (*src2++);
214 *dst++ = (*src1++) ^ (*src2++);
215 *dst++ = (*src1++) ^ (*src2++);
216 *dst++ = (*src1++) ^ (*src2++);
217 *dst++ = (*src1++) ^ (*src2++);
218 }
219}
220
221static int
222g_raid3_is_zero(struct bio *bp)
223{
224 static const uint64_t zeros[] = {
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
226 };
227 u_char *addr;
228 ssize_t size;
229
230 size = bp->bio_length;
231 addr = (u_char *)bp->bio_data;
232 for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) {
233 if (bcmp(addr, zeros, sizeof(zeros)) != 0)
234 return (0);
235 }
236 return (1);
237}
238
239/*
240 * --- Events handling functions ---
241 * Events in geom_raid3 are used to maintain disks and device status
242 * from one thread to simplify locking.
243 */
/*
 * Release the memory of a (dequeued) event structure.
 */
static void
g_raid3_event_free(struct g_raid3_event *ep)
{

	free(ep, M_RAID3);
}
250
/*
 * Allocate an event, queue it on the device's event list and wake up the
 * worker thread.  'arg' is the device softc when G_RAID3_EVENT_DEVICE is
 * set in 'flags', otherwise a single disk.  With G_RAID3_EVENT_DONTWAIT
 * the call returns 0 immediately and the worker frees the event;
 * otherwise the caller drops the topology lock and sleeps until the
 * worker marks the event G_RAID3_EVENT_DONE, then returns its e_error.
 */
int
g_raid3_event_send(void *arg, int state, int flags)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct g_raid3_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK);
	G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_RAID3_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	/* Wake the worker on both channels it may be sleeping on. */
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_RAID3_EVENT_DONTWAIT) != 0)
		return (0);
	g_topology_assert();
	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	g_topology_unlock();
	/* Re-check every 5 seconds in case the wakeup was missed. */
	while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event",
		    hz * 5);
	}
	/* Don't even try to use 'sc' here, because it could be already dead. */
	g_topology_lock();
	error = ep->e_error;
	g_raid3_event_free(ep);
	return (error);
}
296
/*
 * Return the first queued event without removing it, or NULL if the
 * event queue is empty.
 */
static struct g_raid3_event *
g_raid3_event_get(struct g_raid3_softc *sc)
{
	struct g_raid3_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}
307
/*
 * Unlink the given event from the device's event queue.  The caller is
 * responsible for completing and/or freeing it.
 */
static void
g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}
316
317static void
318g_raid3_event_cancel(struct g_raid3_disk *disk)
319{
320 struct g_raid3_softc *sc;
321 struct g_raid3_event *ep, *tmpep;
322
323 g_topology_assert();
324
325 sc = disk->d_softc;
326 mtx_lock(&sc->sc_events_mtx);
327 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
328 if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0)
329 continue;
330 if (ep->e_disk != disk)
331 continue;
332 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
333 if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0)
334 g_raid3_event_free(ep);
335 else {
336 ep->e_error = ECANCELED;
337 wakeup(ep);
338 }
339 }
340 mtx_unlock(&sc->sc_events_mtx);
341}
342
343/*
344 * Return the number of disks in the given state.
345 * If state is equal to -1, count all connected disks.
346 */
347u_int
348g_raid3_ndisks(struct g_raid3_softc *sc, int state)
349{
350 struct g_raid3_disk *disk;
351 u_int n, ndisks;
352
353 for (n = ndisks = 0; n < sc->sc_ndisks; n++) {
354 disk = &sc->sc_disks[n];
355 if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
356 continue;
357 if (state == -1 || disk->d_state == state)
358 ndisks++;
359 }
360 return (ndisks);
361}
362
363static u_int
364g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp)
365{
366 struct bio *bp;
367 u_int nreqs = 0;
368
369 mtx_lock(&sc->sc_queue_mtx);
370 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
371 if (bp->bio_from == cp)
372 nreqs++;
373 }
374 mtx_unlock(&sc->sc_queue_mtx);
375 return (nreqs);
376}
377
/*
 * Return non-zero when the consumer still has I/O in flight (cp->index)
 * or requests queued for it, i.e. it cannot be destroyed yet.
 */
static int
g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_RAID3_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_raid3_nrequests(sc, cp) > 0) {
		G_RAID3_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}
396
397static void
398g_raid3_destroy_consumer(void *arg, int flags __unused)
399{
400 struct g_consumer *cp;
401
402 cp = arg;
403 G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
404 g_detach(cp);
405 g_destroy_consumer(cp);
406}
407
/*
 * Close and destroy a consumer, unless it is still busy.  When the
 * consumer was open for writing, closing it triggers a retaste of the
 * provider; in that case destruction is deferred via g_post_event() so
 * we do not re-taste our own component.
 */
static void
g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_raid3_is_busy(sc, cp))
		return;
	G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		/* A withering geom will not be retasted. */
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event was sent (inside g_access()), we
		 * can send the event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given
		 * provider will not receive a retaste event for it.  This
		 * is how we ignore retaste events for consumers we opened
		 * for write: detach and destroy the consumer only after
		 * the retaste event is sent.
		 */
		g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}
447
/*
 * Create a consumer for the given disk, attach it to provider 'pp' and
 * open it r1w1e1.  On failure the consumer (possibly attached) is left
 * in place — callers clean it up via g_raid3_disconnect_consumer().
 */
static int
g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp)
{
	int error;

	g_topology_assert();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
	disk->d_consumer->private = disk;
	/* 'index' tracks the number of in-flight requests; starts at 0. */
	disk->d_consumer->index = 0;
	error = g_attach(disk->d_consumer, pp);
	if (error != 0)
		return (error);
	error = g_access(disk->d_consumer, 1, 1, 1);
	if (error != 0) {
		G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk));
	return (0);
}
472
473static void
474g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
475{
476
477 g_topology_assert();
478
479 if (cp == NULL)
480 return;
481 if (cp->provider != NULL)
482 g_raid3_kill_consumer(sc, cp);
483 else
484 g_destroy_consumer(cp);
485}
486
487/*
488 * Initialize disk. This means allocate memory, create consumer, attach it
489 * to the provider and open access (r1w1e1) to it.
490 */
491static struct g_raid3_disk *
492g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp,
493 struct g_raid3_metadata *md, int *errorp)
494{
495 struct g_raid3_disk *disk;
496 int error;
497
498 disk = &sc->sc_disks[md->md_no];
499 error = g_raid3_connect_disk(disk, pp);
500 if (error != 0)
501 goto fail;
502 disk->d_state = G_RAID3_DISK_STATE_NONE;
503 disk->d_flags = md->md_dflags;
504 if (md->md_provider[0] != '\0')
505 disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED;
506 disk->d_sync.ds_consumer = NULL;
507 disk->d_sync.ds_offset = md->md_sync_offset;
508 disk->d_sync.ds_offset_done = md->md_sync_offset;
509 disk->d_sync.ds_resync = -1;
510 disk->d_genid = md->md_genid;
511 disk->d_sync.ds_syncid = md->md_syncid;
512 if (errorp != NULL)
513 *errorp = 0;
514 return (disk);
515fail:
516 if (errorp != NULL)
517 *errorp = error;
518 if (disk != NULL)
519 g_raid3_disconnect_consumer(sc, disk->d_consumer);
520 return (NULL);
521}
522
/*
 * Tear down one component: cancel its pending events, stop an ongoing
 * synchronization if this disk was being synchronized, release its
 * consumer and move the slot to the NODISK state.
 */
static void
g_raid3_destroy_disk(struct g_raid3_disk *disk)
{
	struct g_raid3_softc *sc;

	g_topology_assert();

	if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
		return;
	g_raid3_event_cancel(disk);
	sc = disk->d_softc;
	switch (disk->d_state) {
	case G_RAID3_DISK_STATE_SYNCHRONIZING:
		if (sc->sc_syncdisk != NULL)
			g_raid3_sync_stop(sc, 1);
		/* FALLTHROUGH */
	case G_RAID3_DISK_STATE_NEW:
	case G_RAID3_DISK_STATE_STALE:
	case G_RAID3_DISK_STATE_ACTIVE:
		g_raid3_disconnect_consumer(sc, disk->d_consumer);
		disk->d_consumer = NULL;
		break;
	default:
		/* Any other state at this point is a programming error. */
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
	}
	disk->d_state = G_RAID3_DISK_STATE_NODISK;
}
552
/*
 * Destroy the whole device: provider, disks, pending events, the
 * synchronization geom, UMA zones and mutexes, then wither the geom.
 */
static void
g_raid3_destroy_device(struct g_raid3_softc *sc)
{
	struct g_raid3_event *ep;
	struct g_raid3_disk *disk;
	struct g_geom *gp;
	struct g_consumer *cp;
	u_int n;

	g_topology_assert();

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_raid3_destroy_provider(sc);
	for (n = 0; n < sc->sc_ndisks; n++) {
		disk = &sc->sc_disks[n];
		if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
			/* Leave the component marked clean on shutdown. */
			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
			g_raid3_update_metadata(disk);
			g_raid3_destroy_disk(disk);
		}
	}
	/* Drain the event queue, completing any waiting senders. */
	while ((ep = g_raid3_event_get(sc)) != NULL) {
		g_raid3_event_remove(sc, ep);
		if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0)
			g_raid3_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_RAID3_EVENT_DONE;
			G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);
	gp->softc = NULL;
	/* Tear down the synchronization geom and its consumer, if any. */
	cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer);
	if (cp != NULL)
		g_raid3_disconnect_consumer(sc, cp);
	sc->sc_sync.ds_geom->softc = NULL;
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	uma_zdestroy(sc->sc_zone_64k);
	uma_zdestroy(sc->sc_zone_16k);
	uma_zdestroy(sc->sc_zone_4k);
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
}
603
604static void
605g_raid3_orphan(struct g_consumer *cp)
606{
607 struct g_raid3_disk *disk;
608
609 g_topology_assert();
610
611 disk = cp->private;
612 if (disk == NULL)
613 return;
614 disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID;
615 g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
616 G_RAID3_EVENT_DONTWAIT);
617}
618
/*
 * Write the given metadata (or an all-zero sector when md == NULL) into
 * the last sector of the component.  On write failure the disk is
 * scheduled for disconnection and a genid bump is requested.
 */
static int
g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
{
	struct g_raid3_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert();

	sc = disk->d_softc;
	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	/* Metadata lives in the last sector of the underlying provider. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO);
	if (md != NULL)
		raid3_metadata_encode(md, sector);
	/* g_write_data() may sleep; drop the topology lock around it. */
	g_topology_unlock();
	error = g_write_data(cp, offset, sector, length);
	g_topology_lock();
	free(sector, M_RAID3);
	if (error != 0) {
		disk->d_softc->sc_bump_id = G_RAID3_BUMP_GENID;
		g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
		    G_RAID3_EVENT_DONTWAIT);
	}
	return (error);
}
653
654int
655g_raid3_clear_metadata(struct g_raid3_disk *disk)
656{
657 int error;
658
659 g_topology_assert();
660 error = g_raid3_write_metadata(disk, NULL);
661 if (error == 0) {
662 G_RAID3_DEBUG(2, "Metadata on %s cleared.",
663 g_raid3_get_diskname(disk));
664 } else {
665 G_RAID3_DEBUG(0,
666 "Cannot clear metadata on disk %s (error=%d).",
667 g_raid3_get_diskname(disk), error);
668 }
669 return (error);
670}
671
/*
 * Fill 'md' with the current in-core state of the device and the given
 * component, ready to be encoded and written to disk.
 */
void
g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
{
	struct g_raid3_softc *sc;

	sc = disk->d_softc;
	/* Device-wide fields. */
	strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic));
	md->md_version = G_RAID3_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_id = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK);
	/* Per-component fields. */
	md->md_no = disk->d_no;
	md->md_syncid = disk->d_sync.ds_syncid;
	md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK);
	/* Record sync progress only while actually synchronizing. */
	if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING)
		md->md_sync_offset = disk->d_sync.ds_offset_done;
	else
		md->md_sync_offset = 0;
	/* Store the provider name only for hardcoded components. */
	if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 &&
	    disk->d_consumer != NULL && disk->d_consumer->provider != NULL) {
		strlcpy(md->md_provider, disk->d_consumer->provider->name,
		    sizeof(md->md_provider));
	} else {
		bzero(md->md_provider, sizeof(md->md_provider));
	}
}
702
703void
704g_raid3_update_metadata(struct g_raid3_disk *disk)
705{
706 struct g_raid3_metadata md;
707 int error;
708
709 g_topology_assert();
710 g_raid3_fill_metadata(disk, &md);
711 error = g_raid3_write_metadata(disk, &md);
712 if (error == 0) {
713 G_RAID3_DEBUG(2, "Metadata on %s updated.",
714 g_raid3_get_diskname(disk));
715 } else {
716 G_RAID3_DEBUG(0,
717 "Cannot update metadata on disk %s (error=%d).",
718 g_raid3_get_diskname(disk), error);
719 }
720}
721
722static void
723g_raid3_bump_syncid(struct g_raid3_softc *sc)
724{
725 struct g_raid3_disk *disk;
726 u_int n;
727
728 g_topology_assert();
729 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
730 ("%s called with no active disks (device=%s).", __func__,
731 sc->sc_name));
732
733 sc->sc_syncid++;
734 G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
735 sc->sc_syncid);
736 for (n = 0; n < sc->sc_ndisks; n++) {
737 disk = &sc->sc_disks[n];
738 if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
739 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
740 disk->d_sync.ds_syncid = sc->sc_syncid;
741 g_raid3_update_metadata(disk);
742 }
743 }
744}
745
746static void
747g_raid3_bump_genid(struct g_raid3_softc *sc)
748{
749 struct g_raid3_disk *disk;
750 u_int n;
751
752 g_topology_assert();
753 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
754 ("%s called with no active disks (device=%s).", __func__,
755 sc->sc_name));
756
757 sc->sc_genid++;
758 G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
759 sc->sc_genid);
760 for (n = 0; n < sc->sc_ndisks; n++) {
761 disk = &sc->sc_disks[n];
762 if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
763 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
764 disk->d_genid = sc->sc_genid;
765 g_raid3_update_metadata(disk);
766 }
767 }
768}
769
770static void
771g_raid3_idle(struct g_raid3_softc *sc)
772{
773 struct g_raid3_disk *disk;
774 u_int i;
775
776 if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
777 return;
778 sc->sc_idle = 1;
779 g_topology_lock();
780 for (i = 0; i < sc->sc_ndisks; i++) {
781 disk = &sc->sc_disks[i];
782 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
783 continue;
784 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
785 g_raid3_get_diskname(disk), sc->sc_name);
786 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
787 g_raid3_update_metadata(disk);
788 }
789 g_topology_unlock();
790}
791
792static void
793g_raid3_unidle(struct g_raid3_softc *sc)
794{
795 struct g_raid3_disk *disk;
796 u_int i;
797
798 sc->sc_idle = 0;
799 g_topology_lock();
800 for (i = 0; i < sc->sc_ndisks; i++) {
801 disk = &sc->sc_disks[i];
802 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
803 continue;
804 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
805 g_raid3_get_diskname(disk), sc->sc_name);
806 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
807 g_raid3_update_metadata(disk);
808 }
809 g_topology_unlock();
810}
811
812/*
813 * Return 1 if we should check if RAID3 device is idling.
814 */
815static int
816g_raid3_check_idle(struct g_raid3_softc *sc)
817{
818 struct g_raid3_disk *disk;
819 u_int i;
820
821 if (sc->sc_idle)
822 return (0);
823 if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
824 return (0);
825 /*
826 * Check if there are no in-flight requests.
827 */
828 for (i = 0; i < sc->sc_ndisks; i++) {
829 disk = &sc->sc_disks[i];
830 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
831 continue;
832 if (disk->d_consumer->index > 0)
833 return (0);
834 }
835 return (1);
836}
837
/*
 * Treat bio_driver1 field in parent bio as list head and field bio_caller1
 * in child bio as pointer to the next element on the list.
 */
#define	G_RAID3_HEAD_BIO(pbp)	(pbp)->bio_driver1

#define	G_RAID3_NEXT_BIO(cbp)	(cbp)->bio_caller1

/* Iterate over all children of parent bio 'pbp'. */
#define	G_RAID3_FOREACH_BIO(pbp, bp)					\
	for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL;		\
	    (bp) = G_RAID3_NEXT_BIO(bp))

/* As above, but safe against removal of the current element. */
#define	G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp)			\
	for ((bp) = G_RAID3_HEAD_BIO(pbp);				\
	    (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1);	\
	    (bp) = (tmpbp))
854
/*
 * Initialize the (empty) child list of a parent bio.
 */
static void
g_raid3_init_bio(struct bio *pbp)
{

	G_RAID3_HEAD_BIO(pbp) = NULL;
}
861
/*
 * Unlink child bio 'cbp' from its parent's singly-linked child list.
 * The child itself is not destroyed.
 */
static void
g_raid3_remove_bio(struct bio *cbp)
{
	struct bio *pbp, *bp;

	pbp = cbp->bio_parent;
	if (G_RAID3_HEAD_BIO(pbp) == cbp)
		G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp);
	else {
		/* Find the predecessor and splice 'cbp' out. */
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == cbp) {
				G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp);
				break;
			}
		}
	}
	G_RAID3_NEXT_BIO(cbp) = NULL;
}
880
/*
 * Replace child bio 'dbp' with child bio 'sbp' in dbp's parent list.
 * 'sbp' is first removed from its own position; 'dbp' ends up unlinked
 * but not destroyed.
 */
static void
g_raid3_replace_bio(struct bio *sbp, struct bio *dbp)
{
	struct bio *pbp, *bp;

	g_raid3_remove_bio(sbp);
	pbp = dbp->bio_parent;
	G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp);
	if (G_RAID3_HEAD_BIO(pbp) == dbp)
		G_RAID3_HEAD_BIO(pbp) = sbp;
	else {
		/* Find dbp's predecessor and point it at sbp. */
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == dbp) {
				G_RAID3_NEXT_BIO(bp) = sbp;
				break;
			}
		}
	}
	G_RAID3_NEXT_BIO(dbp) = NULL;
}
901
/*
 * Unlink child bio 'cbp' from its parent, return its data buffer to the
 * proper UMA zone and destroy the bio.
 */
static void
g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp)
{
	struct bio *bp, *pbp;
	size_t size;

	pbp = cbp->bio_parent;
	pbp->bio_children--;
	KASSERT(cbp->bio_data != NULL, ("NULL bio_data"));
	/* Zone choice mirrors the one made in g_raid3_clone_bio(). */
	size = pbp->bio_length / (sc->sc_ndisks - 1);
	if (size > 16384)
		uma_zfree(sc->sc_zone_64k, cbp->bio_data);
	else if (size > 4096)
		uma_zfree(sc->sc_zone_16k, cbp->bio_data);
	else
		uma_zfree(sc->sc_zone_4k, cbp->bio_data);
	if (G_RAID3_HEAD_BIO(pbp) == cbp) {
		G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp);
		G_RAID3_NEXT_BIO(cbp) = NULL;
		g_destroy_bio(cbp);
	} else {
		/* Find cbp's predecessor and splice cbp out. */
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == cbp)
				break;
		}
		if (bp != NULL) {
			KASSERT(G_RAID3_NEXT_BIO(bp) != NULL,
			    ("NULL bp->bio_driver1"));
			G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp);
			G_RAID3_NEXT_BIO(cbp) = NULL;
		}
		g_destroy_bio(cbp);
	}
}
936
937static struct bio *
938g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp)
939{
940 struct bio *bp, *cbp;
941 size_t size;
942
943 cbp = g_clone_bio(pbp);
944 if (cbp == NULL)
945 return (NULL);
946 size = pbp->bio_length / (sc->sc_ndisks - 1);
947 if (size > 16384) {
948 cbp->bio_data = uma_zalloc(sc->sc_zone_64k, M_NOWAIT);
949 g_raid3_64k_requested++;
950 } else if (size > 4096) {
951 cbp->bio_data = uma_zalloc(sc->sc_zone_16k, M_NOWAIT);
952 g_raid3_16k_requested++;
953 } else {
954 cbp->bio_data = uma_zalloc(sc->sc_zone_4k, M_NOWAIT);
955 g_raid3_4k_requested++;
956 }
957 if (cbp->bio_data == NULL) {
958 if (size > 16384)
959 g_raid3_64k_failed++;
960 if (size > 4096)
961 g_raid3_16k_failed++;
962 else
963 g_raid3_4k_failed++;
964 pbp->bio_children--;
965 g_destroy_bio(cbp);
966 return (NULL);
967 }
968 G_RAID3_NEXT_BIO(cbp) = NULL;
969 if (G_RAID3_HEAD_BIO(pbp) == NULL)
970 G_RAID3_HEAD_BIO(pbp) = cbp;
971 else {
972 G_RAID3_FOREACH_BIO(pbp, bp) {
973 if (G_RAID3_NEXT_BIO(bp) == NULL) {
974 G_RAID3_NEXT_BIO(bp) = cbp;
975 break;
976 }
977 }
978 }
979 return (cbp);
980}
981
/*
 * Scatter the parent request's data over the per-component child bios,
 * compute the parity component (unless NOPARITY is set) and send every
 * child down to its consumer.
 */
static void
g_raid3_scatter(struct bio *pbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct bio *bp, *cbp;
	off_t atom, cadd, padd, left;

	sc = pbp->bio_to->geom->softc;
	bp = NULL;
	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
		/*
		 * Find bio for which we should calculate data.
		 */
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
				bp = cbp;
				break;
			}
		}
		KASSERT(bp != NULL, ("NULL parity bio."));
	}
	/*
	 * Distribute the parent data round-robin: 'atom' bytes per data
	 * component per sector.
	 */
	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
	cadd = padd = 0;
	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			if (cbp == bp)
				continue;
			bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom);
			padd += atom;
		}
		cadd += atom;
	}
	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
		struct bio *tmpbp;

		/*
		 * Calculate parity.
		 */
		bzero(bp->bio_data, bp->bio_length);
		G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
			if (cbp == bp)
				continue;
			g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data,
			    bp->bio_length);
			/* NODISK children only feed parity; drop them now. */
			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0)
				g_raid3_destroy_bio(sc, cbp);
		}
	}
	G_RAID3_FOREACH_BIO(pbp, cbp) {
		struct g_consumer *cp;

		disk = cbp->bio_caller2;
		cp = disk->d_consumer;
		cbp->bio_to = cp->provider;
		G_RAID3_LOGREQ(3, cbp, "Sending request.");
		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		/* 'index' counts in-flight requests on this consumer. */
		cp->index++;
		g_io_request(cbp, cp);
	}
}
1045
1046static void
1047g_raid3_gather(struct bio *pbp)
1048{
1049 struct g_raid3_softc *sc;
1050 struct g_raid3_disk *disk;
1051 struct bio *xbp, *fbp, *cbp;
1052 off_t atom, cadd, padd, left;
1053
1054 sc = pbp->bio_to->geom->softc;
1055 /*
1056 * Find bio for which we have to calculate data.
1057 * While going through this path, check if all requests
1058 * succeeded, if not, deny whole request.
1059 * If we're in COMPLETE mode, we allow one request to fail,
1060 * so if we find one, we're sending it to the parity consumer.
1061 * If there are more failed requests, we deny whole request.
1062 */
1063 xbp = fbp = NULL;
1064 G_RAID3_FOREACH_BIO(pbp, cbp) {
1065 if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
1066 KASSERT(xbp == NULL, ("More than one parity bio."));
1067 xbp = cbp;
1068 }
1069 if (cbp->bio_error == 0)
1070 continue;
1071 /*
1072 * Found failed request.
1073 */
1074 G_RAID3_LOGREQ(0, cbp, "Request failed.");
1075 disk = cbp->bio_caller2;
1076 if (disk != NULL) {
1077 /*
1078 * Actually this is pointless to bump genid,
1079 * because whole device is fucked up.
1080 */
1081 sc->sc_bump_id |= G_RAID3_BUMP_GENID;
1082 g_raid3_event_send(disk,
1083 G_RAID3_DISK_STATE_DISCONNECTED,
1084 G_RAID3_EVENT_DONTWAIT);
1085 }
1086 if (fbp == NULL) {
1087 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) {
1088 /*
1089 * We are already in degraded mode, so we can't
1090 * accept any failures.
1091 */
1092 if (pbp->bio_error == 0)
1093 pbp->bio_error = fbp->bio_error;
1094 } else {
1095 fbp = cbp;
1096 }
1097 } else {
1098 /*
1099 * Next failed request, that's too many.
1100 */
1101 if (pbp->bio_error == 0)
1102 pbp->bio_error = fbp->bio_error;
1103 }
1104 }
1105 if (pbp->bio_error != 0)
1106 goto finish;
1107 if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
1108 pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY;
1109 if (xbp != fbp)
1110 g_raid3_replace_bio(xbp, fbp);
1111 g_raid3_destroy_bio(sc, fbp);
1112 } else if (fbp != NULL) {
1113 struct g_consumer *cp;
1114
1115 /*
1116 * One request failed, so send the same request to
1117 * the parity consumer.
1118 */
1119 disk = pbp->bio_driver2;
1120 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
1121 pbp->bio_error = fbp->bio_error;
1122 goto finish;
1123 }
1124 pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
1125 pbp->bio_inbed--;
1126 fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR);
1127 if (disk->d_no == sc->sc_ndisks - 1)
1128 fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
1129 fbp->bio_error = 0;
1130 fbp->bio_completed = 0;
1131 fbp->bio_children = 0;
1132 fbp->bio_inbed = 0;
1133 cp = disk->d_consumer;
1134 fbp->bio_caller2 = disk;
1135 fbp->bio_to = cp->provider;
1136 G_RAID3_LOGREQ(3, fbp, "Sending request (recover).");
1137 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1138 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1139 cp->acr, cp->acw, cp->ace));
1140 cp->index++;
1141 g_io_request(fbp, cp);
1142 return;
1143 }
1144 if (xbp != NULL) {
1145 /*
1146 * Calculate parity.
1147 */
1148 G_RAID3_FOREACH_BIO(pbp, cbp) {
1149 if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0)
1150 continue;
1151 g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data,
1152 xbp->bio_length);
1153 }
1154 xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY;
1155 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
1156 if (!g_raid3_is_zero(xbp)) {
1157 g_raid3_parity_mismatch++;
1158 pbp->bio_error = EIO;
1159 goto finish;
1160 }
1161 g_raid3_destroy_bio(sc, xbp);
1162 }
1163 }
1164 atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
1165 cadd = padd = 0;
1166 for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
1167 G_RAID3_FOREACH_BIO(pbp, cbp) {
1168 bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom);
1169 pbp->bio_completed += atom;
1170 padd += atom;
1171 }
1172 cadd += atom;
1173 }
1174finish:
1175 if (pbp->bio_error == 0)
1176 G_RAID3_LOGREQ(3, pbp, "Request finished.");
1177 else {
1178 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0)
1179 G_RAID3_LOGREQ(1, pbp, "Verification error.");
1180 else
1181 G_RAID3_LOGREQ(0, pbp, "Request failed.");
1182 }
1183 pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK;
1184 g_io_deliver(pbp, pbp->bio_error);
1185 while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
1186 g_raid3_destroy_bio(sc, cbp);
1187}
1188
/*
 * Completion routine for regular (data path) component requests.
 * Runs in g_down/g_up context, so it does no real work itself: it
 * tags the bio as a regular request and hands it to the worker thread
 * through the device queue.
 */
static void
g_raid3_done(struct bio *bp)
{
	struct g_raid3_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR;
	G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error);
	mtx_lock(&sc->sc_queue_mtx);
	/* Completions go to the head so they are serviced before new I/O. */
	bioq_insert_head(&sc->sc_queue, bp);
	/* The worker may sleep on either channel; wake both. */
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}
1203
/*
 * Process one finished component request (queued by g_raid3_done()).
 * When the last child of a parent request comes back, either gather
 * the READ result (g_raid3_gather()) or finalize a WRITE/DELETE:
 * a single failed component is tolerated (the disk is disconnected),
 * a second failure fails the parent request.
 */
static void
g_raid3_regular_request(struct bio *cbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	cbp->bio_from->index--;
	pbp = cbp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = cbp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned while the request was in flight. */
		g_topology_lock();
		g_raid3_kill_consumer(sc, cbp->bio_from);
		g_topology_unlock();
	}

	G_RAID3_LOGREQ(3, cbp, "Request finished.");
	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	/* Wait until every child has completed. */
	if (pbp->bio_inbed != pbp->bio_children)
		return;
	switch (pbp->bio_cmd) {
	case BIO_READ:
		g_raid3_gather(pbp);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		int error = 0;

		pbp->bio_completed = pbp->bio_length;
		while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) {
			if (cbp->bio_error != 0) {
				disk = cbp->bio_caller2;
				if (disk != NULL) {
					/* Disconnect the broken component. */
					sc->sc_bump_id |= G_RAID3_BUMP_GENID;
					g_raid3_event_send(disk,
					    G_RAID3_DISK_STATE_DISCONNECTED,
					    G_RAID3_EVENT_DONTWAIT);
				}
				/*
				 * First failure is only remembered in
				 * 'error'; the parent fails on the second.
				 */
				if (error == 0)
					error = cbp->bio_error;
				else if (pbp->bio_error == 0) {
					/*
					 * Next failed request, that's too many.
					 */
					pbp->bio_error = error;
				}
			}
			g_raid3_destroy_bio(sc, cbp);
		}
		if (pbp->bio_error == 0)
			G_RAID3_LOGREQ(3, pbp, "Request finished.");
		else
			G_RAID3_LOGREQ(0, pbp, "Request failed.");
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED;
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY;
		g_io_deliver(pbp, pbp->bio_error);
		break;
	    }
	}
}
1271
/*
 * Completion routine for synchronization requests.  Like
 * g_raid3_done(), it only tags the bio (as a sync request) and defers
 * the actual processing to the worker thread.
 */
static void
g_raid3_sync_done(struct bio *bp)
{
	struct g_raid3_softc *sc;

	G_RAID3_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	/* Head insertion: favour completions over freshly queued I/O. */
	bioq_insert_head(&sc->sc_queue, bp);
	/* The worker may sleep on either channel; wake both. */
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}
1286
1287static void
1288g_raid3_start(struct bio *bp)
1289{
1290 struct g_raid3_softc *sc;
1291
1292 sc = bp->bio_to->geom->softc;
1293 /*
1294 * If sc == NULL or there are no valid disks, provider's error
1295 * should be set and g_raid3_start() should not be called at all.
1296 */
1297 KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
1298 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE),
1299 ("Provider's error should be set (error=%d)(device=%s).",
1300 bp->bio_to->error, bp->bio_to->name));
1301 G_RAID3_LOGREQ(3, bp, "Request received.");
1302
1303 switch (bp->bio_cmd) {
1304 case BIO_READ:
1305 case BIO_WRITE:
1306 case BIO_DELETE:
1307 break;
1308 case BIO_GETATTR:
1309 default:
1310 g_io_deliver(bp, EOPNOTSUPP);
1311 return;
1312 }
1313 mtx_lock(&sc->sc_queue_mtx);
1314 bioq_insert_tail(&sc->sc_queue, bp);
1315 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1316 wakeup(sc);
1317 mtx_unlock(&sc->sc_queue_mtx);
1318}
1319
1320/*
1321 * Send one synchronization request.
1322 */
1323static void
1324g_raid3_sync_one(struct g_raid3_softc *sc)
1325{
1326 struct g_raid3_disk *disk;
1327 struct bio *bp;
1328
1329 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
1330 ("Wrong device state (%s, %s).", sc->sc_name,
1331 g_raid3_device_state2str(sc->sc_state)));
1332 disk = sc->sc_syncdisk;
1333 KASSERT(disk != NULL, ("No sync disk (%s).", sc->sc_name));
1334 KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
1335 ("Disk %s is not marked for synchronization.",
1336 g_raid3_get_diskname(disk)));
1337
1338 bp = g_new_bio();
1339 if (bp == NULL)
1340 return;
1341 bp->bio_parent = NULL;
1342 bp->bio_cmd = BIO_READ;
1343 bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1);
1344 bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1345 bp->bio_cflags = 0;
1346 bp->bio_done = g_raid3_sync_done;
1347 bp->bio_data = disk->d_sync.ds_data;
1348 if (bp->bio_data == NULL) {
1349 g_destroy_bio(bp);
1350 return;
1351 }
1352 bp->bio_cflags = G_RAID3_BIO_CFLAG_REGSYNC;
1353 disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
1354 bp->bio_to = sc->sc_provider;
1355 G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
1356 disk->d_sync.ds_consumer->index++;
1357 g_io_request(bp, disk->d_sync.ds_consumer);
1358}
1359
/*
 * Process a completed synchronization request (queued by
 * g_raid3_sync_done()).  A sync cycle is two phases on the same bio:
 * first a READ of the full-width data from the raid3 provider, which is
 * condensed in place into the synchronized component's view (parity is
 * recomputed for the parity component, the right atoms are extracted
 * for a data component) and re-issued as a WRITE to that component;
 * then the WRITE completion, which advances ds_offset_done,
 * periodically flushes metadata, and activates the disk when the whole
 * component has been rebuilt.
 */
static void
g_raid3_sync_request(struct bio *bp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned while the request was in flight. */
		g_topology_lock();
		g_raid3_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;
		u_char *dst, *src;
		off_t left;
		u_int atom;

		if (bp->bio_error != 0) {
			G_RAID3_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
		/* An "atom" is one component's share of a provider sector. */
		atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
		dst = src = bp->bio_data;
		if (disk->d_no == sc->sc_ndisks - 1) {
			u_int n;

			/* Parity component. */
			/* XOR all data atoms of each sector in place. */
			for (left = bp->bio_length; left > 0;
			    left -= sc->sc_sectorsize) {
				bcopy(src, dst, atom);
				src += atom;
				for (n = 1; n < sc->sc_ndisks - 1; n++) {
					g_raid3_xor(src, dst, dst, atom);
					src += atom;
				}
				dst += atom;
			}
		} else {
			/* Regular component. */
			/* Extract this component's atom from each sector. */
			src += atom * disk->d_no;
			for (left = bp->bio_length; left > 0;
			    left -= sc->sc_sectorsize) {
				bcopy(src, dst, atom);
				src += sc->sc_sectorsize;
				dst += atom;
			}
		}
		/* Convert provider offset/length to component terms. */
		bp->bio_offset /= sc->sc_ndisks - 1;
		bp->bio_length /= sc->sc_ndisks - 1;
		/* Reuse the same bio for the WRITE to the component. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		bp->bio_children = bp->bio_inbed = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_raid3_disk_sync *sync;

		if (bp->bio_error != 0) {
			G_RAID3_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			/* Write to the rebuilt disk failed; drop it. */
			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
			g_raid3_event_send(disk,
			    G_RAID3_DISK_STATE_DISCONNECTED,
			    G_RAID3_EVENT_DONTWAIT);
			return;
		}
		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		/* A resync was requested; the worker will rewind first. */
		if (sync->ds_resync != -1)
			return;
		if (sync->ds_offset_done ==
		    sc->sc_mediasize / (sc->sc_ndisks - 1)) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE,
			    G_RAID3_EVENT_DONTWAIT);
			return;
		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_raid3_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
1481
/*
 * Split a regular request from the provider into per-component
 * requests and send them out.
 *
 * READ: clone ndisks-1 requests (all ndisks in VERIFY mode); a broken
 * component, or the component skipped by round-robin, is replaced by
 * the parity component.  WRITE/DELETE: clone one request per component;
 * data is scattered and parity computed later in g_raid3_scatter().
 * Writes overlapping the area being synchronized schedule a resync.
 *
 * Returns 0 on success, ENOMEM if a clone could not be allocated (the
 * caller re-queues the request).
 */
static int
g_raid3_register_request(struct bio *pbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t offset, length;
	u_int n, ndisks;
	int round_robin, verify;

	ndisks = 0;
	sc = pbp->bio_to->geom->softc;
	/* A sync-originated request is useless once syncing stopped. */
	if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 &&
	    sc->sc_syncdisk == NULL) {
		g_io_deliver(pbp, EIO);
		return (0);
	}
	g_raid3_init_bio(pbp);
	/* Per-component length/offset are 1/(ndisks-1) of the parent's. */
	length = pbp->bio_length / (sc->sc_ndisks - 1);
	offset = pbp->bio_offset / (sc->sc_ndisks - 1);
	round_robin = verify = 0;
	switch (pbp->bio_cmd) {
	case BIO_READ:
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			/* VERIFY mode: read all components, check parity. */
			pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY;
			verify = 1;
			ndisks = sc->sc_ndisks;
		} else {
			verify = 0;
			ndisks = sc->sc_ndisks - 1;
		}
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			round_robin = 1;
		} else {
			round_robin = 0;
		}
		KASSERT(!round_robin || !verify,
		    ("ROUND-ROBIN and VERIFY are mutually exclusive."));
		/* Remember the parity disk for the recovery path. */
		pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1];
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_raid3_disk_sync *sync;

		if (sc->sc_idle)
			g_raid3_unidle(sc);

		ndisks = sc->sc_ndisks;

		if (sc->sc_syncdisk == NULL)
			break;
		/*
		 * If the write lands in the already-synchronized area,
		 * that part of the rebuilt disk must be redone; request
		 * a resync from (aligned) write offset.
		 */
		sync = &sc->sc_syncdisk->d_sync;
		if (offset >= sync->ds_offset)
			break;
		if (offset + length <= sync->ds_offset_done)
			break;
		if (offset >= sync->ds_resync && sync->ds_resync != -1)
			break;
		sync->ds_resync = offset - (offset % MAXPHYS);
		break;
	    }
	}
	for (n = 0; n < ndisks; n++) {
		disk = &sc->sc_disks[n];
		cbp = g_raid3_clone_bio(sc, pbp);
		if (cbp == NULL) {
			/* Out of memory: undo all clones and retry later. */
			while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
				g_raid3_destroy_bio(sc, cbp);
			return (ENOMEM);
		}
		cbp->bio_offset = offset;
		cbp->bio_length = length;
		cbp->bio_done = g_raid3_done;
		switch (pbp->bio_cmd) {
		case BIO_READ:
			if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
				/*
				 * Replace invalid component with the parity
				 * component.
				 */
				disk = &sc->sc_disks[sc->sc_ndisks - 1];
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
			} else if (round_robin &&
			    disk->d_no == sc->sc_round_robin) {
				/*
				 * In round-robin mode skip one data component
				 * and use parity component when reading.
				 */
				pbp->bio_driver2 = disk;
				disk = &sc->sc_disks[sc->sc_ndisks - 1];
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
				sc->sc_round_robin++;
				round_robin = 0;
			} else if (verify && disk->d_no == sc->sc_ndisks - 1) {
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
			}
			break;
		case BIO_WRITE:
		case BIO_DELETE:
			if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
				if (n == ndisks - 1) {
					/*
					 * Active parity component, mark it as such.
					 */
					cbp->bio_cflags |=
					    G_RAID3_BIO_CFLAG_PARITY;
				}
			} else {
				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
				if (n == ndisks - 1) {
					/*
					 * Parity component is not connected,
					 * so destroy its request.
					 */
					pbp->bio_pflags |=
					    G_RAID3_BIO_PFLAG_NOPARITY;
					g_raid3_destroy_bio(sc, cbp);
					cbp = NULL;
				} else {
					cbp->bio_cflags |=
					    G_RAID3_BIO_CFLAG_NODISK;
					disk = NULL;
				}
			}
			break;
		}
		if (cbp != NULL)
			cbp->bio_caller2 = disk;
	}
	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (round_robin) {
			/*
			 * If we are in round-robin mode and 'round_robin' is
			 * still 1, it means, that we skipped parity component
			 * for this read and must reset sc_round_robin field.
			 */
			sc->sc_round_robin = 0;
		}
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			disk = cbp->bio_caller2;
			cp = disk->d_consumer;
			cbp->bio_to = cp->provider;
			G_RAID3_LOGREQ(3, cbp, "Sending request.");
			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
			cp->index++;
			g_io_request(cbp, cp);
		}
		break;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
			g_topology_lock();
			g_raid3_bump_syncid(sc);
			g_topology_unlock();
		}
		g_raid3_scatter(pbp);
		break;
	}
	return (0);
}
1655
1656static int
1657g_raid3_can_destroy(struct g_raid3_softc *sc)
1658{
1659 struct g_geom *gp;
1660 struct g_consumer *cp;
1661
1662 g_topology_assert();
1663 gp = sc->sc_geom;
1664 LIST_FOREACH(cp, &gp->consumer, consumer) {
1665 if (g_raid3_is_busy(sc, cp))
1666 return (0);
1667 }
1668 gp = sc->sc_sync.ds_geom;
1669 LIST_FOREACH(cp, &gp->consumer, consumer) {
1670 if (g_raid3_is_busy(sc, cp))
1671 return (0);
1672 }
1673 G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1674 sc->sc_name);
1675 return (1);
1676}
1677
/*
 * Try to tear the device down.  Succeeds (returns 1) only if no
 * consumer is busy.  If somebody is waiting for the worker thread
 * (G_RAID3_DEVICE_FLAG_WAIT), only wake the waiter - it is then
 * responsible for the actual destruction; otherwise destroy the device
 * and free the softc right here.
 */
static int
g_raid3_try_destroy(struct g_raid3_softc *sc)
{

	g_topology_lock();
	if (!g_raid3_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Hand destruction over to the thread sleeping on sc_worker. */
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_raid3_destroy_device(sc);
		g_topology_unlock();
		free(sc->sc_disks, M_RAID3);
		free(sc, M_RAID3);
	}
	return (1);
}
1701
1702/*
1703 * Worker thread.
1704 */
1705static void
1706g_raid3_worker(void *arg)
1707{
1708 struct g_raid3_softc *sc;
1709 struct g_raid3_disk *disk;
1710 struct g_raid3_disk_sync *sync;
1711 struct g_raid3_event *ep;
1712 struct bio *bp;
1713 u_int nreqs;
1714
1715 sc = arg;
1716 mtx_lock_spin(&sched_lock);
1717 sched_prio(curthread, PRIBIO);
1718 mtx_unlock_spin(&sched_lock);
1719
1720 nreqs = 0;
1721 for (;;) {
1722 G_RAID3_DEBUG(5, "%s: Let's see...", __func__);
1723 /*
1724 * First take a look at events.
1725 * This is important to handle events before any I/O requests.
1726 */
1727 ep = g_raid3_event_get(sc);
1728 if (ep != NULL && g_topology_try_lock()) {
1729 g_raid3_event_remove(sc, ep);
1730 if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) {
1731 /* Update only device status. */
1732 G_RAID3_DEBUG(3,
1733 "Running event for device %s.",
1734 sc->sc_name);
1735 ep->e_error = 0;
1736 g_raid3_update_device(sc, 1);
1737 } else {
1738 /* Update disk status. */
1739 G_RAID3_DEBUG(3, "Running event for disk %s.",
1740 g_raid3_get_diskname(ep->e_disk));
1741 ep->e_error = g_raid3_update_disk(ep->e_disk,
1742 ep->e_state);
1743 if (ep->e_error == 0)
1744 g_raid3_update_device(sc, 0);
1745 }
1746 g_topology_unlock();
1747 if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) {
1748 KASSERT(ep->e_error == 0,
1749 ("Error cannot be handled."));
1750 g_raid3_event_free(ep);
1751 } else {
1752 ep->e_flags |= G_RAID3_EVENT_DONE;
1753 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
1754 ep);
1755 mtx_lock(&sc->sc_events_mtx);
1756 wakeup(ep);
1757 mtx_unlock(&sc->sc_events_mtx);
1758 }
1759 if ((sc->sc_flags &
1760 G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
1761 if (g_raid3_try_destroy(sc))
1762 kthread_exit(0);
1763 }
1764 G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__);
1765 continue;
1766 }
1767 /*
1768 * Now I/O requests.
1769 */
1770 /* Get first request from the queue. */
1771 mtx_lock(&sc->sc_queue_mtx);
1772 bp = bioq_first(&sc->sc_queue);
1773 if (bp == NULL) {
1774 if (ep != NULL) {
1775 /*
1776 * No I/O requests and topology lock was
1777 * already held? Try again.
1778 */
1779 mtx_unlock(&sc->sc_queue_mtx);
1780 tsleep(ep, PRIBIO, "r3:top1", hz / 5);
1781 continue;
1782 }
1783 if ((sc->sc_flags &
1784 G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
1785 mtx_unlock(&sc->sc_queue_mtx);
1786 if (g_raid3_try_destroy(sc))
1787 kthread_exit(0);
1788 mtx_lock(&sc->sc_queue_mtx);
1789 }
1790 }
1791 if (sc->sc_syncdisk != NULL &&
1792 (bp == NULL || nreqs > g_raid3_reqs_per_sync)) {
1793 mtx_unlock(&sc->sc_queue_mtx);
1794 /*
1795 * It is time for synchronization...
1796 */
1797 nreqs = 0;
1798 disk = sc->sc_syncdisk;
1799 sync = &disk->d_sync;
1800 if (sync->ds_offset <
1801 sc->sc_mediasize / (sc->sc_ndisks - 1) &&
1802 sync->ds_offset == sync->ds_offset_done) {
1803 if (sync->ds_resync != -1) {
1804 sync->ds_offset = sync->ds_resync;
1805 sync->ds_offset_done = sync->ds_resync;
1806 sync->ds_resync = -1;
1807 }
1808 g_raid3_sync_one(sc);
1809 }
1810 G_RAID3_DEBUG(5, "%s: I'm here 2.", __func__);
1811 goto sleep;
1812 }
1813 if (bp == NULL) {
1814 if (g_raid3_check_idle(sc)) {
1815 u_int idletime;
1816
1817 idletime = g_raid3_idletime;
1818 if (idletime == 0)
1819 idletime = 1;
1820 idletime *= hz;
1821 if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1822 "r3:w1", idletime) == EWOULDBLOCK) {
1823 G_RAID3_DEBUG(5, "%s: I'm here 3.",
1824 __func__);
1825 /*
1826 * No I/O requests in 'idletime'
1827 * seconds, so mark components as clean.
1828 */
1829 g_raid3_idle(sc);
1830 }
1831 G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__);
1832 } else {
1833 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1834 "r3:w2", 0);
1835 G_RAID3_DEBUG(5, "%s: I'm here 5.", __func__);
1836 }
1837 continue;
1838 }
1839 nreqs++;
1840 bioq_remove(&sc->sc_queue, bp);
1841 mtx_unlock(&sc->sc_queue_mtx);
1842
1843 if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) {
1844 g_raid3_regular_request(bp);
1845 } else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) {
1846 u_int timeout, sps;
1847
1848 g_raid3_sync_request(bp);
1849sleep:
1850 sps = atomic_load_acq_int(&g_raid3_syncs_per_sec);
1851 if (sps == 0) {
1852 G_RAID3_DEBUG(5, "%s: I'm here 6.", __func__);
1853 continue;
1854 }
1855 if (ep != NULL) {
1856 /*
1857 * We have some pending events, don't sleep now.
1858 */
1859 G_RAID3_DEBUG(5, "%s: I'm here 7.", __func__);
1860 tsleep(ep, PRIBIO, "r3:top2", hz / 5);
1861 continue;
1862 }
1863 mtx_lock(&sc->sc_queue_mtx);
1864 if (bioq_first(&sc->sc_queue) != NULL) {
1865 mtx_unlock(&sc->sc_queue_mtx);
1866 G_RAID3_DEBUG(5, "%s: I'm here 8.", __func__);
1867 continue;
1868 }
1869 timeout = hz / sps;
1870 if (timeout == 0)
1871 timeout = 1;
1872 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w2",
1873 timeout);
1874 } else {
1875 if (g_raid3_register_request(bp) != 0) {
1876 mtx_lock(&sc->sc_queue_mtx);
1877 bioq_insert_tail(&sc->sc_queue, bp);
1878 MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx,
1879 PRIBIO | PDROP, "r3:lowmem", hz / 10);
1880 }
1881 }
1882 G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__);
1883 }
1884}
1885
1886/*
1887 * Open disk's consumer if needed.
1888 */
1889static void
1890g_raid3_update_access(struct g_raid3_disk *disk)
1891{
1892 struct g_provider *pp;
1893
1894 g_topology_assert();
1895
1896 pp = disk->d_softc->sc_provider;
1897 if (pp == NULL)
1898 return;
1899 if (pp->acw > 0) {
1900 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) {
1901 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1902 g_raid3_get_diskname(disk), disk->d_softc->sc_name);
1903 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
1904 }
1905 } else if (pp->acw == 0) {
1906 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) {
1907 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
1908 g_raid3_get_diskname(disk), disk->d_softc->sc_name);
1909 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
1910 }
1911 }
1912}
1913
/*
 * Start rebuilding the first component found in SYNCHRONIZING state:
 * create and open a consumer on the synchronization geom, allocate the
 * transfer buffer, and make the disk the device's sync disk so the
 * worker thread starts issuing synchronization requests.
 */
static void
g_raid3_sync_start(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	int error;
	u_int n;

	g_topology_assert();

	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
	    ("Device not in DEGRADED state (%s, %u).", sc->sc_name,
	    sc->sc_state));
	KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).",
	    sc->sc_name, sc->sc_state));
	/* Find a component waiting for synchronization. */
	disk = NULL;
	for (n = 0; n < sc->sc_ndisks; n++) {
		if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING)
			continue;
		disk = &sc->sc_disks[n];
		break;
	}
	if (disk == NULL)
		return;

	G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_raid3_get_diskname(disk));
	disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
	KASSERT(disk->d_sync.ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_raid3_get_diskname(disk)));
	/* Read-only consumer on our own provider, used for sync READs. */
	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
	disk->d_sync.ds_consumer->private = disk;
	disk->d_sync.ds_consumer->index = 0;
	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
	    disk->d_softc->sc_name, error));
	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).",
	    disk->d_softc->sc_name, error));
	disk->d_sync.ds_data = malloc(MAXPHYS, M_RAID3, M_WAITOK);
	sc->sc_syncdisk = disk;
}
1956
1957/*
1958 * Stop synchronization process.
1959 * type: 0 - synchronization finished
1960 * 1 - synchronization stopped
1961 */
1962static void
1963g_raid3_sync_stop(struct g_raid3_softc *sc, int type)
1964{
1965 struct g_raid3_disk *disk;
1966
1967 g_topology_assert();
1968 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
1969 ("Device not in DEGRADED state (%s, %u).", sc->sc_name,
1970 sc->sc_state));
1971 disk = sc->sc_syncdisk;
1972 sc->sc_syncdisk = NULL;
1973 KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name));
1974 KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
1975 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
1976 g_raid3_disk_state2str(disk->d_state)));
1977 if (disk->d_sync.ds_consumer == NULL)
1978 return;
1979
1980 if (type == 0) {
1981 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1982 disk->d_softc->sc_name, g_raid3_get_diskname(disk));
1983 } else /* if (type == 1) */ {
1984 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1985 disk->d_softc->sc_name, g_raid3_get_diskname(disk));
1986 }
1987 g_raid3_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
1988 free(disk->d_sync.ds_data, M_RAID3);
1989 disk->d_sync.ds_consumer = NULL;
1990 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
1991}
1992
/*
 * Create and announce the raid3/<name> provider for a device that has
 * reached a runnable (DEGRADED or COMPLETE) state.  In DEGRADED state,
 * also kick off synchronization of the missing component.
 */
static void
g_raid3_launch_provider(struct g_raid3_softc *sc)
{
	struct g_provider *pp;

	g_topology_assert();

	pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name);
	pp->mediasize = sc->sc_mediasize;
	pp->sectorsize = sc->sc_sectorsize;
	sc->sc_provider = pp;
	g_error_provider(pp, 0);
	G_RAID3_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
	    pp->name);
	if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED)
		g_raid3_sync_start(sc);
}
2010
/*
 * Tear the provider down: fail all queued requests with ENXIO, orphan
 * the provider, and stop any synchronization that is in progress.
 */
static void
g_raid3_destroy_provider(struct g_raid3_softc *sc)
{
	struct bio *bp;

	g_topology_assert();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	/* Refuse new requests before draining the queue. */
	g_error_provider(sc->sc_provider, ENXIO);
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
		bioq_remove(&sc->sc_queue, bp);
		g_io_deliver(bp, ENXIO);
	}
	mtx_unlock(&sc->sc_queue_mtx);
	G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
	    sc->sc_provider->name);
	sc->sc_provider->flags |= G_PF_WITHER;
	g_orphan_provider(sc->sc_provider, ENXIO);
	sc->sc_provider = NULL;
	if (sc->sc_syncdisk != NULL)
		g_raid3_sync_stop(sc, 1);
}
2035
/*
 * Callout handler: the startup timeout expired before all components
 * showed up, so force the device to start (or be destroyed) with
 * whatever components are present.
 */
static void
g_raid3_go(void *arg)
{
	struct g_raid3_softc *sc;

	sc = arg;
	G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
	g_raid3_event_send(sc, 0,
	    G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE);
}
2046
/*
 * Decide the initial state of a disk joining a running device, based on
 * how its syncid compares to the device's:
 *  - equal syncid: ACTIVE, or SYNCHRONIZING/STALE per the sync flags;
 *  - older syncid: stale data - reset its sync state and rebuild (or
 *    leave STALE if autosync is disabled);
 *  - newer syncid: the device was started from stale components; the
 *    fresher disk is refused and destroyed.
 */
static u_int
g_raid3_determine_state(struct g_raid3_disk *disk)
{
	struct g_raid3_softc *sc;
	u_int state;

	sc = disk->d_softc;
	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
		if ((disk->d_flags &
		    G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) {
			/* Disk does not need synchronization. */
			state = G_RAID3_DISK_STATE_ACTIVE;
		} else {
			if ((sc->sc_flags &
			    G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
			    (disk->d_flags &
			    G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) {
				/*
				 * We can start synchronization from
				 * the stored offset.
				 */
				state = G_RAID3_DISK_STATE_SYNCHRONIZING;
			} else {
				state = G_RAID3_DISK_STATE_STALE;
			}
		}
	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because if it even was synchronized, it was
		 * synchronized to disks with different syncid.
		 */
		disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		disk->d_sync.ds_syncid = sc->sc_syncid;
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
		    (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) {
			state = G_RAID3_DISK_STATE_SYNCHRONIZING;
		} else {
			state = G_RAID3_DISK_STATE_STALE;
		}
	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the device was started on stale disks
		 * and a more fresh disk just arrived.
		 * If there were writes, the device data is likely
		 * corrupted beyond repair.  The best choice here is
		 * not to touch this disk and to inform the user loudly.
		 */
		G_RAID3_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrives!! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_raid3_get_diskname(disk));
		g_raid3_destroy_disk(disk);
		state = G_RAID3_DISK_STATE_NONE;
		/* Return immediately, because disk was destroyed. */
		return (state);
	}
	G_RAID3_DEBUG(3, "State for %s disk: %s.",
	    g_raid3_get_diskname(disk), g_raid3_disk_state2str(state));
	return (state);
}
2111
2112/*
2113 * Update device state.
2114 */
2115static void
2116g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force)
2117{
2118 struct g_raid3_disk *disk;
2119 u_int state;
2120
2121 g_topology_assert();
2122
2123 switch (sc->sc_state) {
2124 case G_RAID3_DEVICE_STATE_STARTING:
2125 {
2126 u_int n, ndirty, ndisks, genid, syncid;
2127
2128 KASSERT(sc->sc_provider == NULL,
2129 ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2130 /*
2131 * Are we ready? We are, if all disks are connected or
2132 * one disk is missing and 'force' is true.
2133 */
2134 if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) {
2135 if (!force)
2136 callout_drain(&sc->sc_callout);
2137 } else {
2138 if (force) {
2139 /*
2140 * Timeout expired, so destroy device.
2141 */
2142 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
2143 }
2144 return;
2145 }
2146
2147 /*
2148 * Find the biggest genid.
2149 */
2150 genid = 0;
2151 for (n = 0; n < sc->sc_ndisks; n++) {
2152 disk = &sc->sc_disks[n];
2153 if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
2154 continue;
2155 if (disk->d_genid > genid)
2156 genid = disk->d_genid;
2157 }
2158 sc->sc_genid = genid;
2159 /*
2160 * Remove all disks without the biggest genid.
2161 */
2162 for (n = 0; n < sc->sc_ndisks; n++) {
2163 disk = &sc->sc_disks[n];
2164 if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
2165 continue;
2166 if (disk->d_genid < genid) {
2167 G_RAID3_DEBUG(0,
2168 "Component %s (device %s) broken, skipping.",
2169 g_raid3_get_diskname(disk), sc->sc_name);
2170 g_raid3_destroy_disk(disk);
2171 }
2172 }
2173
2174 /*
2175 * There must be at least 'sc->sc_ndisks - 1' components
2176 * with the same syncid and without SYNCHRONIZING flag.
2177 */
2178
2179 /*
2180 * Find the biggest syncid, number of valid components and
2181 * number of dirty components.
2182 */
2183 ndirty = ndisks = syncid = 0;
2184 for (n = 0; n < sc->sc_ndisks; n++) {
2185 disk = &sc->sc_disks[n];
2186 if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
2187 continue;
2188 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0)
2189 ndirty++;
2190 if (disk->d_sync.ds_syncid > syncid) {
2191 syncid = disk->d_sync.ds_syncid;
2192 ndisks = 0;
2193 } else if (disk->d_sync.ds_syncid < syncid) {
2194 continue;
2195 }
2196 if ((disk->d_flags &
2197 G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) {
2198 continue;
2199 }
2200 ndisks++;
2201 }
2202 /*
2203 * Do we have enough valid components?
2204 */
2205 if (ndisks + 1 < sc->sc_ndisks) {
2206 G_RAID3_DEBUG(0,
2207 "Device %s is broken, too few valid components.",
2208 sc->sc_name);
2209 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
2210 return;
2211 }
2212 /*
2213 * If there is one DIRTY component and all disks are present,
2214 * mark it for synchronization. If there is more than one DIRTY
2215 * component, mark parity component for synchronization.
2216 */
2217 if (ndisks == sc->sc_ndisks && ndirty == 1) {
2218 for (n = 0; n < sc->sc_ndisks; n++) {
2219 disk = &sc->sc_disks[n];
2220 if ((disk->d_flags &
2221 G_RAID3_DISK_FLAG_DIRTY) == 0) {
2222 continue;
2223 }
2224 disk->d_flags |=
2225 G_RAID3_DISK_FLAG_SYNCHRONIZING;
2226 }
2227 } else if (ndisks == sc->sc_ndisks && ndirty > 1) {
2228 disk = &sc->sc_disks[sc->sc_ndisks - 1];
2229 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING;
2230 }
2231
2232 sc->sc_syncid = syncid;
2233 if (force) {
2234 /* Remember to bump syncid on first write. */
2235 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID;
2236 }
2237 if (ndisks == sc->sc_ndisks)
2238 state = G_RAID3_DEVICE_STATE_COMPLETE;
2239 else /* if (ndisks == sc->sc_ndisks - 1) */
2240 state = G_RAID3_DEVICE_STATE_DEGRADED;
2241 G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.",
2242 sc->sc_name, g_raid3_device_state2str(sc->sc_state),
2243 g_raid3_device_state2str(state));
2244 sc->sc_state = state;
2245 for (n = 0; n < sc->sc_ndisks; n++) {
2246 disk = &sc->sc_disks[n];
2247 if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
2248 continue;
2249 state = g_raid3_determine_state(disk);
2250 g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT);
2251 if (state == G_RAID3_DISK_STATE_STALE)
2252 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID;
2253 }
2254 break;
2255 }
2256 case G_RAID3_DEVICE_STATE_DEGRADED:
2257 /*
2258 * Genid need to be bumped immediately, so do it here.
2259 */
2260 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) {
2261 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID;
2262 g_raid3_bump_genid(sc);
2263 }
2264
2265 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0)
2266 return;
2267 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) <
2268 sc->sc_ndisks - 1) {
2269 if (sc->sc_provider != NULL)
2270 g_raid3_destroy_provider(sc);
2271 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
2272 return;
2273 }
2274 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) ==
2275 sc->sc_ndisks) {
2276 state = G_RAID3_DEVICE_STATE_COMPLETE;
2277 G_RAID3_DEBUG(1,
2278 "Device %s state changed from %s to %s.",
2279 sc->sc_name, g_raid3_device_state2str(sc->sc_state),
2280 g_raid3_device_state2str(state));
2281 sc->sc_state = state;
2282 }
2283 if (sc->sc_provider == NULL)
2284 g_raid3_launch_provider(sc);
2285 break;
2286 case G_RAID3_DEVICE_STATE_COMPLETE:
2287 /*
2288 * Genid need to be bumped immediately, so do it here.
2289 */
2290 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) {
2291 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID;
2292 g_raid3_bump_genid(sc);
2293 }
2294
2295 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0)
2296 return;
2297 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >=
2298 sc->sc_ndisks - 1,
2299 ("Too few ACTIVE components in COMPLETE state (device %s).",
2300 sc->sc_name));
2301 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) ==
2302 sc->sc_ndisks - 1) {
2303 state = G_RAID3_DEVICE_STATE_DEGRADED;
2304 G_RAID3_DEBUG(1,
2305 "Device %s state changed from %s to %s.",
2306 sc->sc_name, g_raid3_device_state2str(sc->sc_state),
2307 g_raid3_device_state2str(state));
2308 sc->sc_state = state;
2309 }
2310 if (sc->sc_provider == NULL)
2311 g_raid3_launch_provider(sc);
2312 break;
2313 default:
2314 KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name,
2315 g_raid3_device_state2str(sc->sc_state)));
2316 break;
2317 }
2318}
2319
2320/*
2321 * Update disk state and device state if needed.
2322 */
2323#define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \
2324 "Disk %s state changed from %s to %s (device %s).", \
2325 g_raid3_get_diskname(disk), \
2326 g_raid3_disk_state2str(disk->d_state), \
2327 g_raid3_disk_state2str(state), sc->sc_name)
/*
 * Disk state machine: move 'disk' into 'state' and perform the side
 * effects each transition requires (metadata updates, starting or
 * stopping synchronization, disk teardown).  Called with the GEOM
 * topology lock held (g_topology_assert() below).  Always returns 0;
 * invalid transitions are caught by KASSERTs.
 */
static int
g_raid3_update_disk(struct g_raid3_disk *disk, u_int state)
{
	struct g_raid3_softc *sc;

	g_topology_assert();

	sc = disk->d_softc;
again:
	G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.",
	    g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state),
	    g_raid3_disk_state2str(state));
	switch (state) {
	case G_RAID3_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. New disk arrive.
		 */
		/* Previous state should be NONE. */
		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE,
		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_state = state;
		G_RAID3_DEBUG(0, "Device %s: provider %s detected.",
		    sc->sc_name, g_raid3_get_diskname(disk));
		/* While STARTING, the device state machine picks disks up later. */
		if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING)
			break;
		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_raid3_device_state2str(sc->sc_state),
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		/* Compute the disk's next state and run that transition now. */
		state = g_raid3_determine_state(disk);
		if (state != G_RAID3_DISK_STATE_NONE)
			goto again;
		break;
	case G_RAID3_DISK_STATE_ACTIVE:
		/*
		 * Possible scenarios:
		 * 1. New disk does not need synchronization.
		 * 2. Synchronization process finished successfully.
		 */
		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_raid3_device_state2str(sc->sc_state),
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		/* Previous state should be NEW or SYNCHRONIZING. */
		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW ||
		    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_RAID3_DISK_STATE_NEW)
			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
		else if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
			/* Synchronization completed; stop it and clear flags. */
			disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING;
			disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC;
			g_raid3_sync_stop(sc, 0);
		}
		disk->d_state = state;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		g_raid3_update_access(disk);
		g_raid3_update_metadata(disk);
		G_RAID3_DEBUG(0, "Device %s: provider %s activated.",
		    sc->sc_name, g_raid3_get_diskname(disk));
		break;
	case G_RAID3_DISK_STATE_STALE:
		/*
		 * Possible scenarios:
		 * 1. Stale disk was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_raid3_device_state2str(sc->sc_state),
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		/*
		 * STALE state is only possible if device is marked
		 * NOAUTOSYNC.
		 */
		KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_raid3_device_state2str(sc->sc_state),
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
		disk->d_state = state;
		g_raid3_update_metadata(disk);
		G_RAID3_DEBUG(0, "Device %s: provider %s is stale.",
		    sc->sc_name, g_raid3_get_diskname(disk));
		break;
	case G_RAID3_DISK_STATE_SYNCHRONIZING:
		/*
		 * Possible scenarios:
		 * 1. Disk which needs synchronization was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_raid3_device_state2str(sc->sc_state),
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_RAID3_DISK_STATE_NEW)
			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
		disk->d_state = state;
		/* Synchronization can only run once the provider exists. */
		if (sc->sc_provider != NULL) {
			g_raid3_sync_start(sc);
			g_raid3_update_metadata(disk);
		}
		break;
	case G_RAID3_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. Device wasn't running yet, but disk disappear.
		 * 2. Disk was active and disapppear.
		 * 3. Disk disappear during synchronization process.
		 */
		if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			/*
			 * Previous state should be ACTIVE, STALE or
			 * SYNCHRONIZING.
			 */
			KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
			    disk->d_state == G_RAID3_DISK_STATE_STALE ||
			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
			    ("Wrong disk state (%s, %s).",
			    g_raid3_get_diskname(disk),
			    g_raid3_disk_state2str(disk->d_state)));
		} else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) {
			/* Previous state should be NEW. */
			KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
			    ("Wrong disk state (%s, %s).",
			    g_raid3_get_diskname(disk),
			    g_raid3_disk_state2str(disk->d_state)));
			/*
			 * Reset bumping syncid if disk disappeared in STARTING
			 * state.
			 */
			if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0)
				sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
#ifdef	INVARIANTS
		} else {
			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
			    sc->sc_name,
			    g_raid3_device_state2str(sc->sc_state),
			    g_raid3_get_diskname(disk),
			    g_raid3_disk_state2str(disk->d_state)));
#endif
		}
		DISK_STATE_CHANGED();
		G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.",
		    sc->sc_name, g_raid3_get_diskname(disk));

		g_raid3_destroy_disk(disk);
		break;
	default:
		KASSERT(1 == 0, ("Unknown state (%u).", state));
		break;
	}
	return (0);
}
2510#undef DISK_STATE_CHANGED
2511
2512int
2513g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md)
2514{
2515 struct g_provider *pp;
2516 u_char *buf;
2517 int error;
2518
2519 g_topology_assert();
2520
2521 error = g_access(cp, 1, 0, 0);
2522 if (error != 0)
2523 return (error);
2524 pp = cp->provider;
2525 g_topology_unlock();
2526 /* Metadata are stored on last sector. */
2527 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2528 &error);
2529 g_topology_lock();
2530 g_access(cp, -1, 0, 0);
2531 if (error != 0) {
2532 G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2533 cp->provider->name, error);
2534 if (buf != NULL)
2535 g_free(buf);
2536 return (error);
2537 }
2538
2539 /* Decode metadata. */
2540 error = raid3_metadata_decode(buf, md);
2541 g_free(buf);
2542 if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0)
2543 return (EINVAL);
2544 if (md->md_version > G_RAID3_VERSION) {
2545 G_RAID3_DEBUG(0,
2546 "Kernel module is too old to handle metadata from %s.",
2547 cp->provider->name);
2548 return (EINVAL);
2549 }
2550 if (error != 0) {
2551 G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2552 cp->provider->name);
2553 return (error);
2554 }
2555
2556 return (0);
2557}
2558
2559static int
2560g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp,
2561 struct g_raid3_metadata *md)
2562{
2563
2564 if (md->md_no >= sc->sc_ndisks) {
2565 G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.",
2566 pp->name, md->md_no);
2567 return (EINVAL);
2568 }
2569 if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) {
2570 G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.",
2571 pp->name, md->md_no);
2572 return (EEXIST);
2573 }
2574 if (md->md_all != sc->sc_ndisks) {
2575 G_RAID3_DEBUG(1,
2576 "Invalid '%s' field on disk %s (device %s), skipping.",
2577 "md_all", pp->name, sc->sc_name);
2578 return (EINVAL);
2579 }
2580 if (md->md_mediasize != sc->sc_mediasize) {
2581 G_RAID3_DEBUG(1,
2582 "Invalid '%s' field on disk %s (device %s), skipping.",
2583 "md_mediasize", pp->name, sc->sc_name);
2584 return (EINVAL);
2585 }
2586 if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) {
2587 G_RAID3_DEBUG(1,
2588 "Invalid '%s' field on disk %s (device %s), skipping.",
2589 "md_mediasize", pp->name, sc->sc_name);
2590 return (EINVAL);
2591 }
2592 if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) {
2593 G_RAID3_DEBUG(1,
2594 "Invalid size of disk %s (device %s), skipping.", pp->name,
2595 sc->sc_name);
2596 return (EINVAL);
2597 }
2598 if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) {
2599 G_RAID3_DEBUG(1,
2600 "Invalid '%s' field on disk %s (device %s), skipping.",
2601 "md_sectorsize", pp->name, sc->sc_name);
2602 return (EINVAL);
2603 }
2604 if (md->md_sectorsize != sc->sc_sectorsize) {
2605 G_RAID3_DEBUG(1,
2606 "Invalid '%s' field on disk %s (device %s), skipping.",
2607 "md_sectorsize", pp->name, sc->sc_name);
2608 return (EINVAL);
2609 }
2610 if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2611 G_RAID3_DEBUG(1,
2612 "Invalid sector size of disk %s (device %s), skipping.",
2613 pp->name, sc->sc_name);
2614 return (EINVAL);
2615 }
2616 if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) {
2617 G_RAID3_DEBUG(1,
2618 "Invalid device flags on disk %s (device %s), skipping.",
2619 pp->name, sc->sc_name);
2620 return (EINVAL);
2621 }
2622 if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
2623 (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) {
2624 /*
2625 * VERIFY and ROUND-ROBIN options are mutally exclusive.
2626 */
2627 G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on "
2628 "disk %s (device %s), skipping.", pp->name, sc->sc_name);
2629 return (EINVAL);
2630 }
2631 if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) {
2632 G_RAID3_DEBUG(1,
2633 "Invalid disk flags on disk %s (device %s), skipping.",
2634 pp->name, sc->sc_name);
2635 return (EINVAL);
2636 }
2637 return (0);
2638}
2639
2640int
2641g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp,
2642 struct g_raid3_metadata *md)
2643{
2644 struct g_raid3_disk *disk;
2645 int error;
2646
2647 g_topology_assert();
2648 G_RAID3_DEBUG(2, "Adding disk %s.", pp->name);
2649
2650 error = g_raid3_check_metadata(sc, pp, md);
2651 if (error != 0)
2652 return (error);
2653 if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING &&
2654 md->md_genid < sc->sc_genid) {
2655 G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.",
2656 pp->name, sc->sc_name);
2657 return (EINVAL);
2658 }
2659 disk = g_raid3_init_disk(sc, pp, md, &error);
2660 if (disk == NULL)
2661 return (error);
2662 error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW,
2663 G_RAID3_EVENT_WAIT);
2664 if (error != 0)
2665 return (error);
2666 if (md->md_version < G_RAID3_VERSION) {
2667 G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2668 pp->name, md->md_version, G_RAID3_VERSION);
2669 g_raid3_update_metadata(disk);
2670 }
2671 return (0);
2672}
2673
2674static int
2675g_raid3_access(struct g_provider *pp, int acr, int acw, int ace)
2676{
2677 struct g_raid3_softc *sc;
2678 struct g_raid3_disk *disk;
2679 int dcr, dcw, dce;
2680 u_int n;
2681
2682 g_topology_assert();
2683 G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2684 acw, ace);
2685
2686 dcr = pp->acr + acr;
2687 dcw = pp->acw + acw;
2688 dce = pp->ace + ace;
2689
2690 sc = pp->geom->softc;
2691 if (sc == NULL ||
2692 g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1 ||
2693 (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
2694 if (acr <= 0 && acw <= 0 && ace <= 0)
2695 return (0);
2696 else
2697 return (ENXIO);
2698 }
2699 for (n = 0; n < sc->sc_ndisks; n++) {
2700 disk = &sc->sc_disks[n];
2701 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
2702 continue;
2703 /*
2704 * Mark disk as dirty on open and unmark on close.
2705 */
2706 if (pp->acw == 0 && dcw > 0) {
2707 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
2708 g_raid3_get_diskname(disk), sc->sc_name);
2709 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
2710 g_raid3_update_metadata(disk);
2711 } else if (pp->acw > 0 && dcw == 0) {
2712 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
2713 g_raid3_get_diskname(disk), sc->sc_name);
2714 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
2715 g_raid3_update_metadata(disk);
2716 }
2717 }
2718 return (0);
2719}
2720
2721static struct g_geom *
2722g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
2723{
2724 struct g_raid3_softc *sc;
2725 struct g_geom *gp;
2726 int error, timeout;
2727 u_int n;
2728
2729 g_topology_assert();
2730 G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id);
2731
2732 /* One disk is minimum. */
2733 if (md->md_all < 1)
2734 return (NULL);
2735 /*
2736 * Action geom.
2737 */
2738 gp = g_new_geomf(mp, "%s", md->md_name);
2739 sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO);
2740 sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3,
2741 M_WAITOK | M_ZERO);
2742 gp->start = g_raid3_start;
2743 gp->orphan = g_raid3_orphan;
2744 gp->access = g_raid3_access;
2745 gp->dumpconf = g_raid3_dumpconf;
2746
2747 sc->sc_id = md->md_id;
2748 sc->sc_mediasize = md->md_mediasize;
2749 sc->sc_sectorsize = md->md_sectorsize;
2750 sc->sc_ndisks = md->md_all;
2751 sc->sc_round_robin = 0;
2752 sc->sc_flags = md->md_mflags;
2753 sc->sc_bump_id = 0;
2754 sc->sc_idle = 0;
2755 for (n = 0; n < sc->sc_ndisks; n++) {
2756 sc->sc_disks[n].d_softc = sc;
2757 sc->sc_disks[n].d_no = n;
2758 sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK;
2759 }
2760 bioq_init(&sc->sc_queue);
2761 mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF);
2762 TAILQ_INIT(&sc->sc_events);
2763 mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF);
2764 callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2765 sc->sc_state = G_RAID3_DEVICE_STATE_STARTING;
2766 gp->softc = sc;
2767 sc->sc_geom = gp;
2768 sc->sc_provider = NULL;
2769 /*
2770 * Synchronization geom.
2771 */
2772 gp = g_new_geomf(mp, "%s.sync", md->md_name);
2773 gp->softc = sc;
2774 gp->orphan = g_raid3_orphan;
2775 sc->sc_sync.ds_geom = gp;
2776 sc->sc_zone_64k = uma_zcreate("gr3:64k", 65536, NULL, NULL, NULL, NULL,
2777 UMA_ALIGN_PTR, 0);
2778 uma_zone_set_max(sc->sc_zone_64k, g_raid3_n64k);
2779 sc->sc_zone_16k = uma_zcreate("gr3:16k", 16384, NULL, NULL, NULL, NULL,
2780 UMA_ALIGN_PTR, 0);
2781 uma_zone_set_max(sc->sc_zone_64k, g_raid3_n16k);
2782 sc->sc_zone_4k = uma_zcreate("gr3:4k", 4096, NULL, NULL, NULL, NULL,
2783 UMA_ALIGN_PTR, 0);
2784 uma_zone_set_max(sc->sc_zone_4k, g_raid3_n4k);
2785 error = kthread_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0,
2786 "g_raid3 %s", md->md_name);
2787 if (error != 0) {
2788 G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.",
2789 sc->sc_name);
2790 uma_zdestroy(sc->sc_zone_64k);
2791 uma_zdestroy(sc->sc_zone_16k);
2792 uma_zdestroy(sc->sc_zone_4k);
2793 g_destroy_geom(sc->sc_sync.ds_geom);
2794 mtx_destroy(&sc->sc_events_mtx);
2795 mtx_destroy(&sc->sc_queue_mtx);
2796 g_destroy_geom(sc->sc_geom);
2797 free(sc->sc_disks, M_RAID3);
2798 free(sc, M_RAID3);
2799 return (NULL);
2800 }
2801
2802 G_RAID3_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2803
2804 /*
2805 * Run timeout.
2806 */
2807 timeout = atomic_load_acq_int(&g_raid3_timeout);
2808 callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc);
2809 return (sc->sc_geom);
2810}
2811
/*
 * Tear down the whole device.  With 'force' the device is destroyed
 * even while its provider is still open; otherwise EBUSY is returned
 * for an open provider.  Called and returns with the topology lock
 * held; the lock is dropped while waiting for the worker thread.
 */
int
g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force)
{
	struct g_provider *pp;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);
	pp = sc->sc_provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (force) {
			G_RAID3_DEBUG(1, "Device %s is still open, so it "
			    "can't be definitely removed.", pp->name);
		} else {
			G_RAID3_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
	}

	/*
	 * Tell the worker thread to shut down.  NOTE(review): WAIT
	 * presumably makes the exiting worker wake &sc->sc_worker and
	 * clear it - the polling loop below relies on that; the worker
	 * code is outside this view, so confirm there.
	 */
	sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
	sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT;
	g_topology_unlock();
	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
	/* Poll (with timeout) until the worker thread is gone. */
	while (sc->sc_worker != NULL)
		tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5);
	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
	g_topology_lock();
	g_raid3_destroy_device(sc);
	free(sc->sc_disks, M_RAID3);
	free(sc, M_RAID3);
	return (0);
}
2852
static void
g_raid3_taste_orphan(struct g_consumer *cp)
{

	/*
	 * The tasting consumer exists only for the duration of
	 * g_raid3_taste(), so its orphan method must never be called.
	 */
	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}
2860
/*
 * GEOM taste method: probe provider 'pp' for raid3 metadata and, if
 * found, attach it to an existing raid3 device or create a new one.
 * Returns the device geom on success, NULL if the provider is not
 * ours or the component cannot be added.
 */
static struct g_geom *
g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_raid3_metadata md;
	struct g_raid3_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	G_RAID3_DEBUG(2, "Tasting %s.", pp->name);

	/* Temporary geom/consumer used only to read the metadata. */
	gp = g_new_geomf(mp, "raid3:taste");
	/* This orphan function should be never called. */
	gp->orphan = g_raid3_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_raid3_read_metadata(cp, &md);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	/*
	 * If the metadata records a provider name, only accept the
	 * provider it names.
	 */
	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
		return (NULL);
	if (g_raid3_debug >= 2)
		raid3_metadata_dump(&md);

	/*
	 * Let's check if device already exists.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		/* Synchronization geoms share the softc; skip them. */
		if (sc->sc_sync.ds_geom == gp)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		/* Same name but different id: a conflicting device. */
		if (md.md_id != sc->sc_id) {
			G_RAID3_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
		break;
	}
	if (gp == NULL) {
		gp = g_raid3_create(mp, &md);
		if (gp == NULL) {
			G_RAID3_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
	}
	G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	error = g_raid3_add_disk(sc, pp, &md);
	if (error != 0) {
		G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
		    pp->name, gp->name, error);
		/*
		 * If no component got attached at all, remove the
		 * (possibly just created) empty device.
		 */
		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) ==
		    sc->sc_ndisks) {
			g_raid3_destroy(sc, 1);
		}
		return (NULL);
	}
	return (gp);
}
2933
2934static int
2935g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
2936 struct g_geom *gp)
2937{
2938
2939 return (g_raid3_destroy(gp->softc, 0));
2940}
2941
/*
 * GEOM dumpconf method: emit XML describing either one component
 * (when 'cp' is given) or the whole device (when neither 'pp' nor
 * 'cp' is given) into 'sb'.  Called with the topology lock held.
 */
static void
g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_raid3_softc *sc;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	/* Skip synchronization geom. */
	if (gp == sc->sc_sync.ds_geom)
		return;
	if (pp != NULL) {
		/* Nothing here. */
	} else if (cp != NULL) {
		struct g_raid3_disk *disk;

		disk = cp->private;
		if (disk == NULL)
			return;
		sbuf_printf(sb, "%s<Type>", indent);
		/* The highest-numbered component holds the parity. */
		if (disk->d_no == sc->sc_ndisks - 1)
			sbuf_printf(sb, "PARITY");
		else
			sbuf_printf(sb, "DATA");
		sbuf_printf(sb, "</Type>\n");
		sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
		    (u_int)disk->d_no);
		if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
			sbuf_printf(sb, "%s<Synchronized>", indent);
			/* Report progress as a percentage of the data size. */
			if (disk->d_sync.ds_offset_done == 0)
				sbuf_printf(sb, "0%%");
			else {
				sbuf_printf(sb, "%u%%",
				    (u_int)((disk->d_sync.ds_offset_done * 100) /
				    (sc->sc_mediasize / (sc->sc_ndisks - 1))));
			}
			sbuf_printf(sb, "</Synchronized>\n");
		}
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
		    disk->d_sync.ds_syncid);
		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (disk->d_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

/* Emit a comma-separated flag name when the disk flag is set. */
#define	ADD_FLAG(flag, name)	do {					\
	if ((disk->d_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY");
			ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED");
			ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING,
			    "SYNCHRONIZING");
			ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_raid3_disk_state2str(disk->d_state));
	} else {
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (sc->sc_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

/* Emit a comma-separated flag name when the device flag is set. */
#define	ADD_FLAG(flag, name)	do {					\
	if ((sc->sc_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
			ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN,
			    "ROUND-ROBIN");
			ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
		    sc->sc_ndisks);
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_raid3_device_state2str(sc->sc_state));
	}
}
3043
3044static void
3045g_raid3_shutdown(void *arg, int howto)
3046{
3047 struct g_class *mp;
3048 struct g_geom *gp, *gp2;
3049
3050 mp = arg;
3051 DROP_GIANT();
3052 g_topology_lock();
3053 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3054 if (gp->softc == NULL)
3055 continue;
3056 g_raid3_destroy(gp->softc, 1);
3057 }
3058 g_topology_unlock();
3059 PICKUP_GIANT();
3060#if 0
3061 tsleep(&gp, PRIBIO, "r3:shutdown", hz * 20);
3062#endif
3063}
3064
static void
g_raid3_init(struct g_class *mp)
{

	/*
	 * Hook into the post-sync shutdown events so that raid3 devices
	 * are torn down (see g_raid3_shutdown()) before the system
	 * halts.  Registration failure is not fatal - only warn.
	 */
	g_raid3_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    g_raid3_shutdown, mp, SHUTDOWN_PRI_FIRST);
	if (g_raid3_ehtag == NULL)
		G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event.");
}
3074
3075static void
3076g_raid3_fini(struct g_class *mp)
3077{
3078
3079 if (g_raid3_ehtag == NULL)
3080 return;
3081 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_ehtag);
3082}
3083
3084DECLARE_GEOM_CLASS(g_raid3_class, g_raid3);