g_raid3.c revision 139379
/*-
 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25133819Stjr */ 26133819Stjr 27133819Stjr#include <sys/cdefs.h> 28133819Stjr__FBSDID("$FreeBSD: head/sys/geom/raid3/g_raid3.c 139379 2004-12-28 21:52:45Z pjd $"); 29133819Stjr 30133819Stjr#include <sys/param.h> 31133819Stjr#include <sys/systm.h> 32133819Stjr#include <sys/kernel.h> 33133819Stjr#include <sys/module.h> 34143198Ssobomax#include <sys/limits.h> 35133819Stjr#include <sys/lock.h> 36133819Stjr#include <sys/mutex.h> 37133819Stjr#include <sys/bio.h> 38133819Stjr#include <sys/sysctl.h> 39133819Stjr#include <sys/malloc.h> 40133819Stjr#include <sys/eventhandler.h> 41133819Stjr#include <vm/uma.h> 42133819Stjr#include <machine/atomic.h> 43133819Stjr#include <geom/geom.h> 44133819Stjr#include <sys/proc.h> 45133819Stjr#include <sys/kthread.h> 46133819Stjr#include <geom/raid3/g_raid3.h> 47133819Stjr 48133819Stjr 49133819Stjrstatic MALLOC_DEFINE(M_RAID3, "raid3 data", "GEOM_RAID3 Data"); 50133819Stjr 51133819StjrSYSCTL_DECL(_kern_geom); 52133819StjrSYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, "GEOM_RAID3 stuff"); 53133819Stjru_int g_raid3_debug = 0; 54133819StjrTUNABLE_INT("kern.geom.raid3.debug", &g_raid3_debug); 55133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RW, &g_raid3_debug, 0, 56133819Stjr "Debug level"); 57133819Stjrstatic u_int g_raid3_timeout = 4; 58133819StjrTUNABLE_INT("kern.geom.raid3.timeout", &g_raid3_timeout); 59133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RW, &g_raid3_timeout, 60133819Stjr 0, "Time to wait on all raid3 components"); 61133819Stjrstatic u_int g_raid3_idletime = 5; 62133819StjrTUNABLE_INT("kern.geom.raid3.idletime", &g_raid3_idletime); 63133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RW, 64133819Stjr &g_raid3_idletime, 0, "Mark components as clean when idling"); 65133819Stjrstatic u_int g_raid3_reqs_per_sync = 5; 66133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, reqs_per_sync, CTLFLAG_RW, 67133819Stjr &g_raid3_reqs_per_sync, 0, 68133819Stjr "Number of regular I/O 
requests per synchronization request"); 69133819Stjrstatic u_int g_raid3_syncs_per_sec = 100; 70133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, syncs_per_sec, CTLFLAG_RW, 71133819Stjr &g_raid3_syncs_per_sec, 0, 72133819Stjr "Number of synchronizations requests per second"); 73133819Stjr 74133819Stjrstatic u_int g_raid3_n64k = 50; 75133819StjrTUNABLE_INT("kern.geom.raid3.n64k", &g_raid3_n64k); 76133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RD, &g_raid3_n64k, 0, 77133819Stjr "Maximum number of 64kB allocations"); 78133819Stjrstatic u_int g_raid3_n16k = 200; 79133819StjrTUNABLE_INT("kern.geom.raid3.n16k", &g_raid3_n16k); 80133819StjrSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RD, &g_raid3_n16k, 0, 81133819Stjr "Maximum number of 16kB allocations"); 82133819Stjrstatic u_int g_raid3_n4k = 1200; 83133819StjrTUNABLE_INT("kern.geom.raid3.n4k", &g_raid3_n4k); 84156919SnetchildSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RD, &g_raid3_n4k, 0, 85156919Snetchild "Maximum number of 4kB allocations"); 86156919Snetchild 87156919SnetchildSYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, 88133819Stjr "GEOM_RAID3 statistics"); 89133819Stjrstatic u_int g_raid3_parity_mismatch = 0; 90133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, 91133819Stjr &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); 92133819Stjrstatic u_int g_raid3_64k_requested = 0; 93133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_requested, CTLFLAG_RD, 94133819Stjr &g_raid3_64k_requested, 0, "Number of requested 64kB allocations"); 95133819Stjrstatic u_int g_raid3_64k_failed = 0; 96133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_failed, CTLFLAG_RD, 97133819Stjr &g_raid3_64k_failed, 0, "Number of failed 64kB allocations"); 98133819Stjrstatic u_int g_raid3_16k_requested = 0; 99133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_requested, CTLFLAG_RD, 100133819Stjr &g_raid3_16k_requested, 0, 
"Number of requested 16kB allocations"); 101133819Stjrstatic u_int g_raid3_16k_failed = 0; 102133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_failed, CTLFLAG_RD, 103133819Stjr &g_raid3_16k_failed, 0, "Number of failed 16kB allocations"); 104133819Stjrstatic u_int g_raid3_4k_requested = 0; 105133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_requested, CTLFLAG_RD, 106133819Stjr &g_raid3_4k_requested, 0, "Number of requested 4kB allocations"); 107133819Stjrstatic u_int g_raid3_4k_failed = 0; 108133819StjrSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_failed, CTLFLAG_RD, 109133819Stjr &g_raid3_4k_failed, 0, "Number of failed 4kB allocations"); 110133819Stjr 111133819Stjr#define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 112133819Stjr G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 113133819Stjr msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 114133819Stjr G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 115133819Stjr} while (0) 116133819Stjr 117133819Stjrstatic eventhandler_tag g_raid3_ehtag = NULL; 118133819Stjr 119133819Stjrstatic int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp, 120133819Stjr struct g_geom *gp); 121133819Stjrstatic g_taste_t g_raid3_taste; 122133819Stjrstatic void g_raid3_init(struct g_class *mp); 123133819Stjrstatic void g_raid3_fini(struct g_class *mp); 124133819Stjr 125133819Stjrstruct g_class g_raid3_class = { 126133819Stjr .name = G_RAID3_CLASS_NAME, 127133819Stjr .version = G_VERSION, 128133819Stjr .ctlreq = g_raid3_config, 129133819Stjr .taste = g_raid3_taste, 130133819Stjr .destroy_geom = g_raid3_destroy_geom, 131133819Stjr .init = g_raid3_init, 132133819Stjr .fini = g_raid3_fini 133133819Stjr}; 134133819Stjr 135133819Stjr 136133819Stjrstatic void g_raid3_destroy_provider(struct g_raid3_softc *sc); 137133819Stjrstatic int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state); 138133819Stjrstatic void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t 
force); 139133819Stjrstatic void g_raid3_dumpconf(struct sbuf *sb, const char *indent, 140133819Stjr struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 141133819Stjrstatic void g_raid3_sync_stop(struct g_raid3_softc *sc, int type); 142133819Stjr 143133819Stjr 144133819Stjrstatic const char * 145133819Stjrg_raid3_disk_state2str(int state) 146133819Stjr{ 147133819Stjr 148133819Stjr switch (state) { 149133819Stjr case G_RAID3_DISK_STATE_NODISK: 150133819Stjr return ("NODISK"); 151133819Stjr case G_RAID3_DISK_STATE_NONE: 152133819Stjr return ("NONE"); 153133819Stjr case G_RAID3_DISK_STATE_NEW: 154133819Stjr return ("NEW"); 155133819Stjr case G_RAID3_DISK_STATE_ACTIVE: 156133819Stjr return ("ACTIVE"); 157133819Stjr case G_RAID3_DISK_STATE_STALE: 158133819Stjr return ("STALE"); 159133819Stjr case G_RAID3_DISK_STATE_SYNCHRONIZING: 160133819Stjr return ("SYNCHRONIZING"); 161133819Stjr case G_RAID3_DISK_STATE_DISCONNECTED: 162133819Stjr return ("DISCONNECTED"); 163133819Stjr default: 164133819Stjr return ("INVALID"); 165133819Stjr } 166133819Stjr} 167133819Stjr 168133819Stjrstatic const char * 169133819Stjrg_raid3_device_state2str(int state) 170133819Stjr{ 171133819Stjr 172133819Stjr switch (state) { 173133819Stjr case G_RAID3_DEVICE_STATE_STARTING: 174133819Stjr return ("STARTING"); 175133819Stjr case G_RAID3_DEVICE_STATE_DEGRADED: 176133819Stjr return ("DEGRADED"); 177133819Stjr case G_RAID3_DEVICE_STATE_COMPLETE: 178133819Stjr return ("COMPLETE"); 179133819Stjr default: 180133819Stjr return ("INVALID"); 181133819Stjr } 182133819Stjr} 183133819Stjr 184133819Stjrconst char * 185133819Stjrg_raid3_get_diskname(struct g_raid3_disk *disk) 186133819Stjr{ 187133819Stjr 188133819Stjr if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 189133819Stjr return ("[unknown]"); 190133819Stjr return (disk->d_name); 191133819Stjr} 192133819Stjr 193133819Stjr#define g_raid3_xor(src1, src2, dst, size) \ 194133819Stjr _g_raid3_xor((uint64_t *)(src1), (uint64_t 
*)(src2), \ 195133819Stjr (uint64_t *)(dst), (size_t)size) 196133819Stjrstatic void 197133819Stjr_g_raid3_xor(uint64_t *src1, uint64_t *src2, uint64_t *dst, size_t size) 198133819Stjr{ 199161309Snetchild 200161309Snetchild KASSERT((size % 128) == 0, ("Invalid size: %zu.", size)); 201161309Snetchild for (; size > 0; size -= 128) { 202133819Stjr *dst++ = (*src1++) ^ (*src2++); 203133819Stjr *dst++ = (*src1++) ^ (*src2++); 204133819Stjr *dst++ = (*src1++) ^ (*src2++); 205133819Stjr *dst++ = (*src1++) ^ (*src2++); 206133819Stjr *dst++ = (*src1++) ^ (*src2++); 207133819Stjr *dst++ = (*src1++) ^ (*src2++); 208133819Stjr *dst++ = (*src1++) ^ (*src2++); 209133819Stjr *dst++ = (*src1++) ^ (*src2++); 210133819Stjr *dst++ = (*src1++) ^ (*src2++); 211133819Stjr *dst++ = (*src1++) ^ (*src2++); 212133819Stjr *dst++ = (*src1++) ^ (*src2++); 213133819Stjr *dst++ = (*src1++) ^ (*src2++); 214133819Stjr *dst++ = (*src1++) ^ (*src2++); 215133819Stjr *dst++ = (*src1++) ^ (*src2++); 216133819Stjr *dst++ = (*src1++) ^ (*src2++); 217133819Stjr *dst++ = (*src1++) ^ (*src2++); 218133819Stjr } 219133819Stjr} 220133819Stjr 221133819Stjrstatic int 222133819Stjrg_raid3_is_zero(struct bio *bp) 223133819Stjr{ 224133819Stjr static const uint64_t zeros[] = { 225133819Stjr 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 226133819Stjr }; 227133819Stjr u_char *addr; 228133819Stjr ssize_t size; 229156843Snetchild 230156843Snetchild size = bp->bio_length; 231156843Snetchild addr = (u_char *)bp->bio_data; 232156843Snetchild for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { 233133819Stjr if (bcmp(addr, zeros, sizeof(zeros)) != 0) 234133819Stjr return (0); 235133819Stjr } 236133819Stjr return (1); 237133819Stjr} 238133819Stjr 239133819Stjr/* 240133819Stjr * --- Events handling functions --- 241133819Stjr * Events in geom_raid3 are used to maintain disks and device status 242133819Stjr * from one thread to simplify locking. 
243133819Stjr */ 244133819Stjrstatic void 245133819Stjrg_raid3_event_free(struct g_raid3_event *ep) 246133819Stjr{ 247133819Stjr 248133819Stjr free(ep, M_RAID3); 249133819Stjr} 250133819Stjr 251133819Stjrint 252133819Stjrg_raid3_event_send(void *arg, int state, int flags) 253133819Stjr{ 254133819Stjr struct g_raid3_softc *sc; 255133819Stjr struct g_raid3_disk *disk; 256133819Stjr struct g_raid3_event *ep; 257133819Stjr int error; 258133819Stjr 259133819Stjr ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK); 260133819Stjr G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep); 261133819Stjr if ((flags & G_RAID3_EVENT_DEVICE) != 0) { 262133819Stjr disk = NULL; 263133819Stjr sc = arg; 264133819Stjr } else { 265133819Stjr disk = arg; 266133819Stjr sc = disk->d_softc; 267133819Stjr } 268156919Snetchild ep->e_disk = disk; 269156843Snetchild ep->e_state = state; 270156919Snetchild ep->e_flags = flags; 271156843Snetchild ep->e_error = 0; 272133819Stjr mtx_lock(&sc->sc_events_mtx); 273133819Stjr TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 274133819Stjr mtx_unlock(&sc->sc_events_mtx); 275133819Stjr G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 276133819Stjr mtx_lock(&sc->sc_queue_mtx); 277133819Stjr wakeup(sc); 278133819Stjr wakeup(&sc->sc_queue); 279133819Stjr mtx_unlock(&sc->sc_queue_mtx); 280133819Stjr if ((flags & G_RAID3_EVENT_DONTWAIT) != 0) 281133819Stjr return (0); 282133819Stjr g_topology_assert(); 283133819Stjr G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 284133819Stjr g_topology_unlock(); 285133819Stjr while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) { 286133819Stjr mtx_lock(&sc->sc_events_mtx); 287133819Stjr MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event", 288133819Stjr hz * 5); 289133819Stjr } 290133819Stjr /* Don't even try to use 'sc' here, because it could be already dead. 
*/ 291133819Stjr g_topology_lock(); 292133819Stjr error = ep->e_error; 293133819Stjr g_raid3_event_free(ep); 294133819Stjr return (error); 295156843Snetchild} 296156843Snetchild 297156843Snetchildstatic struct g_raid3_event * 298156843Snetchildg_raid3_event_get(struct g_raid3_softc *sc) 299147142Ssobomax{ 300147142Ssobomax struct g_raid3_event *ep; 301147142Ssobomax 302147142Ssobomax mtx_lock(&sc->sc_events_mtx); 303133819Stjr ep = TAILQ_FIRST(&sc->sc_events); 304133819Stjr mtx_unlock(&sc->sc_events_mtx); 305133819Stjr return (ep); 306133819Stjr} 307133819Stjr 308133819Stjrstatic void 309133819Stjrg_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep) 310133819Stjr{ 311133819Stjr 312133819Stjr mtx_lock(&sc->sc_events_mtx); 313133819Stjr TAILQ_REMOVE(&sc->sc_events, ep, e_next); 314133819Stjr mtx_unlock(&sc->sc_events_mtx); 315133819Stjr} 316133819Stjr 317133819Stjrstatic void 318133819Stjrg_raid3_event_cancel(struct g_raid3_disk *disk) 319133819Stjr{ 320133819Stjr struct g_raid3_softc *sc; 321133819Stjr struct g_raid3_event *ep, *tmpep; 322133819Stjr 323133819Stjr g_topology_assert(); 324133819Stjr 325133819Stjr sc = disk->d_softc; 326133819Stjr mtx_lock(&sc->sc_events_mtx); 327133819Stjr TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 328133819Stjr if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) 329133819Stjr continue; 330133819Stjr if (ep->e_disk != disk) 331133819Stjr continue; 332133819Stjr TAILQ_REMOVE(&sc->sc_events, ep, e_next); 333133819Stjr if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 334133819Stjr g_raid3_event_free(ep); 335133819Stjr else { 336133819Stjr ep->e_error = ECANCELED; 337133819Stjr wakeup(ep); 338133819Stjr } 339133819Stjr } 340133819Stjr mtx_unlock(&sc->sc_events_mtx); 341133819Stjr} 342133819Stjr 343133819Stjr/* 344133819Stjr * Return the number of disks in the given state. 345133819Stjr * If state is equal to -1, count all connected disks. 
346133819Stjr */ 347133819Stjru_int 348133819Stjrg_raid3_ndisks(struct g_raid3_softc *sc, int state) 349133819Stjr{ 350133819Stjr struct g_raid3_disk *disk; 351133819Stjr u_int n, ndisks; 352133819Stjr 353133819Stjr for (n = ndisks = 0; n < sc->sc_ndisks; n++) { 354133819Stjr disk = &sc->sc_disks[n]; 355133819Stjr if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 356133819Stjr continue; 357133819Stjr if (state == -1 || disk->d_state == state) 358133819Stjr ndisks++; 359133819Stjr } 360133819Stjr return (ndisks); 361133819Stjr} 362133819Stjr 363133819Stjrstatic u_int 364133819Stjrg_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp) 365133819Stjr{ 366133819Stjr struct bio *bp; 367133819Stjr u_int nreqs = 0; 368133819Stjr 369133819Stjr mtx_lock(&sc->sc_queue_mtx); 370133819Stjr TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 371133819Stjr if (bp->bio_from == cp) 372133819Stjr nreqs++; 373161309Snetchild } 374161309Snetchild mtx_unlock(&sc->sc_queue_mtx); 375161309Snetchild return (nreqs); 376133819Stjr} 377133819Stjr 378133819Stjrstatic int 379133819Stjrg_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp) 380133819Stjr{ 381133819Stjr 382133819Stjr if (cp->index > 0) { 383133819Stjr G_RAID3_DEBUG(2, 384133819Stjr "I/O requests for %s exist, can't destroy it now.", 385133819Stjr cp->provider->name); 386133819Stjr return (1); 387133819Stjr } 388133819Stjr if (g_raid3_nrequests(sc, cp) > 0) { 389133819Stjr G_RAID3_DEBUG(2, 390133819Stjr "I/O requests for %s in queue, can't destroy it now.", 391133819Stjr cp->provider->name); 392133819Stjr return (1); 393133819Stjr } 394133819Stjr return (0); 395133819Stjr} 396133819Stjr 397133819Stjrstatic void 398133819Stjrg_raid3_destroy_consumer(void *arg, int flags __unused) 399133819Stjr{ 400133819Stjr struct g_consumer *cp; 401133819Stjr 402133819Stjr cp = arg; 403133819Stjr G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 404133819Stjr g_detach(cp); 405133819Stjr g_destroy_consumer(cp); 
406133819Stjr} 407133819Stjr 408133819Stjrstatic void 409133819Stjrg_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 410133819Stjr{ 411133819Stjr struct g_provider *pp; 412133819Stjr int retaste_wait; 413133819Stjr 414133819Stjr g_topology_assert(); 415133819Stjr 416133819Stjr cp->private = NULL; 417133819Stjr if (g_raid3_is_busy(sc, cp)) 418133819Stjr return; 419133819Stjr G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name); 420133819Stjr pp = cp->provider; 421133819Stjr retaste_wait = 0; 422133819Stjr if (cp->acw == 1) { 423133819Stjr if ((pp->geom->flags & G_GEOM_WITHER) == 0) 424133819Stjr retaste_wait = 1; 425133819Stjr } 426133819Stjr G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 427133819Stjr -cp->acw, -cp->ace, 0); 428133819Stjr if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 429133819Stjr g_access(cp, -cp->acr, -cp->acw, -cp->ace); 430133819Stjr if (retaste_wait) { 431133819Stjr /* 432133819Stjr * After retaste event was send (inside g_access()), we can send 433133819Stjr * event to detach and destroy consumer. 434133819Stjr * A class, which has consumer to the given provider connected 435133819Stjr * will not receive retaste event for the provider. 436133819Stjr * This is the way how I ignore retaste events when I close 437133819Stjr * consumers opened for write: I detach and destroy consumer 438133819Stjr * after retaste event is sent. 
439133819Stjr */ 440133819Stjr g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL); 441133819Stjr return; 442133819Stjr } 443133819Stjr G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name); 444133819Stjr g_detach(cp); 445133819Stjr g_destroy_consumer(cp); 446133819Stjr} 447133819Stjr 448133819Stjrstatic int 449133819Stjrg_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp) 450133819Stjr{ 451133819Stjr int error; 452133819Stjr 453133819Stjr g_topology_assert(); 454133819Stjr KASSERT(disk->d_consumer == NULL, 455133819Stjr ("Disk already connected (device %s).", disk->d_softc->sc_name)); 456133819Stjr 457133819Stjr disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom); 458133819Stjr disk->d_consumer->private = disk; 459133819Stjr disk->d_consumer->index = 0; 460133819Stjr error = g_attach(disk->d_consumer, pp); 461133819Stjr if (error != 0) 462133819Stjr return (error); 463133819Stjr error = g_access(disk->d_consumer, 1, 1, 1); 464133819Stjr if (error != 0) { 465133819Stjr G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).", 466133819Stjr pp->name, error); 467133819Stjr return (error); 468133819Stjr } 469133819Stjr G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk)); 470133819Stjr return (0); 471133819Stjr} 472133819Stjr 473133819Stjrstatic void 474133819Stjrg_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 475133819Stjr{ 476133819Stjr 477133819Stjr g_topology_assert(); 478133819Stjr 479133819Stjr if (cp == NULL) 480133819Stjr return; 481133819Stjr if (cp->provider != NULL) 482133819Stjr g_raid3_kill_consumer(sc, cp); 483133819Stjr else 484133819Stjr g_destroy_consumer(cp); 485133819Stjr} 486133819Stjr 487133819Stjr/* 488133819Stjr * Initialize disk. This means allocate memory, create consumer, attach it 489133819Stjr * to the provider and open access (r1w1e1) to it. 
490133819Stjr */ 491133819Stjrstatic struct g_raid3_disk * 492133819Stjrg_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp, 493133819Stjr struct g_raid3_metadata *md, int *errorp) 494133819Stjr{ 495133819Stjr struct g_raid3_disk *disk; 496133819Stjr int error; 497133819Stjr 498133819Stjr disk = &sc->sc_disks[md->md_no]; 499133819Stjr error = g_raid3_connect_disk(disk, pp); 500133819Stjr if (error != 0) 501133819Stjr goto fail; 502133819Stjr disk->d_state = G_RAID3_DISK_STATE_NONE; 503133819Stjr disk->d_flags = md->md_dflags; 504133819Stjr if (md->md_provider[0] != '\0') 505133819Stjr disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED; 506133819Stjr disk->d_sync.ds_consumer = NULL; 507133819Stjr disk->d_sync.ds_offset = md->md_sync_offset; 508133819Stjr disk->d_sync.ds_offset_done = md->md_sync_offset; 509133819Stjr disk->d_sync.ds_resync = -1; 510133819Stjr disk->d_genid = md->md_genid; 511133819Stjr disk->d_sync.ds_syncid = md->md_syncid; 512133819Stjr if (errorp != NULL) 513133819Stjr *errorp = 0; 514133819Stjr return (disk); 515133819Stjrfail: 516133819Stjr if (errorp != NULL) 517133819Stjr *errorp = error; 518133819Stjr if (disk != NULL) 519133819Stjr g_raid3_disconnect_consumer(sc, disk->d_consumer); 520133819Stjr return (NULL); 521133819Stjr} 522133819Stjr 523133819Stjrstatic void 524163736Snetchildg_raid3_destroy_disk(struct g_raid3_disk *disk) 525163736Snetchild{ 526163736Snetchild struct g_raid3_softc *sc; 527163736Snetchild 528163736Snetchild g_topology_assert(); 529133819Stjr 530133819Stjr if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 531133819Stjr return; 532133819Stjr g_raid3_event_cancel(disk); 533133819Stjr sc = disk->d_softc; 534133819Stjr switch (disk->d_state) { 535133819Stjr case G_RAID3_DISK_STATE_SYNCHRONIZING: 536133819Stjr if (sc->sc_syncdisk != NULL) 537133819Stjr g_raid3_sync_stop(sc, 1); 538133819Stjr /* FALLTHROUGH */ 539133819Stjr case G_RAID3_DISK_STATE_NEW: 540133819Stjr case G_RAID3_DISK_STATE_STALE: 541133819Stjr case 
G_RAID3_DISK_STATE_ACTIVE: 542133819Stjr g_raid3_disconnect_consumer(sc, disk->d_consumer); 543133819Stjr disk->d_consumer = NULL; 544133819Stjr break; 545133819Stjr default: 546158407Snetchild KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 547158407Snetchild g_raid3_get_diskname(disk), 548133819Stjr g_raid3_disk_state2str(disk->d_state))); 549133819Stjr } 550133819Stjr disk->d_state = G_RAID3_DISK_STATE_NODISK; 551133819Stjr} 552133819Stjr 553133819Stjrstatic void 554133819Stjrg_raid3_destroy_device(struct g_raid3_softc *sc) 555133819Stjr{ 556133819Stjr struct g_raid3_event *ep; 557133819Stjr struct g_raid3_disk *disk; 558133819Stjr struct g_geom *gp; 559133819Stjr struct g_consumer *cp; 560133819Stjr u_int n; 561133819Stjr 562133819Stjr g_topology_assert(); 563133819Stjr 564133819Stjr gp = sc->sc_geom; 565133819Stjr if (sc->sc_provider != NULL) 566133819Stjr g_raid3_destroy_provider(sc); 567133819Stjr for (n = 0; n < sc->sc_ndisks; n++) { 568133819Stjr disk = &sc->sc_disks[n]; 569133819Stjr if (disk->d_state != G_RAID3_DISK_STATE_NODISK) { 570133819Stjr disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 571133819Stjr g_raid3_update_metadata(disk); 572133819Stjr g_raid3_destroy_disk(disk); 573133819Stjr } 574133819Stjr } 575133819Stjr while ((ep = g_raid3_event_get(sc)) != NULL) { 576133819Stjr g_raid3_event_remove(sc, ep); 577133819Stjr if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 578133819Stjr g_raid3_event_free(ep); 579133819Stjr else { 580133819Stjr ep->e_error = ECANCELED; 581133819Stjr ep->e_flags |= G_RAID3_EVENT_DONE; 582133819Stjr G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); 583133819Stjr mtx_lock(&sc->sc_events_mtx); 584133819Stjr wakeup(ep); 585133819Stjr mtx_unlock(&sc->sc_events_mtx); 586133819Stjr } 587133819Stjr } 588133819Stjr callout_drain(&sc->sc_callout); 589133819Stjr gp->softc = NULL; 590133819Stjr cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer); 591133819Stjr if (cp != NULL) 592133819Stjr g_raid3_disconnect_consumer(sc, cp); 593133819Stjr 
sc->sc_sync.ds_geom->softc = NULL; 594133819Stjr g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 595133819Stjr uma_zdestroy(sc->sc_zone_64k); 596133819Stjr uma_zdestroy(sc->sc_zone_16k); 597133819Stjr uma_zdestroy(sc->sc_zone_4k); 598133819Stjr mtx_destroy(&sc->sc_queue_mtx); 599133819Stjr mtx_destroy(&sc->sc_events_mtx); 600133819Stjr G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name); 601133819Stjr g_wither_geom(gp, ENXIO); 602133819Stjr} 603133819Stjr 604133819Stjrstatic void 605133819Stjrg_raid3_orphan(struct g_consumer *cp) 606133819Stjr{ 607133819Stjr struct g_raid3_disk *disk; 608133819Stjr 609133819Stjr g_topology_assert(); 610133819Stjr 611133819Stjr disk = cp->private; 612133819Stjr if (disk == NULL) 613133819Stjr return; 614133819Stjr disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID_OFW; 615133819Stjr g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, 616133819Stjr G_RAID3_EVENT_DONTWAIT); 617133819Stjr} 618133819Stjr 619133819Stjrstatic void 620133819Stjrg_raid3_spoiled(struct g_consumer *cp) 621133819Stjr{ 622133819Stjr struct g_raid3_disk *disk; 623133819Stjr 624133819Stjr g_topology_assert(); 625133819Stjr 626133819Stjr disk = cp->private; 627133819Stjr if (disk == NULL) 628133819Stjr return; 629133819Stjr disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID_IMM; 630133819Stjr g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, 631133819Stjr G_RAID3_EVENT_DONTWAIT); 632133819Stjr} 633133819Stjr 634133819Stjrstatic int 635133819Stjrg_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 636133819Stjr{ 637133819Stjr struct g_raid3_softc *sc; 638133819Stjr struct g_consumer *cp; 639133819Stjr off_t offset, length; 640133819Stjr u_char *sector; 641133819Stjr int error = 0; 642133819Stjr 643133819Stjr g_topology_assert(); 644133819Stjr 645133819Stjr sc = disk->d_softc; 646133819Stjr cp = disk->d_consumer; 647133819Stjr KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 648133819Stjr KASSERT(cp->provider != NULL, ("NULL 
provider (%s).", sc->sc_name)); 649133819Stjr KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 650133819Stjr ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, 651133819Stjr cp->acw, cp->ace)); 652133819Stjr length = cp->provider->sectorsize; 653133819Stjr offset = cp->provider->mediasize - length; 654133819Stjr sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO); 655133819Stjr if (md != NULL) 656133819Stjr raid3_metadata_encode(md, sector); 657133819Stjr g_topology_unlock(); 658133819Stjr error = g_write_data(cp, offset, sector, length); 659133819Stjr g_topology_lock(); 660133819Stjr free(sector, M_RAID3); 661133819Stjr if (error != 0) { 662133819Stjr disk->d_softc->sc_bump_id = G_RAID3_BUMP_GENID_IMM; 663133819Stjr g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, 664133819Stjr G_RAID3_EVENT_DONTWAIT); 665133819Stjr } 666133819Stjr return (error); 667133819Stjr} 668133819Stjr 669133819Stjrint 670133819Stjrg_raid3_clear_metadata(struct g_raid3_disk *disk) 671133819Stjr{ 672133819Stjr int error; 673133819Stjr 674133819Stjr g_topology_assert(); 675133819Stjr error = g_raid3_write_metadata(disk, NULL); 676133819Stjr if (error == 0) { 677133819Stjr G_RAID3_DEBUG(2, "Metadata on %s cleared.", 678133819Stjr g_raid3_get_diskname(disk)); 679133819Stjr } else { 680161309Snetchild G_RAID3_DEBUG(0, 681161309Snetchild "Cannot clear metadata on disk %s (error=%d).", 682161309Snetchild g_raid3_get_diskname(disk), error); 683133819Stjr } 684133819Stjr return (error); 685133819Stjr} 686133819Stjr 687133819Stjrvoid 688133819Stjrg_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 689133819Stjr{ 690133819Stjr struct g_raid3_softc *sc; 691133819Stjr 692133819Stjr sc = disk->d_softc; 693133819Stjr strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic)); 694133819Stjr md->md_version = G_RAID3_VERSION; 695133819Stjr strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 696133819Stjr md->md_id = sc->sc_id; 697133819Stjr 
md->md_all = sc->sc_ndisks; 698133819Stjr md->md_genid = sc->sc_genid; 699133819Stjr md->md_mediasize = sc->sc_mediasize; 700133819Stjr md->md_sectorsize = sc->sc_sectorsize; 701133819Stjr md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK); 702133819Stjr md->md_no = disk->d_no; 703133819Stjr md->md_syncid = disk->d_sync.ds_syncid; 704133819Stjr md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK); 705133819Stjr if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) 706133819Stjr md->md_sync_offset = disk->d_sync.ds_offset_done; 707133819Stjr else 708133819Stjr md->md_sync_offset = 0; 709133819Stjr if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && 710133819Stjr disk->d_consumer != NULL && disk->d_consumer->provider != NULL) { 711133819Stjr strlcpy(md->md_provider, disk->d_consumer->provider->name, 712133819Stjr sizeof(md->md_provider)); 713133819Stjr } else { 714133819Stjr bzero(md->md_provider, sizeof(md->md_provider)); 715133819Stjr } 716133819Stjr} 717133819Stjr 718133819Stjrvoid 719161309Snetchildg_raid3_update_metadata(struct g_raid3_disk *disk) 720161309Snetchild{ 721161309Snetchild struct g_raid3_metadata md; 722161309Snetchild int error; 723161309Snetchild 724161309Snetchild g_topology_assert(); 725161309Snetchild g_raid3_fill_metadata(disk, &md); 726161309Snetchild error = g_raid3_write_metadata(disk, &md); 727161309Snetchild if (error == 0) { 728161309Snetchild G_RAID3_DEBUG(2, "Metadata on %s updated.", 729161309Snetchild g_raid3_get_diskname(disk)); 730161309Snetchild } else { 731133819Stjr G_RAID3_DEBUG(0, 732133819Stjr "Cannot update metadata on disk %s (error=%d).", 733133819Stjr g_raid3_get_diskname(disk), error); 734161309Snetchild } 735161309Snetchild} 736161309Snetchild 737159801Snetchildstatic void 738159801Snetchildg_raid3_bump_syncid(struct g_raid3_softc *sc) 739159801Snetchild{ 740159801Snetchild struct g_raid3_disk *disk; 741159801Snetchild u_int n; 742159801Snetchild 743159801Snetchild g_topology_assert(); 
744159801Snetchild KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 745159801Snetchild ("%s called with no active disks (device=%s).", __func__, 746159801Snetchild sc->sc_name)); 747159801Snetchild 748159801Snetchild sc->sc_syncid++; 749159801Snetchild G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 750159801Snetchild sc->sc_syncid); 751159801Snetchild for (n = 0; n < sc->sc_ndisks; n++) { 752159801Snetchild disk = &sc->sc_disks[n]; 753161309Snetchild if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 754159801Snetchild disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 755159801Snetchild disk->d_sync.ds_syncid = sc->sc_syncid; 756159801Snetchild g_raid3_update_metadata(disk); 757159801Snetchild } 758159801Snetchild } 759159801Snetchild} 760159801Snetchild 761159801Snetchildstatic void 762159801Snetchildg_raid3_bump_genid(struct g_raid3_softc *sc) 763159801Snetchild{ 764159801Snetchild struct g_raid3_disk *disk; 765159801Snetchild u_int n; 766159801Snetchild 767159801Snetchild g_topology_assert(); 768159801Snetchild KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 769159801Snetchild ("%s called with no active disks (device=%s).", __func__, 770159801Snetchild sc->sc_name)); 771161309Snetchild 772161309Snetchild sc->sc_genid++; 773159801Snetchild G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 774159801Snetchild sc->sc_genid); 775161309Snetchild for (n = 0; n < sc->sc_ndisks; n++) { 776161309Snetchild disk = &sc->sc_disks[n]; 777159801Snetchild if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 778159801Snetchild disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 779161309Snetchild disk->d_genid = sc->sc_genid; 780161309Snetchild g_raid3_update_metadata(disk); 781159801Snetchild } 782159801Snetchild } 783161309Snetchild} 784161309Snetchild 785161309Snetchildstatic void 786161309Snetchildg_raid3_idle(struct g_raid3_softc *sc) 787159801Snetchild{ 788159801Snetchild struct g_raid3_disk *disk; 789161666Snetchild 
	u_int i;

	/* Nothing to do unless the provider is open for writing. */
	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
		return;
	sc->sc_idle = 1;
	g_topology_lock();
	for (i = 0; i < sc->sc_ndisks; i++) {
		disk = &sc->sc_disks[i];
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
			continue;
		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_raid3_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
		g_raid3_update_metadata(disk);
	}
	g_topology_unlock();
}

/*
 * Leave the idle state: mark all active components dirty before accepting
 * new writes, so an unclean shutdown is detectable.
 */
static void
g_raid3_unidle(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	u_int i;

	sc->sc_idle = 0;
	g_topology_lock();
	for (i = 0; i < sc->sc_ndisks; i++) {
		disk = &sc->sc_disks[i];
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
			continue;
		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_raid3_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
		g_raid3_update_metadata(disk);
	}
	g_topology_unlock();
}

/*
 * Return 1 if we should check if RAID3 device is idling.
 */
static int
g_raid3_check_idle(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	u_int i;

	if (sc->sc_idle)
		return (0);
	if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
		return (0);
	/*
	 * Check if there are no in-flight requests.
	 */
	for (i = 0; i < sc->sc_ndisks; i++) {
		disk = &sc->sc_disks[i];
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
			continue;
		/* d_consumer->index counts requests still in flight. */
		if (disk->d_consumer->index > 0)
			return (0);
	}
	return (1);
}

/*
 * Treat bio_driver1 field in parent bio as list head and field bio_caller1
 * in child bio as pointer to the next element on the list.
856159801Snetchild */ 857159801Snetchild#define G_RAID3_HEAD_BIO(pbp) (pbp)->bio_driver1 858159801Snetchild 859159801Snetchild#define G_RAID3_NEXT_BIO(cbp) (cbp)->bio_caller1 860159801Snetchild 861159801Snetchild#define G_RAID3_FOREACH_BIO(pbp, bp) \ 862159801Snetchild for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL; \ 863159801Snetchild (bp) = G_RAID3_NEXT_BIO(bp)) 864159801Snetchild 865159801Snetchild#define G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp) \ 866159801Snetchild for ((bp) = G_RAID3_HEAD_BIO(pbp); \ 867159801Snetchild (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1); \ 868159801Snetchild (bp) = (tmpbp)) 869159801Snetchild 870159801Snetchildstatic void 871159801Snetchildg_raid3_init_bio(struct bio *pbp) 872159801Snetchild{ 873159801Snetchild 874159801Snetchild G_RAID3_HEAD_BIO(pbp) = NULL; 875159801Snetchild} 876159801Snetchild 877159801Snetchildstatic void 878159801Snetchildg_raid3_remove_bio(struct bio *cbp) 879159801Snetchild{ 880159801Snetchild struct bio *pbp, *bp; 881159801Snetchild 882159801Snetchild pbp = cbp->bio_parent; 883159801Snetchild if (G_RAID3_HEAD_BIO(pbp) == cbp) 884159801Snetchild G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 885159801Snetchild else { 886159801Snetchild G_RAID3_FOREACH_BIO(pbp, bp) { 887159801Snetchild if (G_RAID3_NEXT_BIO(bp) == cbp) { 888159801Snetchild G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 889159801Snetchild break; 890159801Snetchild } 891159801Snetchild } 892159801Snetchild } 893159801Snetchild G_RAID3_NEXT_BIO(cbp) = NULL; 894159801Snetchild} 895159801Snetchild 896159801Snetchildstatic void 897159801Snetchildg_raid3_replace_bio(struct bio *sbp, struct bio *dbp) 898159801Snetchild{ 899159801Snetchild struct bio *pbp, *bp; 900159801Snetchild 901159801Snetchild g_raid3_remove_bio(sbp); 902159801Snetchild pbp = dbp->bio_parent; 903159801Snetchild G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); 904159801Snetchild if (G_RAID3_HEAD_BIO(pbp) == dbp) 905159801Snetchild G_RAID3_HEAD_BIO(pbp) = sbp; 
906159801Snetchild else { 907159801Snetchild G_RAID3_FOREACH_BIO(pbp, bp) { 908159801Snetchild if (G_RAID3_NEXT_BIO(bp) == dbp) { 909159801Snetchild G_RAID3_NEXT_BIO(bp) = sbp; 910159801Snetchild break; 911159801Snetchild } 912159801Snetchild } 913159801Snetchild } 914143198Ssobomax G_RAID3_NEXT_BIO(dbp) = NULL; 915133819Stjr} 916133819Stjr 917133819Stjrstatic void 918133819Stjrg_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) 919133819Stjr{ 920133819Stjr struct bio *bp, *pbp; 921133819Stjr size_t size; 922133819Stjr 923133819Stjr pbp = cbp->bio_parent; 924133819Stjr pbp->bio_children--; 925133819Stjr KASSERT(cbp->bio_data != NULL, ("NULL bio_data")); 926133819Stjr size = pbp->bio_length / (sc->sc_ndisks - 1); 927156919Snetchild if (size > 16384) 928133819Stjr uma_zfree(sc->sc_zone_64k, cbp->bio_data); 929133819Stjr else if (size > 4096) 930133819Stjr uma_zfree(sc->sc_zone_16k, cbp->bio_data); 931133819Stjr else 932133819Stjr uma_zfree(sc->sc_zone_4k, cbp->bio_data); 933133819Stjr if (G_RAID3_HEAD_BIO(pbp) == cbp) { 934133819Stjr G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 935133819Stjr G_RAID3_NEXT_BIO(cbp) = NULL; 936133819Stjr g_destroy_bio(cbp); 937133819Stjr } else { 938133819Stjr G_RAID3_FOREACH_BIO(pbp, bp) { 939133819Stjr if (G_RAID3_NEXT_BIO(bp) == cbp) 940133819Stjr break; 941133819Stjr } 942133819Stjr if (bp != NULL) { 943133819Stjr KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, 944133819Stjr ("NULL bp->bio_driver1")); 945133819Stjr G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 946133819Stjr G_RAID3_NEXT_BIO(cbp) = NULL; 947133819Stjr } 948133819Stjr g_destroy_bio(cbp); 949133819Stjr } 950133819Stjr} 951133819Stjr 952133819Stjrstatic struct bio * 953133819Stjrg_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp) 954133819Stjr{ 955133819Stjr struct bio *bp, *cbp; 956133819Stjr size_t size; 957133819Stjr 958161309Snetchild cbp = g_clone_bio(pbp); 959133819Stjr if (cbp == NULL) 960133819Stjr return (NULL); 961133819Stjr size = pbp->bio_length 
/ (sc->sc_ndisks - 1); 962133819Stjr if (size > 16384) { 963133819Stjr cbp->bio_data = uma_zalloc(sc->sc_zone_64k, M_NOWAIT); 964133819Stjr g_raid3_64k_requested++; 965133819Stjr } else if (size > 4096) { 966156843Snetchild cbp->bio_data = uma_zalloc(sc->sc_zone_16k, M_NOWAIT); 967133819Stjr g_raid3_16k_requested++; 968133819Stjr } else { 969133819Stjr cbp->bio_data = uma_zalloc(sc->sc_zone_4k, M_NOWAIT); 970133819Stjr g_raid3_4k_requested++; 971133819Stjr } 972133819Stjr if (cbp->bio_data == NULL) { 973133819Stjr if (size > 16384) 974133819Stjr g_raid3_64k_failed++; 975133819Stjr if (size > 4096) 976156919Snetchild g_raid3_16k_failed++; 977133819Stjr else 978133819Stjr g_raid3_4k_failed++; 979133819Stjr pbp->bio_children--; 980133819Stjr g_destroy_bio(cbp); 981133819Stjr return (NULL); 982156843Snetchild } 983147142Ssobomax G_RAID3_NEXT_BIO(cbp) = NULL; 984133819Stjr if (G_RAID3_HEAD_BIO(pbp) == NULL) 985133819Stjr G_RAID3_HEAD_BIO(pbp) = cbp; 986133819Stjr else { 987133819Stjr G_RAID3_FOREACH_BIO(pbp, bp) { 988133819Stjr if (G_RAID3_NEXT_BIO(bp) == NULL) { 989133819Stjr G_RAID3_NEXT_BIO(bp) = cbp; 990133819Stjr break; 991133819Stjr } 992133819Stjr } 993133819Stjr } 994133819Stjr return (cbp); 995133819Stjr} 996133819Stjr 997133819Stjrstatic void 998133819Stjrg_raid3_scatter(struct bio *pbp) 999133819Stjr{ 1000133819Stjr struct g_raid3_softc *sc; 1001133819Stjr struct g_raid3_disk *disk; 1002133819Stjr struct bio *bp, *cbp; 1003133819Stjr off_t atom, cadd, padd, left; 1004133819Stjr 1005133819Stjr sc = pbp->bio_to->geom->softc; 1006133819Stjr bp = NULL; 1007133819Stjr if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1008133819Stjr /* 1009133819Stjr * Find bio for which we should calculate data. 
1010133819Stjr */ 1011133819Stjr G_RAID3_FOREACH_BIO(pbp, cbp) { 1012133819Stjr if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1013133819Stjr bp = cbp; 1014133819Stjr break; 1015133819Stjr } 1016133819Stjr } 1017133819Stjr KASSERT(bp != NULL, ("NULL parity bio.")); 1018133819Stjr } 1019133819Stjr atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1020133819Stjr cadd = padd = 0; 1021133819Stjr for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1022133819Stjr G_RAID3_FOREACH_BIO(pbp, cbp) { 1023133819Stjr if (cbp == bp) 1024133819Stjr continue; 1025133819Stjr bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom); 1026133819Stjr padd += atom; 1027133819Stjr } 1028133819Stjr cadd += atom; 1029133819Stjr } 1030133819Stjr if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1031133819Stjr struct bio *tmpbp; 1032133819Stjr 1033133819Stjr /* 1034133819Stjr * Calculate parity. 1035133819Stjr */ 1036133819Stjr bzero(bp->bio_data, bp->bio_length); 1037133819Stjr G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1038133819Stjr if (cbp == bp) 1039133819Stjr continue; 1040133819Stjr g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data, 1041133819Stjr bp->bio_length); 1042133819Stjr if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0) 1043133819Stjr g_raid3_destroy_bio(sc, cbp); 1044133819Stjr } 1045133819Stjr } 1046133819Stjr G_RAID3_FOREACH_BIO(pbp, cbp) { 1047133819Stjr struct g_consumer *cp; 1048133819Stjr 1049133819Stjr disk = cbp->bio_caller2; 1050133819Stjr cp = disk->d_consumer; 1051133819Stjr cbp->bio_to = cp->provider; 1052133819Stjr G_RAID3_LOGREQ(3, cbp, "Sending request."); 1053133819Stjr KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1054133819Stjr ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1055133819Stjr cp->acr, cp->acw, cp->ace)); 1056133819Stjr cp->index++; 1057133819Stjr g_io_request(cbp, cp); 1058133819Stjr } 1059133819Stjr} 1060133819Stjr 1061133819Stjrstatic void 1062133819Stjrg_raid3_gather(struct bio 
*pbp) 1063133819Stjr{ 1064133819Stjr struct g_raid3_softc *sc; 1065133819Stjr struct g_raid3_disk *disk; 1066133819Stjr struct bio *xbp, *fbp, *cbp; 1067133819Stjr off_t atom, cadd, padd, left; 1068133819Stjr 1069133819Stjr sc = pbp->bio_to->geom->softc; 1070133819Stjr /* 1071133819Stjr * Find bio for which we have to calculate data. 1072133819Stjr * While going through this path, check if all requests 1073161309Snetchild * succeeded, if not, deny whole request. 1074133819Stjr * If we're in COMPLETE mode, we allow one request to fail, 1075133819Stjr * so if we find one, we're sending it to the parity consumer. 1076133819Stjr * If there are more failed requests, we deny whole request. 1077133819Stjr */ 1078133819Stjr xbp = fbp = NULL; 1079133819Stjr G_RAID3_FOREACH_BIO(pbp, cbp) { 1080133819Stjr if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1081133819Stjr KASSERT(xbp == NULL, ("More than one parity bio.")); 1082133819Stjr xbp = cbp; 1083133819Stjr } 1084133819Stjr if (cbp->bio_error == 0) 1085133819Stjr continue; 1086161309Snetchild /* 1087161309Snetchild * Found failed request. 1088133819Stjr */ 1089161309Snetchild G_RAID3_LOGREQ(0, cbp, "Request failed."); 1090159801Snetchild disk = cbp->bio_caller2; 1091159801Snetchild if (disk != NULL) { 1092159801Snetchild /* 1093159801Snetchild * Actually this is pointless to bump genid, 1094159801Snetchild * because whole device is fucked up. 1095159801Snetchild */ 1096159801Snetchild sc->sc_bump_id |= G_RAID3_BUMP_GENID_IMM; 1097159801Snetchild g_raid3_event_send(disk, 1098159801Snetchild G_RAID3_DISK_STATE_DISCONNECTED, 1099159801Snetchild G_RAID3_EVENT_DONTWAIT); 1100159801Snetchild } 1101159801Snetchild if (fbp == NULL) { 1102159801Snetchild if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { 1103159801Snetchild /* 1104159801Snetchild * We are already in degraded mode, so we can't 1105159801Snetchild * accept any failures. 
1106159801Snetchild */ 1107159801Snetchild if (pbp->bio_error == 0) 1108159801Snetchild pbp->bio_error = fbp->bio_error; 1109159801Snetchild } else { 1110159801Snetchild fbp = cbp; 1111159801Snetchild } 1112159801Snetchild } else { 1113159801Snetchild /* 1114159801Snetchild * Next failed request, that's too many. 1115159801Snetchild */ 1116159801Snetchild if (pbp->bio_error == 0) 1117159801Snetchild pbp->bio_error = fbp->bio_error; 1118159801Snetchild } 1119159801Snetchild } 1120159801Snetchild if (pbp->bio_error != 0) 1121159801Snetchild goto finish; 1122159801Snetchild if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1123159801Snetchild pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; 1124159801Snetchild if (xbp != fbp) 1125159801Snetchild g_raid3_replace_bio(xbp, fbp); 1126159801Snetchild g_raid3_destroy_bio(sc, fbp); 1127159801Snetchild } else if (fbp != NULL) { 1128159801Snetchild struct g_consumer *cp; 1129159801Snetchild 1130159801Snetchild /* 1131159801Snetchild * One request failed, so send the same request to 1132159801Snetchild * the parity consumer. 
1133159801Snetchild */ 1134159801Snetchild disk = pbp->bio_driver2; 1135159801Snetchild if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1136159801Snetchild pbp->bio_error = fbp->bio_error; 1137159801Snetchild goto finish; 1138159801Snetchild } 1139159801Snetchild pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1140159801Snetchild pbp->bio_inbed--; 1141159801Snetchild fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); 1142159801Snetchild if (disk->d_no == sc->sc_ndisks - 1) 1143159801Snetchild fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1144159801Snetchild fbp->bio_error = 0; 1145159801Snetchild fbp->bio_completed = 0; 1146133819Stjr fbp->bio_children = 0; 1147133819Stjr fbp->bio_inbed = 0; 1148133819Stjr cp = disk->d_consumer; 1149143198Ssobomax fbp->bio_caller2 = disk; 1150133819Stjr fbp->bio_to = cp->provider; 1151133819Stjr G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); 1152133819Stjr KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1153133819Stjr ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1154133819Stjr cp->acr, cp->acw, cp->ace)); 1155133819Stjr cp->index++; 1156143198Ssobomax g_io_request(fbp, cp); 1157133819Stjr return; 1158133819Stjr } 1159133819Stjr if (xbp != NULL) { 1160161330Sjhb /* 1161161330Sjhb * Calculate parity. 
1162161330Sjhb */ 1163161330Sjhb G_RAID3_FOREACH_BIO(pbp, cbp) { 1164161330Sjhb if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) 1165161330Sjhb continue; 1166161330Sjhb g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data, 1167161330Sjhb xbp->bio_length); 1168161330Sjhb } 1169161330Sjhb xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; 1170161330Sjhb if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1171161330Sjhb if (!g_raid3_is_zero(xbp)) { 1172161330Sjhb g_raid3_parity_mismatch++; 1173161330Sjhb pbp->bio_error = EIO; 1174161330Sjhb goto finish; 1175161330Sjhb } 1176161330Sjhb g_raid3_destroy_bio(sc, xbp); 1177161330Sjhb } 1178161330Sjhb } 1179161330Sjhb atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1180161330Sjhb cadd = padd = 0; 1181161330Sjhb for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1182161330Sjhb G_RAID3_FOREACH_BIO(pbp, cbp) { 1183161330Sjhb bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom); 1184161330Sjhb pbp->bio_completed += atom; 1185161330Sjhb padd += atom; 1186161330Sjhb } 1187161330Sjhb cadd += atom; 1188161330Sjhb } 1189161330Sjhbfinish: 1190161330Sjhb if (pbp->bio_error == 0) 1191161330Sjhb G_RAID3_LOGREQ(3, pbp, "Request finished."); 1192161330Sjhb else { 1193161330Sjhb if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) 1194161330Sjhb G_RAID3_LOGREQ(1, pbp, "Verification error."); 1195161330Sjhb else 1196161330Sjhb G_RAID3_LOGREQ(0, pbp, "Request failed."); 1197161330Sjhb } 1198161330Sjhb pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; 1199161330Sjhb g_io_deliver(pbp, pbp->bio_error); 1200161330Sjhb while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1201161330Sjhb g_raid3_destroy_bio(sc, cbp); 1202161330Sjhb} 1203161330Sjhb 1204161330Sjhbstatic void 1205161330Sjhbg_raid3_done(struct bio *bp) 1206161330Sjhb{ 1207161330Sjhb struct g_raid3_softc *sc; 1208161330Sjhb 1209161330Sjhb sc = bp->bio_from->geom->softc; 1210161330Sjhb bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR; 1211161330Sjhb G_RAID3_LOGREQ(3, bp, 
	    "Regular request done (error=%d).", bp->bio_error);
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_head(&sc->sc_queue, bp);
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Process one finished regular child request; when the last child of a
 * parent arrives, either gather a read or finalize a write/delete.
 */
static void
g_raid3_regular_request(struct bio *cbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	/* Request completed; drop the consumer's in-flight count. */
	cbp->bio_from->index--;
	pbp = cbp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = cbp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_raid3_kill_consumer(sc, cbp->bio_from);
		g_topology_unlock();
	}

	G_RAID3_LOGREQ(3, cbp, "Request finished.");
	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	/* Wait until every child has come back. */
	if (pbp->bio_inbed != pbp->bio_children)
		return;
	switch (pbp->bio_cmd) {
	case BIO_READ:
		g_raid3_gather(pbp);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		int error = 0;

		pbp->bio_completed = pbp->bio_length;
		while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) {
			if (cbp->bio_error != 0) {
				disk = cbp->bio_caller2;
				if (disk != NULL) {
					sc->sc_bump_id |=
					    G_RAID3_BUMP_GENID_IMM;
					g_raid3_event_send(disk,
					    G_RAID3_DISK_STATE_DISCONNECTED,
					    G_RAID3_EVENT_DONTWAIT);
				}
				/* First failure is tolerated; a second fails the request. */
				if (error == 0)
					error = cbp->bio_error;
				else if (pbp->bio_error == 0) {
					/*
					 * Next failed request, that's too many.
					 */
					pbp->bio_error = error;
				}
			}
			g_raid3_destroy_bio(sc, cbp);
		}
		if (pbp->bio_error == 0)
			G_RAID3_LOGREQ(3, pbp, "Request finished.");
		else
			G_RAID3_LOGREQ(0, pbp, "Request failed.");
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED;
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY;
		g_io_deliver(pbp, pbp->bio_error);
		break;
	    }
	}
}

/*
 * Completion callback for synchronization requests: hand the bio back to
 * the worker thread.
 */
static void
g_raid3_sync_done(struct bio *bp)
{
	struct g_raid3_softc *sc;

	G_RAID3_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_head(&sc->sc_queue, bp);
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * GEOM start routine: queue acceptable requests for the worker thread.
 */
static void
g_raid3_start(struct bio *bp)
{
	struct g_raid3_softc *sc;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_raid3_start() should not be called at all.
	 */
	KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
	    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE),
	    ("Provider's error should be set (error=%d)(device=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_RAID3_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Send one synchronization request.
 */
static void
g_raid3_sync_one(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	struct bio *bp;

	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
	    ("Wrong device state (%s, %s).", sc->sc_name,
	    g_raid3_device_state2str(sc->sc_state)));
	disk = sc->sc_syncdisk;
	KASSERT(disk != NULL, ("No sync disk (%s).", sc->sc_name));
	KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_raid3_get_diskname(disk)));

	bp = g_new_bio();
	if (bp == NULL)
		return;
	bp->bio_parent = NULL;
	bp->bio_cmd = BIO_READ;
	/* ds_offset is per-component; scale to a device offset. */
	bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1);
	bp->bio_length = MIN(MAXPHYS,
	    sc->sc_mediasize - bp->bio_offset);
	bp->bio_cflags = 0;
	bp->bio_done = g_raid3_sync_done;
	bp->bio_data = disk->d_sync.ds_data;
	if (bp->bio_data == NULL) {
		g_destroy_bio(bp);
		return;
	}
	bp->bio_cflags = G_RAID3_BIO_CFLAG_REGSYNC;
	disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
	bp->bio_to = sc->sc_provider;
	G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
	disk->d_sync.ds_consumer->index++;
	g_io_request(bp, disk->d_sync.ds_consumer);
}

/*
 * Handle a completed synchronization bio: turn a finished READ of the
 * device into a WRITE of the reconstructed component data, and account
 * for a finished WRITE (advancing ds_offset_done, activating the disk
 * when it reaches the end).
 */
static void
g_raid3_sync_request(struct bio *bp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_raid3_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;
		u_char *dst, *src;
		off_t left;
		u_int atom;

		if (bp->bio_error != 0) {
			G_RAID3_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
		atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
		/* Compact the component's share in place (dst <= src always). */
		dst = src = bp->bio_data;
		if (disk->d_no == sc->sc_ndisks - 1) {
			u_int n;

			/* Parity component. */
			for (left = bp->bio_length; left > 0;
			    left -= sc->sc_sectorsize) {
				bcopy(src, dst, atom);
				src += atom;
				/* XOR the remaining data atoms of this sector. */
				for (n = 1; n < sc->sc_ndisks - 1; n++) {
					g_raid3_xor(src, dst, dst, atom);
					src += atom;
				}
				dst += atom;
			}
		} else {
			/* Regular component. */
			src += atom * disk->d_no;
			for (left = bp->bio_length; left > 0;
			    left -= sc->sc_sectorsize) {
				bcopy(src, dst, atom);
				src += sc->sc_sectorsize;
				dst += atom;
			}
		}
		/* Rescale device offset/length to per-component values. */
		bp->bio_offset /= sc->sc_ndisks - 1;
		bp->bio_length /= sc->sc_ndisks - 1;
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		bp->bio_children = bp->bio_inbed = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_raid3_disk_sync *sync;

		if (bp->bio_error != 0) {
			G_RAID3_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_RAID3_BUMP_GENID_IMM;
			g_raid3_event_send(disk,
			    G_RAID3_DISK_STATE_DISCONNECTED,
			    G_RAID3_EVENT_DONTWAIT);
			return;
		}
		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		/* A pending resync restart supersedes normal progress. */
		if (sync->ds_resync != -1)
			return;
		if (sync->ds_offset_done ==
		    sc->sc_mediasize / (sc->sc_ndisks - 1)) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE,
			    G_RAID3_EVENT_DONTWAIT);
			return;
		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_raid3_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

/*
 * Split an incoming request into per-component children and dispatch
 * them.  Returns 0 on success or ENOMEM if child allocation failed
 * (caller may retry).
 */
static int
g_raid3_register_request(struct bio *pbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t offset, length;
	u_int n, ndisks;
	int round_robin, verify;

	ndisks = 0;
	sc = pbp->bio_to->geom->softc;
	/* Requests tagged for a sync window need a sync disk to exist. */
	if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 &&
	    sc->sc_syncdisk == NULL) {
		g_io_deliver(pbp, EIO);
		return (0);
	}
	g_raid3_init_bio(pbp);
	/* Per-component length/offset. */
	length = pbp->bio_length / (sc->sc_ndisks - 1);
	offset = pbp->bio_offset / (sc->sc_ndisks - 1);
	round_robin = verify = 0;
	switch (pbp->bio_cmd) {
	case BIO_READ:
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY;
			verify = 1;
			ndisks = sc->sc_ndisks;
		} else {
			verify = 0;
			ndisks = sc->sc_ndisks - 1;
		}
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			round_robin = 1;
		} else {
			round_robin = 0;
		}
		KASSERT(!round_robin || !verify,
		    ("ROUND-ROBIN and VERIFY are mutually exclusive."));
		pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1];
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_raid3_disk_sync *sync;

		if (sc->sc_idle)
			g_raid3_unidle(sc);

		ndisks = sc->sc_ndisks;

		if (sc->sc_syncdisk == NULL)
			break;
		sync = &sc->sc_syncdisk->d_sync;
		if (offset >= sync->ds_offset)
			break;
		if (offset + length <= sync->ds_offset_done)
			break;
		if (offset >= sync->ds_resync && sync->ds_resync
		    != -1)
			break;
		/* Write lands in the not-yet-synced window: schedule a resync. */
		sync->ds_resync = offset - (offset % MAXPHYS);
		break;
	    }
	}
	for (n = 0; n < ndisks; n++) {
		disk = &sc->sc_disks[n];
		cbp = g_raid3_clone_bio(sc, pbp);
		if (cbp == NULL) {
			while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
				g_raid3_destroy_bio(sc, cbp);
			return (ENOMEM);
		}
		cbp->bio_offset = offset;
		cbp->bio_length = length;
		cbp->bio_done = g_raid3_done;
		switch (pbp->bio_cmd) {
		case BIO_READ:
			if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
				/*
				 * Replace invalid component with the parity
				 * component.
				 */
				disk = &sc->sc_disks[sc->sc_ndisks - 1];
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
			} else if (round_robin &&
			    disk->d_no == sc->sc_round_robin) {
				/*
				 * In round-robin mode skip one data component
				 * and use parity component when reading.
				 */
				pbp->bio_driver2 = disk;
				disk = &sc->sc_disks[sc->sc_ndisks - 1];
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
				sc->sc_round_robin++;
				round_robin = 0;
			} else if (verify && disk->d_no == sc->sc_ndisks - 1) {
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
			}
			break;
		case BIO_WRITE:
		case BIO_DELETE:
			if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
				if (n == ndisks - 1) {
					/*
					 * Active parity component, mark it as such.
					 */
					cbp->bio_cflags |=
					    G_RAID3_BIO_CFLAG_PARITY;
				}
			} else {
				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
				if (n == ndisks - 1) {
					/*
					 * Parity component is not connected,
					 * so destroy its request.
					 */
					pbp->bio_pflags |=
					    G_RAID3_BIO_PFLAG_NOPARITY;
					g_raid3_destroy_bio(sc, cbp);
					cbp = NULL;
				} else {
					cbp->bio_cflags |=
					    G_RAID3_BIO_CFLAG_NODISK;
					disk = NULL;
				}
			}
			break;
		}
		if (cbp != NULL)
			cbp->bio_caller2 = disk;
	}
	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (round_robin) {
			/*
			 * If we are in round-robin mode and 'round_robin' is
			 * still 1, it means, that we skipped parity component
			 * for this read and must reset sc_round_robin field.
			 */
			sc->sc_round_robin = 0;
		}
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			disk = cbp->bio_caller2;
			cp = disk->d_consumer;
			cbp->bio_to = cp->provider;
			G_RAID3_LOGREQ(3, cbp, "Sending request.");
			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
			cp->index++;
			g_io_request(cbp, cp);
		}
		break;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID_OFW) != 0) {
			sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
			g_topology_lock();
			g_raid3_bump_syncid(sc);
			g_topology_unlock();
		}
		g_raid3_scatter(pbp);
		break;
	}
	return (0);
}

/*
 * Return 1 when no consumer of the device or its sync geom is still busy,
 * i.e. the device can be torn down.  Requires the topology lock.
 */
static int
g_raid3_can_destroy(struct g_raid3_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_raid3_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_raid3_is_busy(sc, cp))
			return (0);
	}
	G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

/*
 * Try to tear the device down; returns 1 if the worker should exit.
 * With the WAIT flag set, only wake the thread waiting in destroy.
 */
static int
g_raid3_try_destroy(struct g_raid3_softc *sc)
{

	g_topology_lock();
	if (!g_raid3_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_raid3_destroy_device(sc);
		g_topology_unlock();
		free(sc->sc_disks, M_RAID3);
		free(sc, M_RAID3);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_raid3_worker(void *arg)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct g_raid3_disk_sync *sync;
	struct g_raid3_event *ep;
	struct bio *bp;
	u_int nreqs;

	sc = arg;
	curthread->td_base_pri = PRIBIO;

	nreqs = 0;
	for (;;) {
		G_RAID3_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_raid3_event_get(sc);
		if (ep != NULL && g_topology_try_lock()) {
			g_raid3_event_remove(sc, ep);
			if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_RAID3_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_raid3_update_device(sc, 1);
			} else {
				/* Update disk status. */
				G_RAID3_DEBUG(3, "Running event for disk %s.",
				    g_raid3_get_diskname(ep->e_disk));
				ep->e_error = g_raid3_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_raid3_update_device(sc, 0);
			}
			g_topology_unlock();
			if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_raid3_event_free(ep);
			} else {
				/* Someone is sleeping on this event; wake them. */
				ep->e_flags |= G_RAID3_EVENT_DONE;
				G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
				if (g_raid3_try_destroy(sc))
					kthread_exit(0);
			}
			G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_first(&sc->sc_queue);
		if (bp == NULL) {
			if (ep != NULL) {
				/*
				 * No I/O requests and topology lock was
				 * already held? Try again.
				 */
				mtx_unlock(&sc->sc_queue_mtx);
				tsleep(ep, PRIBIO, "r3:top1", hz / 5);
				continue;
			}
			if ((sc->sc_flags &
			    G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_raid3_try_destroy(sc))
					kthread_exit(0);
				mtx_lock(&sc->sc_queue_mtx);
			}
		}
		if (sc->sc_syncdisk != NULL &&
		    (bp == NULL || nreqs > g_raid3_reqs_per_sync)) {
			mtx_unlock(&sc->sc_queue_mtx);
			/*
			 * It is time for synchronization...
			 */
			nreqs = 0;
			disk = sc->sc_syncdisk;
			sync = &disk->d_sync;
			if (sync->ds_offset <
			    sc->sc_mediasize / (sc->sc_ndisks - 1) &&
			    sync->ds_offset == sync->ds_offset_done) {
				if (sync->ds_resync != -1) {
					/* Restart sync from the resync point. */
					sync->ds_offset = sync->ds_resync;
					sync->ds_offset_done = sync->ds_resync;
					sync->ds_resync = -1;
				}
				g_raid3_sync_one(sc);
			}
			G_RAID3_DEBUG(5, "%s: I'm here 2.", __func__);
			goto sleep;
		}
		if (bp == NULL) {
			if (g_raid3_check_idle(sc)) {
				u_int idletime;

				idletime = g_raid3_idletime;
				if (idletime == 0)
					idletime = 1;
				idletime *= hz;
				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
				    "r3:w1", idletime) == EWOULDBLOCK) {
					G_RAID3_DEBUG(5, "%s: I'm here 3.",
					    __func__);
					/*
					 * No I/O requests in 'idletime'
					 * seconds, so mark components as clean.
1842 */ 1843 g_raid3_idle(sc); 1844 } 1845 G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); 1846 } else { 1847 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, 1848 "r3:w2", 0); 1849 G_RAID3_DEBUG(5, "%s: I'm here 5.", __func__); 1850 } 1851 continue; 1852 } 1853 nreqs++; 1854 bioq_remove(&sc->sc_queue, bp); 1855 mtx_unlock(&sc->sc_queue_mtx); 1856 1857 if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) { 1858 g_raid3_regular_request(bp); 1859 } else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { 1860 u_int timeout, sps; 1861 1862 g_raid3_sync_request(bp); 1863sleep: 1864 sps = atomic_load_acq_int(&g_raid3_syncs_per_sec); 1865 if (sps == 0) { 1866 G_RAID3_DEBUG(5, "%s: I'm here 6.", __func__); 1867 continue; 1868 } 1869 if (ep != NULL) { 1870 /* 1871 * We have some pending events, don't sleep now. 1872 */ 1873 G_RAID3_DEBUG(5, "%s: I'm here 7.", __func__); 1874 tsleep(ep, PRIBIO, "r3:top2", hz / 5); 1875 continue; 1876 } 1877 mtx_lock(&sc->sc_queue_mtx); 1878 if (bioq_first(&sc->sc_queue) != NULL) { 1879 mtx_unlock(&sc->sc_queue_mtx); 1880 G_RAID3_DEBUG(5, "%s: I'm here 8.", __func__); 1881 continue; 1882 } 1883 timeout = hz / sps; 1884 if (timeout == 0) 1885 timeout = 1; 1886 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w2", 1887 timeout); 1888 } else { 1889 if (g_raid3_register_request(bp) != 0) { 1890 mtx_lock(&sc->sc_queue_mtx); 1891 bioq_insert_tail(&sc->sc_queue, bp); 1892 MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, 1893 PRIBIO | PDROP, "r3:lowmem", hz / 10); 1894 } 1895 } 1896 G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); 1897 } 1898} 1899 1900/* 1901 * Open disk's consumer if needed. 
1902 */ 1903static void 1904g_raid3_update_access(struct g_raid3_disk *disk) 1905{ 1906 struct g_provider *pp; 1907 1908 g_topology_assert(); 1909 1910 pp = disk->d_softc->sc_provider; 1911 if (pp == NULL) 1912 return; 1913 if (pp->acw > 0) { 1914 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { 1915 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 1916 g_raid3_get_diskname(disk), disk->d_softc->sc_name); 1917 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 1918 } 1919 } else if (pp->acw == 0) { 1920 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { 1921 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 1922 g_raid3_get_diskname(disk), disk->d_softc->sc_name); 1923 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 1924 } 1925 } 1926} 1927 1928static void 1929g_raid3_sync_start(struct g_raid3_softc *sc) 1930{ 1931 struct g_raid3_disk *disk; 1932 int error; 1933 u_int n; 1934 1935 g_topology_assert(); 1936 1937 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 1938 ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 1939 sc->sc_state)); 1940 KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", 1941 sc->sc_name, sc->sc_state)); 1942 disk = NULL; 1943 for (n = 0; n < sc->sc_ndisks; n++) { 1944 if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 1945 continue; 1946 disk = &sc->sc_disks[n]; 1947 break; 1948 } 1949 if (disk == NULL) 1950 return; 1951 1952 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 1953 g_raid3_get_diskname(disk)); 1954 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 1955 KASSERT(disk->d_sync.ds_consumer == NULL, 1956 ("Sync consumer already exists (device=%s, disk=%s).", 1957 sc->sc_name, g_raid3_get_diskname(disk))); 1958 disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom); 1959 disk->d_sync.ds_consumer->private = disk; 1960 disk->d_sync.ds_consumer->index = 0; 1961 error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider); 1962 KASSERT(error == 0, ("Cannot attach 
to %s (error=%d).", 1963 disk->d_softc->sc_name, error)); 1964 error = g_access(disk->d_sync.ds_consumer, 1, 0, 0); 1965 KASSERT(error == 0, ("Cannot open %s (error=%d).", 1966 disk->d_softc->sc_name, error)); 1967 disk->d_sync.ds_data = malloc(MAXPHYS, M_RAID3, M_WAITOK); 1968 sc->sc_syncdisk = disk; 1969} 1970 1971/* 1972 * Stop synchronization process. 1973 * type: 0 - synchronization finished 1974 * 1 - synchronization stopped 1975 */ 1976static void 1977g_raid3_sync_stop(struct g_raid3_softc *sc, int type) 1978{ 1979 struct g_raid3_disk *disk; 1980 1981 g_topology_assert(); 1982 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 1983 ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 1984 sc->sc_state)); 1985 disk = sc->sc_syncdisk; 1986 sc->sc_syncdisk = NULL; 1987 KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); 1988 KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 1989 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 1990 g_raid3_disk_state2str(disk->d_state))); 1991 if (disk->d_sync.ds_consumer == NULL) 1992 return; 1993 1994 if (type == 0) { 1995 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", 1996 disk->d_softc->sc_name, g_raid3_get_diskname(disk)); 1997 } else /* if (type == 1) */ { 1998 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 1999 disk->d_softc->sc_name, g_raid3_get_diskname(disk)); 2000 } 2001 g_raid3_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer); 2002 free(disk->d_sync.ds_data, M_RAID3); 2003 disk->d_sync.ds_consumer = NULL; 2004 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2005} 2006 2007static void 2008g_raid3_launch_provider(struct g_raid3_softc *sc) 2009{ 2010 struct g_provider *pp; 2011 2012 g_topology_assert(); 2013 2014 pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); 2015 pp->mediasize = sc->sc_mediasize; 2016 pp->sectorsize = sc->sc_sectorsize; 2017 sc->sc_provider = pp; 2018 g_error_provider(pp, 0); 2019 G_RAID3_DEBUG(0, "Device %s: 
provider %s launched.", sc->sc_name, 2020 pp->name); 2021 if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) 2022 g_raid3_sync_start(sc); 2023} 2024 2025static void 2026g_raid3_destroy_provider(struct g_raid3_softc *sc) 2027{ 2028 struct bio *bp; 2029 2030 g_topology_assert(); 2031 KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 2032 sc->sc_name)); 2033 2034 g_error_provider(sc->sc_provider, ENXIO); 2035 mtx_lock(&sc->sc_queue_mtx); 2036 while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 2037 bioq_remove(&sc->sc_queue, bp); 2038 g_io_deliver(bp, ENXIO); 2039 } 2040 mtx_unlock(&sc->sc_queue_mtx); 2041 G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 2042 sc->sc_provider->name); 2043 sc->sc_provider->flags |= G_PF_WITHER; 2044 g_orphan_provider(sc->sc_provider, ENXIO); 2045 sc->sc_provider = NULL; 2046 if (sc->sc_syncdisk != NULL) 2047 g_raid3_sync_stop(sc, 1); 2048} 2049 2050static void 2051g_raid3_go(void *arg) 2052{ 2053 struct g_raid3_softc *sc; 2054 2055 sc = arg; 2056 G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 2057 g_raid3_event_send(sc, 0, 2058 G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); 2059} 2060 2061static u_int 2062g_raid3_determine_state(struct g_raid3_disk *disk) 2063{ 2064 struct g_raid3_softc *sc; 2065 u_int state; 2066 2067 sc = disk->d_softc; 2068 if (sc->sc_syncid == disk->d_sync.ds_syncid) { 2069 if ((disk->d_flags & 2070 G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { 2071 /* Disk does not need synchronization. */ 2072 state = G_RAID3_DISK_STATE_ACTIVE; 2073 } else { 2074 if ((sc->sc_flags & 2075 G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2076 (disk->d_flags & 2077 G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2078 /* 2079 * We can start synchronization from 2080 * the stored offset. 
2081 */ 2082 state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2083 } else { 2084 state = G_RAID3_DISK_STATE_STALE; 2085 } 2086 } 2087 } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 2088 /* 2089 * Reset all synchronization data for this disk, 2090 * because if it even was synchronized, it was 2091 * synchronized to disks with different syncid. 2092 */ 2093 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2094 disk->d_sync.ds_offset = 0; 2095 disk->d_sync.ds_offset_done = 0; 2096 disk->d_sync.ds_syncid = sc->sc_syncid; 2097 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2098 (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2099 state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2100 } else { 2101 state = G_RAID3_DISK_STATE_STALE; 2102 } 2103 } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 2104 /* 2105 * Not good, NOT GOOD! 2106 * It means that device was started on stale disks 2107 * and more fresh disk just arrive. 2108 * If there were writes, device is fucked up, sorry. 2109 * I think the best choice here is don't touch 2110 * this disk and inform the user laudly. 2111 */ 2112 G_RAID3_DEBUG(0, "Device %s was started before the freshest " 2113 "disk (%s) arrives!! It will not be connected to the " 2114 "running device.", sc->sc_name, 2115 g_raid3_get_diskname(disk)); 2116 g_raid3_destroy_disk(disk); 2117 state = G_RAID3_DISK_STATE_NONE; 2118 /* Return immediately, because disk was destroyed. */ 2119 return (state); 2120 } 2121 G_RAID3_DEBUG(3, "State for %s disk: %s.", 2122 g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); 2123 return (state); 2124} 2125 2126/* 2127 * Update device state. 
2128 */ 2129static void 2130g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) 2131{ 2132 struct g_raid3_disk *disk; 2133 u_int state; 2134 2135 g_topology_assert(); 2136 2137 switch (sc->sc_state) { 2138 case G_RAID3_DEVICE_STATE_STARTING: 2139 { 2140 u_int n, ndirty, ndisks, genid, syncid; 2141 2142 KASSERT(sc->sc_provider == NULL, 2143 ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 2144 /* 2145 * Are we ready? We are, if all disks are connected or 2146 * one disk is missing and 'force' is true. 2147 */ 2148 if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { 2149 if (!force) 2150 callout_drain(&sc->sc_callout); 2151 } else { 2152 if (force) { 2153 /* 2154 * Timeout expired, so destroy device. 2155 */ 2156 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2157 } 2158 return; 2159 } 2160 2161 /* 2162 * Find the biggest genid. 2163 */ 2164 genid = 0; 2165 for (n = 0; n < sc->sc_ndisks; n++) { 2166 disk = &sc->sc_disks[n]; 2167 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2168 continue; 2169 if (disk->d_genid > genid) 2170 genid = disk->d_genid; 2171 } 2172 sc->sc_genid = genid; 2173 /* 2174 * Remove all disks without the biggest genid. 2175 */ 2176 for (n = 0; n < sc->sc_ndisks; n++) { 2177 disk = &sc->sc_disks[n]; 2178 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2179 continue; 2180 if (disk->d_genid < genid) { 2181 G_RAID3_DEBUG(0, 2182 "Component %s (device %s) broken, skipping.", 2183 g_raid3_get_diskname(disk), sc->sc_name); 2184 g_raid3_destroy_disk(disk); 2185 } 2186 } 2187 2188 /* 2189 * There must be at least 'sc->sc_ndisks - 1' components 2190 * with the same syncid and without SYNCHRONIZING flag. 2191 */ 2192 2193 /* 2194 * Find the biggest syncid, number of valid components and 2195 * number of dirty components. 
2196 */ 2197 ndirty = ndisks = syncid = 0; 2198 for (n = 0; n < sc->sc_ndisks; n++) { 2199 disk = &sc->sc_disks[n]; 2200 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2201 continue; 2202 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) 2203 ndirty++; 2204 if (disk->d_sync.ds_syncid > syncid) { 2205 syncid = disk->d_sync.ds_syncid; 2206 ndisks = 0; 2207 } else if (disk->d_sync.ds_syncid < syncid) { 2208 continue; 2209 } 2210 if ((disk->d_flags & 2211 G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { 2212 continue; 2213 } 2214 ndisks++; 2215 } 2216 /* 2217 * Do we have enough valid components? 2218 */ 2219 if (ndisks + 1 < sc->sc_ndisks) { 2220 G_RAID3_DEBUG(0, 2221 "Device %s is broken, too few valid components.", 2222 sc->sc_name); 2223 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2224 return; 2225 } 2226 /* 2227 * If there is one DIRTY component and all disks are present, 2228 * mark it for synchronization. If there is more than one DIRTY 2229 * component, mark parity component for synchronization. 2230 */ 2231 if (ndisks == sc->sc_ndisks && ndirty == 1) { 2232 for (n = 0; n < sc->sc_ndisks; n++) { 2233 disk = &sc->sc_disks[n]; 2234 if ((disk->d_flags & 2235 G_RAID3_DISK_FLAG_DIRTY) == 0) { 2236 continue; 2237 } 2238 disk->d_flags |= 2239 G_RAID3_DISK_FLAG_SYNCHRONIZING; 2240 } 2241 } else if (ndisks == sc->sc_ndisks && ndirty > 1) { 2242 disk = &sc->sc_disks[sc->sc_ndisks - 1]; 2243 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2244 } 2245 2246 sc->sc_syncid = syncid; 2247 if (force) { 2248 /* Remember to bump syncid on first write. 
*/ 2249 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID_OFW; 2250 } 2251 if (ndisks == sc->sc_ndisks) 2252 state = G_RAID3_DEVICE_STATE_COMPLETE; 2253 else /* if (ndisks == sc->sc_ndisks - 1) */ 2254 state = G_RAID3_DEVICE_STATE_DEGRADED; 2255 G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", 2256 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2257 g_raid3_device_state2str(state)); 2258 sc->sc_state = state; 2259 for (n = 0; n < sc->sc_ndisks; n++) { 2260 disk = &sc->sc_disks[n]; 2261 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2262 continue; 2263 state = g_raid3_determine_state(disk); 2264 g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); 2265 if (state == G_RAID3_DISK_STATE_STALE) 2266 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID_OFW; 2267 } 2268 break; 2269 } 2270 case G_RAID3_DEVICE_STATE_DEGRADED: 2271 /* 2272 * Bump syncid and/or genid here, if we need to do it 2273 * immediately. 2274 */ 2275 if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID_IMM) != 0) { 2276 sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2277 g_raid3_bump_syncid(sc); 2278 } 2279 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID_IMM) != 0) { 2280 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2281 g_raid3_bump_genid(sc); 2282 } 2283 2284 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2285 return; 2286 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < 2287 sc->sc_ndisks - 1) { 2288 if (sc->sc_provider != NULL) 2289 g_raid3_destroy_provider(sc); 2290 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2291 return; 2292 } 2293 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2294 sc->sc_ndisks) { 2295 state = G_RAID3_DEVICE_STATE_COMPLETE; 2296 G_RAID3_DEBUG(1, 2297 "Device %s state changed from %s to %s.", 2298 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2299 g_raid3_device_state2str(state)); 2300 sc->sc_state = state; 2301 } 2302 if (sc->sc_provider == NULL) 2303 g_raid3_launch_provider(sc); 2304 break; 2305 case G_RAID3_DEVICE_STATE_COMPLETE: 2306 /* 2307 * Bump syncid and/or genid here, if we need to 
do it 2308 * immediately. 2309 */ 2310 if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID_IMM) != 0) { 2311 sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2312 g_raid3_bump_syncid(sc); 2313 } 2314 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID_IMM) != 0) { 2315 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2316 g_raid3_bump_genid(sc); 2317 } 2318 2319 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2320 return; 2321 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= 2322 sc->sc_ndisks - 1, 2323 ("Too few ACTIVE components in COMPLETE state (device %s).", 2324 sc->sc_name)); 2325 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2326 sc->sc_ndisks - 1) { 2327 state = G_RAID3_DEVICE_STATE_DEGRADED; 2328 G_RAID3_DEBUG(1, 2329 "Device %s state changed from %s to %s.", 2330 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2331 g_raid3_device_state2str(state)); 2332 sc->sc_state = state; 2333 } 2334 if (sc->sc_provider == NULL) 2335 g_raid3_launch_provider(sc); 2336 break; 2337 default: 2338 KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, 2339 g_raid3_device_state2str(sc->sc_state))); 2340 break; 2341 } 2342} 2343 2344/* 2345 * Update disk state and device state if needed. 2346 */ 2347#define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ 2348 "Disk %s state changed from %s to %s (device %s).", \ 2349 g_raid3_get_diskname(disk), \ 2350 g_raid3_disk_state2str(disk->d_state), \ 2351 g_raid3_disk_state2str(state), sc->sc_name) 2352static int 2353g_raid3_update_disk(struct g_raid3_disk *disk, u_int state) 2354{ 2355 struct g_raid3_softc *sc; 2356 2357 g_topology_assert(); 2358 2359 sc = disk->d_softc; 2360again: 2361 G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", 2362 g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), 2363 g_raid3_disk_state2str(state)); 2364 switch (state) { 2365 case G_RAID3_DISK_STATE_NEW: 2366 /* 2367 * Possible scenarios: 2368 * 1. New disk arrive. 2369 */ 2370 /* Previous state should be NONE. 
*/ 2371 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, 2372 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2373 g_raid3_disk_state2str(disk->d_state))); 2374 DISK_STATE_CHANGED(); 2375 2376 disk->d_state = state; 2377 G_RAID3_DEBUG(0, "Device %s: provider %s detected.", 2378 sc->sc_name, g_raid3_get_diskname(disk)); 2379 if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) 2380 break; 2381 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2382 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2383 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2384 g_raid3_device_state2str(sc->sc_state), 2385 g_raid3_get_diskname(disk), 2386 g_raid3_disk_state2str(disk->d_state))); 2387 state = g_raid3_determine_state(disk); 2388 if (state != G_RAID3_DISK_STATE_NONE) 2389 goto again; 2390 break; 2391 case G_RAID3_DISK_STATE_ACTIVE: 2392 /* 2393 * Possible scenarios: 2394 * 1. New disk does not need synchronization. 2395 * 2. Synchronization process finished successfully. 2396 */ 2397 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2398 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2399 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2400 g_raid3_device_state2str(sc->sc_state), 2401 g_raid3_get_diskname(disk), 2402 g_raid3_disk_state2str(disk->d_state))); 2403 /* Previous state should be NEW or SYNCHRONIZING. 
*/ 2404 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || 2405 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2406 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2407 g_raid3_disk_state2str(disk->d_state))); 2408 DISK_STATE_CHANGED(); 2409 2410 if (disk->d_state == G_RAID3_DISK_STATE_NEW) 2411 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2412 else if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 2413 disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; 2414 disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; 2415 g_raid3_sync_stop(sc, 0); 2416 } 2417 disk->d_state = state; 2418 disk->d_sync.ds_offset = 0; 2419 disk->d_sync.ds_offset_done = 0; 2420 g_raid3_update_access(disk); 2421 g_raid3_update_metadata(disk); 2422 G_RAID3_DEBUG(0, "Device %s: provider %s activated.", 2423 sc->sc_name, g_raid3_get_diskname(disk)); 2424 break; 2425 case G_RAID3_DISK_STATE_STALE: 2426 /* 2427 * Possible scenarios: 2428 * 1. Stale disk was connected. 2429 */ 2430 /* Previous state should be NEW. */ 2431 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2432 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2433 g_raid3_disk_state2str(disk->d_state))); 2434 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2435 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2436 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2437 g_raid3_device_state2str(sc->sc_state), 2438 g_raid3_get_diskname(disk), 2439 g_raid3_disk_state2str(disk->d_state))); 2440 /* 2441 * STALE state is only possible if device is marked 2442 * NOAUTOSYNC. 
2443 */ 2444 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, 2445 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2446 g_raid3_device_state2str(sc->sc_state), 2447 g_raid3_get_diskname(disk), 2448 g_raid3_disk_state2str(disk->d_state))); 2449 DISK_STATE_CHANGED(); 2450 2451 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2452 disk->d_state = state; 2453 g_raid3_update_metadata(disk); 2454 G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", 2455 sc->sc_name, g_raid3_get_diskname(disk)); 2456 break; 2457 case G_RAID3_DISK_STATE_SYNCHRONIZING: 2458 /* 2459 * Possible scenarios: 2460 * 1. Disk which needs synchronization was connected. 2461 */ 2462 /* Previous state should be NEW. */ 2463 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2464 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2465 g_raid3_disk_state2str(disk->d_state))); 2466 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2467 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2468 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2469 g_raid3_device_state2str(sc->sc_state), 2470 g_raid3_get_diskname(disk), 2471 g_raid3_disk_state2str(disk->d_state))); 2472 DISK_STATE_CHANGED(); 2473 2474 if (disk->d_state == G_RAID3_DISK_STATE_NEW) 2475 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2476 disk->d_state = state; 2477 if (sc->sc_provider != NULL) { 2478 g_raid3_sync_start(sc); 2479 g_raid3_update_metadata(disk); 2480 } 2481 break; 2482 case G_RAID3_DISK_STATE_DISCONNECTED: 2483 /* 2484 * Possible scenarios: 2485 * 1. Device wasn't running yet, but disk disappear. 2486 * 2. Disk was active and disapppear. 2487 * 3. Disk disappear during synchronization process. 2488 */ 2489 if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2490 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 2491 /* 2492 * Previous state should be ACTIVE, STALE or 2493 * SYNCHRONIZING. 
2494 */ 2495 KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 2496 disk->d_state == G_RAID3_DISK_STATE_STALE || 2497 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2498 ("Wrong disk state (%s, %s).", 2499 g_raid3_get_diskname(disk), 2500 g_raid3_disk_state2str(disk->d_state))); 2501 } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { 2502 /* Previous state should be NEW. */ 2503 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2504 ("Wrong disk state (%s, %s).", 2505 g_raid3_get_diskname(disk), 2506 g_raid3_disk_state2str(disk->d_state))); 2507 /* 2508 * Reset bumping syncid if disk disappeared in STARTING 2509 * state. 2510 */ 2511 if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID_OFW) != 0) 2512 sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2513#ifdef INVARIANTS 2514 } else { 2515 KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2516 sc->sc_name, 2517 g_raid3_device_state2str(sc->sc_state), 2518 g_raid3_get_diskname(disk), 2519 g_raid3_disk_state2str(disk->d_state))); 2520#endif 2521 } 2522 DISK_STATE_CHANGED(); 2523 G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", 2524 sc->sc_name, g_raid3_get_diskname(disk)); 2525 2526 g_raid3_destroy_disk(disk); 2527 break; 2528 default: 2529 KASSERT(1 == 0, ("Unknown state (%u).", state)); 2530 break; 2531 } 2532 return (0); 2533} 2534#undef DISK_STATE_CHANGED 2535 2536static int 2537g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) 2538{ 2539 struct g_provider *pp; 2540 u_char *buf; 2541 int error; 2542 2543 g_topology_assert(); 2544 2545 error = g_access(cp, 1, 0, 0); 2546 if (error != 0) 2547 return (error); 2548 pp = cp->provider; 2549 g_topology_unlock(); 2550 /* Metadata are stored on last sector. 
*/ 2551 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2552 &error); 2553 g_topology_lock(); 2554 g_access(cp, -1, 0, 0); 2555 if (error != 0) { 2556 G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2557 cp->provider->name, error); 2558 if (buf != NULL) 2559 g_free(buf); 2560 return (error); 2561 } 2562 2563 /* Decode metadata. */ 2564 error = raid3_metadata_decode(buf, md); 2565 g_free(buf); 2566 if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) 2567 return (EINVAL); 2568 if (md->md_version > G_RAID3_VERSION) { 2569 G_RAID3_DEBUG(0, 2570 "Kernel module is too old to handle metadata from %s.", 2571 cp->provider->name); 2572 return (EINVAL); 2573 } 2574 if (error != 0) { 2575 G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2576 cp->provider->name); 2577 return (error); 2578 } 2579 2580 return (0); 2581} 2582 2583static int 2584g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, 2585 struct g_raid3_metadata *md) 2586{ 2587 2588 if (md->md_no >= sc->sc_ndisks) { 2589 G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", 2590 pp->name, md->md_no); 2591 return (EINVAL); 2592 } 2593 if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { 2594 G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", 2595 pp->name, md->md_no); 2596 return (EEXIST); 2597 } 2598 if (md->md_all != sc->sc_ndisks) { 2599 G_RAID3_DEBUG(1, 2600 "Invalid '%s' field on disk %s (device %s), skipping.", 2601 "md_all", pp->name, sc->sc_name); 2602 return (EINVAL); 2603 } 2604 if (md->md_mediasize != sc->sc_mediasize) { 2605 G_RAID3_DEBUG(1, 2606 "Invalid '%s' field on disk %s (device %s), skipping.", 2607 "md_mediasize", pp->name, sc->sc_name); 2608 return (EINVAL); 2609 } 2610 if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { 2611 G_RAID3_DEBUG(1, 2612 "Invalid '%s' field on disk %s (device %s), skipping.", 2613 "md_mediasize", pp->name, sc->sc_name); 2614 return (EINVAL); 2615 } 2616 if ((sc->sc_mediasize 
/ (sc->sc_ndisks - 1)) > pp->mediasize) { 2617 G_RAID3_DEBUG(1, 2618 "Invalid size of disk %s (device %s), skipping.", pp->name, 2619 sc->sc_name); 2620 return (EINVAL); 2621 } 2622 if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { 2623 G_RAID3_DEBUG(1, 2624 "Invalid '%s' field on disk %s (device %s), skipping.", 2625 "md_sectorsize", pp->name, sc->sc_name); 2626 return (EINVAL); 2627 } 2628 if (md->md_sectorsize != sc->sc_sectorsize) { 2629 G_RAID3_DEBUG(1, 2630 "Invalid '%s' field on disk %s (device %s), skipping.", 2631 "md_sectorsize", pp->name, sc->sc_name); 2632 return (EINVAL); 2633 } 2634 if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2635 G_RAID3_DEBUG(1, 2636 "Invalid sector size of disk %s (device %s), skipping.", 2637 pp->name, sc->sc_name); 2638 return (EINVAL); 2639 } 2640 if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 2641 G_RAID3_DEBUG(1, 2642 "Invalid device flags on disk %s (device %s), skipping.", 2643 pp->name, sc->sc_name); 2644 return (EINVAL); 2645 } 2646 if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 2647 (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { 2648 /* 2649 * VERIFY and ROUND-ROBIN options are mutally exclusive. 
2650 */ 2651 G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " 2652 "disk %s (device %s), skipping.", pp->name, sc->sc_name); 2653 return (EINVAL); 2654 } 2655 if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { 2656 G_RAID3_DEBUG(1, 2657 "Invalid disk flags on disk %s (device %s), skipping.", 2658 pp->name, sc->sc_name); 2659 return (EINVAL); 2660 } 2661 return (0); 2662} 2663 2664static int 2665g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, 2666 struct g_raid3_metadata *md) 2667{ 2668 struct g_raid3_disk *disk; 2669 int error; 2670 2671 g_topology_assert(); 2672 G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); 2673 2674 error = g_raid3_check_metadata(sc, pp, md); 2675 if (error != 0) 2676 return (error); 2677 if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && 2678 md->md_genid < sc->sc_genid) { 2679 G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", 2680 pp->name, sc->sc_name); 2681 return (EINVAL); 2682 } 2683 disk = g_raid3_init_disk(sc, pp, md, &error); 2684 if (disk == NULL) 2685 return (error); 2686 error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, 2687 G_RAID3_EVENT_WAIT); 2688 if (error != 0) 2689 return (error); 2690 if (md->md_version < G_RAID3_VERSION) { 2691 G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 2692 pp->name, md->md_version, G_RAID3_VERSION); 2693 g_raid3_update_metadata(disk); 2694 } 2695 return (0); 2696} 2697 2698static int 2699g_raid3_access(struct g_provider *pp, int acr, int acw, int ace) 2700{ 2701 struct g_raid3_softc *sc; 2702 struct g_raid3_disk *disk; 2703 int dcr, dcw, dce; 2704 u_int n; 2705 2706 g_topology_assert(); 2707 G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 2708 acw, ace); 2709 2710 dcr = pp->acr + acr; 2711 dcw = pp->acw + acw; 2712 dce = pp->ace + ace; 2713 2714 sc = pp->geom->softc; 2715 if (sc == NULL || 2716 g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1 || 2717 (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) 
{ 2718 if (acr <= 0 && acw <= 0 && ace <= 0) 2719 return (0); 2720 else 2721 return (ENXIO); 2722 } 2723 for (n = 0; n < sc->sc_ndisks; n++) { 2724 disk = &sc->sc_disks[n]; 2725 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 2726 continue; 2727 /* 2728 * Mark disk as dirty on open and unmark on close. 2729 */ 2730 if (pp->acw == 0 && dcw > 0) { 2731 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 2732 g_raid3_get_diskname(disk), sc->sc_name); 2733 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2734 g_raid3_update_metadata(disk); 2735 } else if (pp->acw > 0 && dcw == 0) { 2736 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 2737 g_raid3_get_diskname(disk), sc->sc_name); 2738 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2739 g_raid3_update_metadata(disk); 2740 } 2741 } 2742 return (0); 2743} 2744 2745static struct g_geom * 2746g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) 2747{ 2748 struct g_raid3_softc *sc; 2749 struct g_geom *gp; 2750 int error, timeout; 2751 u_int n; 2752 2753 g_topology_assert(); 2754 G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); 2755 2756 /* One disk is minimum. */ 2757 if (md->md_all < 1) 2758 return (NULL); 2759 /* 2760 * Action geom. 
2761 */ 2762 gp = g_new_geomf(mp, "%s", md->md_name); 2763 sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); 2764 sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, 2765 M_WAITOK | M_ZERO); 2766 gp->start = g_raid3_start; 2767 gp->spoiled = g_raid3_spoiled; 2768 gp->orphan = g_raid3_orphan; 2769 gp->access = g_raid3_access; 2770 gp->dumpconf = g_raid3_dumpconf; 2771 2772 sc->sc_id = md->md_id; 2773 sc->sc_mediasize = md->md_mediasize; 2774 sc->sc_sectorsize = md->md_sectorsize; 2775 sc->sc_ndisks = md->md_all; 2776 sc->sc_round_robin = 0; 2777 sc->sc_flags = md->md_mflags; 2778 sc->sc_bump_id = 0; 2779 sc->sc_idle = 0; 2780 for (n = 0; n < sc->sc_ndisks; n++) { 2781 sc->sc_disks[n].d_softc = sc; 2782 sc->sc_disks[n].d_no = n; 2783 sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; 2784 } 2785 bioq_init(&sc->sc_queue); 2786 mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); 2787 TAILQ_INIT(&sc->sc_events); 2788 mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); 2789 callout_init(&sc->sc_callout, CALLOUT_MPSAFE); 2790 sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; 2791 gp->softc = sc; 2792 sc->sc_geom = gp; 2793 sc->sc_provider = NULL; 2794 /* 2795 * Synchronization geom. 
2796 */ 2797 gp = g_new_geomf(mp, "%s.sync", md->md_name); 2798 gp->softc = sc; 2799 gp->orphan = g_raid3_orphan; 2800 sc->sc_sync.ds_geom = gp; 2801 sc->sc_zone_64k = uma_zcreate("gr3:64k", 65536, NULL, NULL, NULL, NULL, 2802 UMA_ALIGN_PTR, 0); 2803 uma_zone_set_max(sc->sc_zone_64k, g_raid3_n64k); 2804 sc->sc_zone_16k = uma_zcreate("gr3:16k", 16384, NULL, NULL, NULL, NULL, 2805 UMA_ALIGN_PTR, 0); 2806 uma_zone_set_max(sc->sc_zone_64k, g_raid3_n16k); 2807 sc->sc_zone_4k = uma_zcreate("gr3:4k", 4096, NULL, NULL, NULL, NULL, 2808 UMA_ALIGN_PTR, 0); 2809 uma_zone_set_max(sc->sc_zone_4k, g_raid3_n4k); 2810 error = kthread_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, 2811 "g_raid3 %s", md->md_name); 2812 if (error != 0) { 2813 G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", 2814 sc->sc_name); 2815 uma_zdestroy(sc->sc_zone_64k); 2816 uma_zdestroy(sc->sc_zone_16k); 2817 uma_zdestroy(sc->sc_zone_4k); 2818 g_destroy_geom(sc->sc_sync.ds_geom); 2819 mtx_destroy(&sc->sc_events_mtx); 2820 mtx_destroy(&sc->sc_queue_mtx); 2821 g_destroy_geom(sc->sc_geom); 2822 free(sc->sc_disks, M_RAID3); 2823 free(sc, M_RAID3); 2824 return (NULL); 2825 } 2826 2827 G_RAID3_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 2828 2829 /* 2830 * Run timeout. 
2831 */ 2832 timeout = atomic_load_acq_int(&g_raid3_timeout); 2833 callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); 2834 return (sc->sc_geom); 2835} 2836 2837int 2838g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force) 2839{ 2840 struct g_provider *pp; 2841 2842 g_topology_assert(); 2843 2844 if (sc == NULL) 2845 return (ENXIO); 2846 pp = sc->sc_provider; 2847 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 2848 if (force) { 2849 G_RAID3_DEBUG(1, "Device %s is still open, so it " 2850 "can't be definitely removed.", pp->name); 2851 } else { 2852 G_RAID3_DEBUG(1, 2853 "Device %s is still open (r%dw%de%d).", pp->name, 2854 pp->acr, pp->acw, pp->ace); 2855 return (EBUSY); 2856 } 2857 } 2858 2859 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2860 sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; 2861 g_topology_unlock(); 2862 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 2863 mtx_lock(&sc->sc_queue_mtx); 2864 wakeup(sc); 2865 wakeup(&sc->sc_queue); 2866 mtx_unlock(&sc->sc_queue_mtx); 2867 G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 2868 while (sc->sc_worker != NULL) 2869 tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); 2870 G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 2871 g_topology_lock(); 2872 g_raid3_destroy_device(sc); 2873 free(sc->sc_disks, M_RAID3); 2874 free(sc, M_RAID3); 2875 return (0); 2876} 2877 2878static void 2879g_raid3_taste_orphan(struct g_consumer *cp) 2880{ 2881 2882 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 2883 cp->provider->name)); 2884} 2885 2886static struct g_geom * 2887g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 2888{ 2889 struct g_raid3_metadata md; 2890 struct g_raid3_softc *sc; 2891 struct g_consumer *cp; 2892 struct g_geom *gp; 2893 int error; 2894 2895 g_topology_assert(); 2896 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 2897 G_RAID3_DEBUG(2, "Tasting %s.", pp->name); 2898 2899 gp = 
g_new_geomf(mp, "raid3:taste"); 2900 /* This orphan function should be never called. */ 2901 gp->orphan = g_raid3_taste_orphan; 2902 cp = g_new_consumer(gp); 2903 g_attach(cp, pp); 2904 error = g_raid3_read_metadata(cp, &md); 2905 g_detach(cp); 2906 g_destroy_consumer(cp); 2907 g_destroy_geom(gp); 2908 if (error != 0) 2909 return (NULL); 2910 gp = NULL; 2911 2912 if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0) 2913 return (NULL); 2914 if (g_raid3_debug >= 2) 2915 raid3_metadata_dump(&md); 2916 2917 /* 2918 * Let's check if device already exists. 2919 */ 2920 sc = NULL; 2921 LIST_FOREACH(gp, &mp->geom, geom) { 2922 sc = gp->softc; 2923 if (sc == NULL) 2924 continue; 2925 if (sc->sc_sync.ds_geom == gp) 2926 continue; 2927 if (strcmp(md.md_name, sc->sc_name) != 0) 2928 continue; 2929 if (md.md_id != sc->sc_id) { 2930 G_RAID3_DEBUG(0, "Device %s already configured.", 2931 sc->sc_name); 2932 return (NULL); 2933 } 2934 break; 2935 } 2936 if (gp == NULL) { 2937 gp = g_raid3_create(mp, &md); 2938 if (gp == NULL) { 2939 G_RAID3_DEBUG(0, "Cannot create device %s.", 2940 md.md_name); 2941 return (NULL); 2942 } 2943 sc = gp->softc; 2944 } 2945 G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 2946 error = g_raid3_add_disk(sc, pp, &md); 2947 if (error != 0) { 2948 G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 2949 pp->name, gp->name, error); 2950 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == 2951 sc->sc_ndisks) { 2952 g_raid3_destroy(sc, 1); 2953 } 2954 return (NULL); 2955 } 2956 return (gp); 2957} 2958 2959static int 2960g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, 2961 struct g_geom *gp) 2962{ 2963 2964 return (g_raid3_destroy(gp->softc, 0)); 2965} 2966 2967static void 2968g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 2969 struct g_consumer *cp, struct g_provider *pp) 2970{ 2971 struct g_raid3_softc *sc; 2972 2973 g_topology_assert(); 2974 2975 sc = gp->softc; 
2976 if (sc == NULL) 2977 return; 2978 /* Skip synchronization geom. */ 2979 if (gp == sc->sc_sync.ds_geom) 2980 return; 2981 if (pp != NULL) { 2982 /* Nothing here. */ 2983 } else if (cp != NULL) { 2984 struct g_raid3_disk *disk; 2985 2986 disk = cp->private; 2987 if (disk == NULL) 2988 return; 2989 sbuf_printf(sb, "%s<Type>", indent); 2990 if (disk->d_no == sc->sc_ndisks - 1) 2991 sbuf_printf(sb, "PARITY"); 2992 else 2993 sbuf_printf(sb, "DATA"); 2994 sbuf_printf(sb, "</Type>\n"); 2995 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 2996 (u_int)disk->d_no); 2997 if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 2998 sbuf_printf(sb, "%s<Synchronized>", indent); 2999 if (disk->d_sync.ds_offset_done == 0) 3000 sbuf_printf(sb, "0%%"); 3001 else { 3002 sbuf_printf(sb, "%u%%", 3003 (u_int)((disk->d_sync.ds_offset_done * 100) / 3004 (sc->sc_mediasize / (sc->sc_ndisks - 1)))); 3005 } 3006 sbuf_printf(sb, "</Synchronized>\n"); 3007 } 3008 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3009 disk->d_sync.ds_syncid); 3010 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid); 3011 sbuf_printf(sb, "%s<Flags>", indent); 3012 if (disk->d_flags == 0) 3013 sbuf_printf(sb, "NONE"); 3014 else { 3015 int first = 1; 3016 3017#define ADD_FLAG(flag, name) do { \ 3018 if ((disk->d_flags & (flag)) != 0) { \ 3019 if (!first) \ 3020 sbuf_printf(sb, ", "); \ 3021 else \ 3022 first = 0; \ 3023 sbuf_printf(sb, name); \ 3024 } \ 3025} while (0) 3026 ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); 3027 ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); 3028 ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, 3029 "SYNCHRONIZING"); 3030 ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3031#undef ADD_FLAG 3032 } 3033 sbuf_printf(sb, "</Flags>\n"); 3034 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3035 g_raid3_disk_state2str(disk->d_state)); 3036 } else { 3037 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3038 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 
sc->sc_syncid); 3039 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3040 sbuf_printf(sb, "%s<Flags>", indent); 3041 if (sc->sc_flags == 0) 3042 sbuf_printf(sb, "NONE"); 3043 else { 3044 int first = 1; 3045 3046#define ADD_FLAG(flag, name) do { \ 3047 if ((sc->sc_flags & (flag)) != 0) { \ 3048 if (!first) \ 3049 sbuf_printf(sb, ", "); \ 3050 else \ 3051 first = 0; \ 3052 sbuf_printf(sb, name); \ 3053 } \ 3054} while (0) 3055 ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3056 ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, 3057 "ROUND-ROBIN"); 3058 ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); 3059#undef ADD_FLAG 3060 } 3061 sbuf_printf(sb, "</Flags>\n"); 3062 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3063 sc->sc_ndisks); 3064 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3065 g_raid3_device_state2str(sc->sc_state)); 3066 } 3067} 3068 3069static void 3070g_raid3_shutdown(void *arg, int howto) 3071{ 3072 struct g_class *mp; 3073 struct g_geom *gp, *gp2; 3074 3075 mp = arg; 3076 DROP_GIANT(); 3077 g_topology_lock(); 3078 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3079 if (gp->softc == NULL) 3080 continue; 3081 g_raid3_destroy(gp->softc, 1); 3082 } 3083 g_topology_unlock(); 3084 PICKUP_GIANT(); 3085#if 0 3086 tsleep(&gp, PRIBIO, "r3:shutdown", hz * 20); 3087#endif 3088} 3089 3090static void 3091g_raid3_init(struct g_class *mp) 3092{ 3093 3094 g_raid3_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync, 3095 g_raid3_shutdown, mp, SHUTDOWN_PRI_FIRST); 3096 if (g_raid3_ehtag == NULL) 3097 G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); 3098} 3099 3100static void 3101g_raid3_fini(struct g_class *mp) 3102{ 3103 3104 if (g_raid3_ehtag == NULL) 3105 return; 3106 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_ehtag); 3107} 3108 3109DECLARE_GEOM_CLASS(g_raid3_class, g_raid3); 3110