1133808Spjd/*- 2156876Spjd * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3133808Spjd * All rights reserved. 4133808Spjd * 5133808Spjd * Redistribution and use in source and binary forms, with or without 6133808Spjd * modification, are permitted provided that the following conditions 7133808Spjd * are met: 8133808Spjd * 1. Redistributions of source code must retain the above copyright 9133808Spjd * notice, this list of conditions and the following disclaimer. 10133808Spjd * 2. Redistributions in binary form must reproduce the above copyright 11133808Spjd * notice, this list of conditions and the following disclaimer in the 12133808Spjd * documentation and/or other materials provided with the distribution. 13155174Spjd * 14133808Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15133808Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16133808Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17133808Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18133808Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19133808Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20133808Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21133808Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22133808Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23133808Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24133808Spjd * SUCH DAMAGE. 25133808Spjd */ 26133808Spjd 27133808Spjd#include <sys/cdefs.h> 28133808Spjd__FBSDID("$FreeBSD: stable/11/sys/geom/raid3/g_raid3.c 332640 2018-04-17 02:18:04Z kevans $"); 29133808Spjd 30133808Spjd#include <sys/param.h> 31133808Spjd#include <sys/systm.h> 32133808Spjd#include <sys/kernel.h> 33133808Spjd#include <sys/module.h> 34133808Spjd#include <sys/limits.h> 35133808Spjd#include <sys/lock.h> 36133808Spjd#include <sys/mutex.h> 37133808Spjd#include <sys/bio.h> 38223921Sae#include <sys/sbuf.h> 39133808Spjd#include <sys/sysctl.h> 40133808Spjd#include <sys/malloc.h> 41137257Spjd#include <sys/eventhandler.h> 42133808Spjd#include <vm/uma.h> 43133808Spjd#include <geom/geom.h> 44133808Spjd#include <sys/proc.h> 45133808Spjd#include <sys/kthread.h> 46139451Sjhb#include <sys/sched.h> 47133808Spjd#include <geom/raid3/g_raid3.h> 48133808Spjd 49219029SnetchildFEATURE(geom_raid3, "GEOM RAID-3 functionality"); 50133808Spjd 51151897Srwatsonstatic MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data"); 52133808Spjd 53133808SpjdSYSCTL_DECL(_kern_geom); 54227309Sedstatic SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, 55227309Sed "GEOM_RAID3 stuff"); 56133825Spjdu_int g_raid3_debug = 0; 57267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid3_debug, 0, 58133808Spjd "Debug level"); 59135866Spjdstatic u_int g_raid3_timeout = 4; 60267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_raid3_timeout, 61133808Spjd 0, "Time to wait on all raid3 components"); 62137258Spjdstatic u_int g_raid3_idletime = 5; 63267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RWTUN, 64137258Spjd &g_raid3_idletime, 0, "Mark components as clean when idling"); 65155546Spjdstatic u_int g_raid3_disconnect_on_failure = 1; 66267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, 67155546Spjd &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 68156876Spjdstatic u_int g_raid3_syncreqs = 2; 69156612SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN, 70156612Spjd &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests."); 71160203Spjdstatic u_int g_raid3_use_malloc = 0; 72160203SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN, 73160203Spjd &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9)."); 74133808Spjd 75133808Spjdstatic u_int g_raid3_n64k = 50; 76267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RDTUN, &g_raid3_n64k, 0, 77133808Spjd "Maximum number of 64kB allocations"); 78133808Spjdstatic u_int g_raid3_n16k = 200; 79267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RDTUN, &g_raid3_n16k, 0, 80133808Spjd "Maximum number of 16kB allocations"); 81133808Spjdstatic u_int g_raid3_n4k = 1200; 82267992ShselaskySYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RDTUN, &g_raid3_n4k, 0, 83133808Spjd "Maximum number of 4kB allocations"); 84133808Spjd 85227309Sedstatic SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, 86133808Spjd "GEOM_RAID3 statistics"); 87134168Spjdstatic u_int g_raid3_parity_mismatch = 0; 88134168SpjdSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, 89134168Spjd &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); 90133808Spjd 91133808Spjd#define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 92133808Spjd G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 93133808Spjd msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 94133808Spjd G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 95133808Spjd} while (0) 96133808Spjd 97245444Smavstatic eventhandler_tag g_raid3_post_sync = NULL; 98245444Smavstatic int g_raid3_shutdown = 0; 99133808Spjd 100133808Spjdstatic int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp, 101133808Spjd struct g_geom *gp); 102133808Spjdstatic g_taste_t g_raid3_taste; 103137257Spjdstatic void g_raid3_init(struct g_class *mp); 104137257Spjdstatic void g_raid3_fini(struct g_class *mp); 105133808Spjd 106133808Spjdstruct g_class g_raid3_class = { 107133808Spjd .name = G_RAID3_CLASS_NAME, 108133808Spjd .version = G_VERSION, 109133808Spjd .ctlreq = g_raid3_config, 110133808Spjd .taste = g_raid3_taste, 111137257Spjd .destroy_geom = g_raid3_destroy_geom, 112137257Spjd .init = g_raid3_init, 113137257Spjd .fini = g_raid3_fini 114133808Spjd}; 115133808Spjd 116133808Spjd 117133808Spjdstatic void g_raid3_destroy_provider(struct g_raid3_softc *sc); 118139144Spjdstatic int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state); 119139144Spjdstatic void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force); 120133808Spjdstatic void g_raid3_dumpconf(struct sbuf *sb, const char *indent, 121133808Spjd struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 122133808Spjdstatic void g_raid3_sync_stop(struct g_raid3_softc *sc, int type); 123156612Spjdstatic int g_raid3_register_request(struct bio *pbp); 124156612Spjdstatic void g_raid3_sync_release(struct g_raid3_softc *sc); 125133808Spjd 126133808Spjd 127133808Spjdstatic const char * 128133808Spjdg_raid3_disk_state2str(int state) 129133808Spjd{ 130133808Spjd 131133808Spjd switch (state) { 132133808Spjd case G_RAID3_DISK_STATE_NODISK: 133133808Spjd return ("NODISK"); 134133808Spjd case G_RAID3_DISK_STATE_NONE: 135133808Spjd return ("NONE"); 136133808Spjd case G_RAID3_DISK_STATE_NEW: 137133808Spjd return ("NEW"); 138133808Spjd case G_RAID3_DISK_STATE_ACTIVE: 139133808Spjd return ("ACTIVE"); 140133808Spjd case G_RAID3_DISK_STATE_STALE: 141133808Spjd return ("STALE"); 142133808Spjd case G_RAID3_DISK_STATE_SYNCHRONIZING: 143133808Spjd return ("SYNCHRONIZING"); 144133808Spjd case G_RAID3_DISK_STATE_DISCONNECTED: 145133808Spjd return ("DISCONNECTED"); 146133808Spjd default: 147133808Spjd return ("INVALID"); 148133808Spjd } 149133808Spjd} 150133808Spjd 151133808Spjdstatic const char * 152133808Spjdg_raid3_device_state2str(int state) 153133808Spjd{ 154133808Spjd 155133808Spjd switch (state) { 156133808Spjd case G_RAID3_DEVICE_STATE_STARTING: 157133808Spjd return ("STARTING"); 158133808Spjd case G_RAID3_DEVICE_STATE_DEGRADED: 159133808Spjd return ("DEGRADED"); 160133808Spjd case G_RAID3_DEVICE_STATE_COMPLETE: 161133808Spjd return ("COMPLETE"); 162133808Spjd default: 163133808Spjd return ("INVALID"); 164133808Spjd } 165133808Spjd} 166133808Spjd 167133808Spjdconst char * 168133808Spjdg_raid3_get_diskname(struct g_raid3_disk *disk) 169133808Spjd{ 170133808Spjd 171133808Spjd if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 172133808Spjd return ("[unknown]"); 173133808Spjd return (disk->d_name); 174133808Spjd} 175133808Spjd 176160203Spjdstatic void * 177160203Spjdg_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags) 178160203Spjd{ 179160203Spjd void *ptr; 180200821Smav enum g_raid3_zones zone; 181160203Spjd 182200821Smav if (g_raid3_use_malloc || 183200821Smav (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) 184160203Spjd ptr = malloc(size, M_RAID3, flags); 185160203Spjd else { 186200821Smav ptr = uma_zalloc_arg(sc->sc_zones[zone].sz_zone, 187200821Smav &sc->sc_zones[zone], flags); 188200821Smav sc->sc_zones[zone].sz_requested++; 189160203Spjd if (ptr == NULL) 190200821Smav sc->sc_zones[zone].sz_failed++; 191160203Spjd } 192160203Spjd return (ptr); 193160203Spjd} 194160203Spjd 195160203Spjdstatic void 196160203Spjdg_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size) 197160203Spjd{ 198200821Smav enum g_raid3_zones zone; 199160203Spjd 200200821Smav if (g_raid3_use_malloc || 201200821Smav (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) 202160203Spjd free(ptr, M_RAID3); 203160203Spjd else { 204200821Smav uma_zfree_arg(sc->sc_zones[zone].sz_zone, 205200821Smav ptr, &sc->sc_zones[zone]); 206160203Spjd } 207160203Spjd} 208160203Spjd 209156612Spjdstatic int 210156612Spjdg_raid3_uma_ctor(void *mem, int size, void *arg, int flags) 211156612Spjd{ 212156612Spjd struct g_raid3_zone *sz = arg; 213156612Spjd 214157222Spjd if (sz->sz_max > 0 && sz->sz_inuse == sz->sz_max) 215156612Spjd return (ENOMEM); 216156612Spjd sz->sz_inuse++; 217156612Spjd return (0); 218156612Spjd} 219156612Spjd 220156612Spjdstatic void 221156612Spjdg_raid3_uma_dtor(void *mem, int size, void *arg) 222156612Spjd{ 223156612Spjd struct g_raid3_zone *sz = arg; 224156612Spjd 225156612Spjd sz->sz_inuse--; 226156612Spjd} 227156612Spjd 228201545Smav#define g_raid3_xor(src, dst, size) \ 229201545Smav _g_raid3_xor((uint64_t *)(src), \ 230133808Spjd (uint64_t *)(dst), (size_t)size) 231133808Spjdstatic void 232201545Smav_g_raid3_xor(uint64_t *src, uint64_t *dst, size_t size) 233133808Spjd{ 234133808Spjd 235133808Spjd KASSERT((size % 128) == 0, ("Invalid size: %zu.", size)); 236133808Spjd for (; size > 0; size -= 128) { 237201545Smav *dst++ ^= (*src++); 238201545Smav *dst++ ^= (*src++); 239201545Smav *dst++ ^= (*src++); 240201545Smav *dst++ ^= (*src++); 241201545Smav *dst++ ^= (*src++); 242201545Smav *dst++ ^= (*src++); 243201545Smav *dst++ ^= (*src++); 244201545Smav *dst++ ^= (*src++); 245201545Smav *dst++ ^= (*src++); 246201545Smav *dst++ ^= (*src++); 247201545Smav *dst++ ^= (*src++); 248201545Smav *dst++ ^= (*src++); 249201545Smav *dst++ ^= (*src++); 250201545Smav *dst++ ^= (*src++); 251201545Smav *dst++ ^= (*src++); 252201545Smav *dst++ ^= (*src++); 253133808Spjd } 254133808Spjd} 255133808Spjd 256134168Spjdstatic int 257134168Spjdg_raid3_is_zero(struct bio *bp) 258134168Spjd{ 259134168Spjd static const uint64_t zeros[] = { 260134168Spjd 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 261134168Spjd }; 262134168Spjd u_char *addr; 263134168Spjd ssize_t size; 264134168Spjd 265134168Spjd size = bp->bio_length; 266134168Spjd addr = (u_char *)bp->bio_data; 267134168Spjd for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { 268134168Spjd if (bcmp(addr, zeros, sizeof(zeros)) != 0) 269134168Spjd return (0); 270134168Spjd } 271134168Spjd return (1); 272134168Spjd} 273134168Spjd 274133808Spjd/* 275133808Spjd * --- Events handling functions --- 276133808Spjd * Events in geom_raid3 are used to maintain disks and device status 277133808Spjd * from one thread to simplify locking. 278133808Spjd */ 279133808Spjdstatic void 280133808Spjdg_raid3_event_free(struct g_raid3_event *ep) 281133808Spjd{ 282133808Spjd 283133808Spjd free(ep, M_RAID3); 284133808Spjd} 285133808Spjd 286133808Spjdint 287133808Spjdg_raid3_event_send(void *arg, int state, int flags) 288133808Spjd{ 289133808Spjd struct g_raid3_softc *sc; 290133808Spjd struct g_raid3_disk *disk; 291133808Spjd struct g_raid3_event *ep; 292133808Spjd int error; 293133808Spjd 294133808Spjd ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK); 295133808Spjd G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep); 296133808Spjd if ((flags & G_RAID3_EVENT_DEVICE) != 0) { 297133808Spjd disk = NULL; 298133808Spjd sc = arg; 299133808Spjd } else { 300133808Spjd disk = arg; 301133808Spjd sc = disk->d_softc; 302133808Spjd } 303133808Spjd ep->e_disk = disk; 304133808Spjd ep->e_state = state; 305133808Spjd ep->e_flags = flags; 306133808Spjd ep->e_error = 0; 307133808Spjd mtx_lock(&sc->sc_events_mtx); 308133808Spjd TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 309133808Spjd mtx_unlock(&sc->sc_events_mtx); 310133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 311133808Spjd mtx_lock(&sc->sc_queue_mtx); 312133808Spjd wakeup(sc); 313133808Spjd wakeup(&sc->sc_queue); 314133808Spjd mtx_unlock(&sc->sc_queue_mtx); 315133808Spjd if ((flags & G_RAID3_EVENT_DONTWAIT) != 0) 316133808Spjd return (0); 317156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 318133808Spjd G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 319156612Spjd sx_xunlock(&sc->sc_lock); 320133808Spjd while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) { 321133808Spjd mtx_lock(&sc->sc_events_mtx); 322133808Spjd MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event", 323133808Spjd hz * 5); 324133808Spjd } 325133808Spjd error = ep->e_error; 326133808Spjd g_raid3_event_free(ep); 327156612Spjd sx_xlock(&sc->sc_lock); 328133808Spjd return (error); 329133808Spjd} 330133808Spjd 331133808Spjdstatic struct g_raid3_event * 332133808Spjdg_raid3_event_get(struct g_raid3_softc *sc) 333133808Spjd{ 334133808Spjd struct g_raid3_event *ep; 335133808Spjd 336133808Spjd mtx_lock(&sc->sc_events_mtx); 337133808Spjd ep = TAILQ_FIRST(&sc->sc_events); 338133808Spjd mtx_unlock(&sc->sc_events_mtx); 339133808Spjd return (ep); 340133808Spjd} 341133808Spjd 342133808Spjdstatic void 343139144Spjdg_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep) 344139144Spjd{ 345139144Spjd 346139144Spjd mtx_lock(&sc->sc_events_mtx); 347139144Spjd TAILQ_REMOVE(&sc->sc_events, ep, e_next); 348139144Spjd mtx_unlock(&sc->sc_events_mtx); 349139144Spjd} 350139144Spjd 351139144Spjdstatic void 352133808Spjdg_raid3_event_cancel(struct g_raid3_disk *disk) 353133808Spjd{ 354133808Spjd struct g_raid3_softc *sc; 355133808Spjd struct g_raid3_event *ep, *tmpep; 356133808Spjd 357156612Spjd sc = disk->d_softc; 358156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 359133808Spjd 360133808Spjd mtx_lock(&sc->sc_events_mtx); 361133808Spjd TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 362133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) 363133808Spjd continue; 364133808Spjd if (ep->e_disk != disk) 365133808Spjd continue; 366133808Spjd TAILQ_REMOVE(&sc->sc_events, ep, e_next); 367133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 368133808Spjd g_raid3_event_free(ep); 369133808Spjd else { 370133808Spjd ep->e_error = ECANCELED; 371133808Spjd wakeup(ep); 372133808Spjd } 373133808Spjd } 374133808Spjd mtx_unlock(&sc->sc_events_mtx); 375133808Spjd} 376133808Spjd 377133808Spjd/* 378133808Spjd * Return the number of disks in the given state. 379133808Spjd * If state is equal to -1, count all connected disks. 380133808Spjd */ 381133808Spjdu_int 382133808Spjdg_raid3_ndisks(struct g_raid3_softc *sc, int state) 383133808Spjd{ 384133808Spjd struct g_raid3_disk *disk; 385133839Sobrien u_int n, ndisks; 386133808Spjd 387156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 388156612Spjd 389133839Sobrien for (n = ndisks = 0; n < sc->sc_ndisks; n++) { 390133808Spjd disk = &sc->sc_disks[n]; 391133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 392133808Spjd continue; 393133808Spjd if (state == -1 || disk->d_state == state) 394133808Spjd ndisks++; 395133808Spjd } 396133808Spjd return (ndisks); 397133808Spjd} 398133808Spjd 399133808Spjdstatic u_int 400133808Spjdg_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp) 401133808Spjd{ 402133808Spjd struct bio *bp; 403133808Spjd u_int nreqs = 0; 404133808Spjd 405133808Spjd mtx_lock(&sc->sc_queue_mtx); 406133808Spjd TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 407133808Spjd if (bp->bio_from == cp) 408133808Spjd nreqs++; 409133808Spjd } 410133808Spjd mtx_unlock(&sc->sc_queue_mtx); 411133808Spjd return (nreqs); 412133808Spjd} 413133808Spjd 414133808Spjdstatic int 415133808Spjdg_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp) 416133808Spjd{ 417133808Spjd 418137256Spjd if (cp->index > 0) { 419133808Spjd G_RAID3_DEBUG(2, 420133808Spjd "I/O requests for %s exist, can't destroy it now.", 421133808Spjd cp->provider->name); 422133808Spjd return (1); 423133808Spjd } 424133808Spjd if (g_raid3_nrequests(sc, cp) > 0) { 425133808Spjd G_RAID3_DEBUG(2, 426133808Spjd "I/O requests for %s in queue, can't destroy it now.", 427133808Spjd cp->provider->name); 428133808Spjd return (1); 429133808Spjd } 430133808Spjd return (0); 431133808Spjd} 432133808Spjd 433133808Spjdstatic void 434139144Spjdg_raid3_destroy_consumer(void *arg, int flags __unused) 435139144Spjd{ 436139144Spjd struct g_consumer *cp; 437139144Spjd 438156612Spjd g_topology_assert(); 439156612Spjd 440139144Spjd cp = arg; 441139144Spjd G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 442139144Spjd g_detach(cp); 443139144Spjd g_destroy_consumer(cp); 444139144Spjd} 445139144Spjd 446139144Spjdstatic void 447133808Spjdg_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 448133808Spjd{ 449139144Spjd struct g_provider *pp; 450139144Spjd int retaste_wait; 451133808Spjd 452133808Spjd g_topology_assert(); 453133808Spjd 454133808Spjd cp->private = NULL; 455133808Spjd if (g_raid3_is_busy(sc, cp)) 456133808Spjd return; 457133808Spjd G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name); 458139144Spjd pp = cp->provider; 459139144Spjd retaste_wait = 0; 460139144Spjd if (cp->acw == 1) { 461139144Spjd if ((pp->geom->flags & G_GEOM_WITHER) == 0) 462139144Spjd retaste_wait = 1; 463139144Spjd } 464139144Spjd G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 465139144Spjd -cp->acw, -cp->ace, 0); 466139144Spjd if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 467139144Spjd g_access(cp, -cp->acr, -cp->acw, -cp->ace); 468139144Spjd if (retaste_wait) { 469139144Spjd /* 470139144Spjd * After retaste event was send (inside g_access()), we can send 471139144Spjd * event to detach and destroy consumer. 472139144Spjd * A class, which has consumer to the given provider connected 473139144Spjd * will not receive retaste event for the provider. 474139144Spjd * This is the way how I ignore retaste events when I close 475139144Spjd * consumers opened for write: I detach and destroy consumer 476139144Spjd * after retaste event is sent. 477139144Spjd */ 478139144Spjd g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL); 479139144Spjd return; 480139144Spjd } 481139144Spjd G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name); 482133808Spjd g_detach(cp); 483133808Spjd g_destroy_consumer(cp); 484133808Spjd} 485133808Spjd 486133808Spjdstatic int 487133808Spjdg_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp) 488133808Spjd{ 489144144Spjd struct g_consumer *cp; 490133808Spjd int error; 491133808Spjd 492156612Spjd g_topology_assert_not(); 493133808Spjd KASSERT(disk->d_consumer == NULL, 494133808Spjd ("Disk already connected (device %s).", disk->d_softc->sc_name)); 495133808Spjd 496156612Spjd g_topology_lock(); 497144144Spjd cp = g_new_consumer(disk->d_softc->sc_geom); 498144144Spjd error = g_attach(cp, pp); 499144144Spjd if (error != 0) { 500144144Spjd g_destroy_consumer(cp); 501156612Spjd g_topology_unlock(); 502133808Spjd return (error); 503144144Spjd } 504144144Spjd error = g_access(cp, 1, 1, 1); 505156612Spjd g_topology_unlock(); 506139144Spjd if (error != 0) { 507144144Spjd g_detach(cp); 508144144Spjd g_destroy_consumer(cp); 509139144Spjd G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).", 510139144Spjd pp->name, error); 511139144Spjd return (error); 512139144Spjd } 513144144Spjd disk->d_consumer = cp; 514144144Spjd disk->d_consumer->private = disk; 515144144Spjd disk->d_consumer->index = 0; 516133808Spjd G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk)); 517133808Spjd return (0); 518133808Spjd} 519133808Spjd 520133808Spjdstatic void 521133808Spjdg_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 522133808Spjd{ 523133808Spjd 524133808Spjd g_topology_assert(); 525133808Spjd 526133808Spjd if (cp == NULL) 527133808Spjd return; 528139144Spjd if (cp->provider != NULL) 529133808Spjd g_raid3_kill_consumer(sc, cp); 530139144Spjd else 531133808Spjd g_destroy_consumer(cp); 532133808Spjd} 533133808Spjd 534133808Spjd/* 535133808Spjd * Initialize disk. This means allocate memory, create consumer, attach it 536133808Spjd * to the provider and open access (r1w1e1) to it. 537133808Spjd */ 538133808Spjdstatic struct g_raid3_disk * 539133808Spjdg_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp, 540133808Spjd struct g_raid3_metadata *md, int *errorp) 541133808Spjd{ 542133808Spjd struct g_raid3_disk *disk; 543133808Spjd int error; 544133808Spjd 545133808Spjd disk = &sc->sc_disks[md->md_no]; 546133808Spjd error = g_raid3_connect_disk(disk, pp); 547144144Spjd if (error != 0) { 548144144Spjd if (errorp != NULL) 549144144Spjd *errorp = error; 550144144Spjd return (NULL); 551144144Spjd } 552133808Spjd disk->d_state = G_RAID3_DISK_STATE_NONE; 553133808Spjd disk->d_flags = md->md_dflags; 554133808Spjd if (md->md_provider[0] != '\0') 555133808Spjd disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED; 556133808Spjd disk->d_sync.ds_consumer = NULL; 557133808Spjd disk->d_sync.ds_offset = md->md_sync_offset; 558133808Spjd disk->d_sync.ds_offset_done = md->md_sync_offset; 559139295Spjd disk->d_genid = md->md_genid; 560133808Spjd disk->d_sync.ds_syncid = md->md_syncid; 561133808Spjd if (errorp != NULL) 562133808Spjd *errorp = 0; 563133808Spjd return (disk); 564133808Spjd} 565133808Spjd 566133808Spjdstatic void 567133808Spjdg_raid3_destroy_disk(struct g_raid3_disk *disk) 568133808Spjd{ 569133808Spjd struct g_raid3_softc *sc; 570133808Spjd 571156612Spjd g_topology_assert_not(); 572156612Spjd sc = disk->d_softc; 573156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 574133808Spjd 575133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 576133808Spjd return; 577133808Spjd g_raid3_event_cancel(disk); 578133808Spjd switch (disk->d_state) { 579133808Spjd case G_RAID3_DISK_STATE_SYNCHRONIZING: 580133808Spjd if (sc->sc_syncdisk != NULL) 581133808Spjd g_raid3_sync_stop(sc, 1); 582133808Spjd /* FALLTHROUGH */ 583133808Spjd case G_RAID3_DISK_STATE_NEW: 584133808Spjd case G_RAID3_DISK_STATE_STALE: 585133808Spjd case G_RAID3_DISK_STATE_ACTIVE: 586156612Spjd g_topology_lock(); 587133808Spjd g_raid3_disconnect_consumer(sc, disk->d_consumer); 588156612Spjd g_topology_unlock(); 589133808Spjd disk->d_consumer = NULL; 590133808Spjd break; 591133808Spjd default: 592133808Spjd KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 593133808Spjd g_raid3_get_diskname(disk), 594133808Spjd g_raid3_disk_state2str(disk->d_state))); 595133808Spjd } 596133808Spjd disk->d_state = G_RAID3_DISK_STATE_NODISK; 597133808Spjd} 598133808Spjd 599133808Spjdstatic void 600133808Spjdg_raid3_destroy_device(struct g_raid3_softc *sc) 601133808Spjd{ 602133808Spjd struct g_raid3_event *ep; 603137257Spjd struct g_raid3_disk *disk; 604133808Spjd struct g_geom *gp; 605133808Spjd struct g_consumer *cp; 606133808Spjd u_int n; 607133808Spjd 608156612Spjd g_topology_assert_not(); 609156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 610133808Spjd 611133808Spjd gp = sc->sc_geom; 612133808Spjd if (sc->sc_provider != NULL) 613133808Spjd g_raid3_destroy_provider(sc); 614137257Spjd for (n = 0; n < sc->sc_ndisks; n++) { 615137257Spjd disk = &sc->sc_disks[n]; 616139144Spjd if (disk->d_state != G_RAID3_DISK_STATE_NODISK) { 617139144Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 618139144Spjd g_raid3_update_metadata(disk); 619139144Spjd g_raid3_destroy_disk(disk); 620139144Spjd } 621137257Spjd } 622133808Spjd while ((ep = g_raid3_event_get(sc)) != NULL) { 623139144Spjd g_raid3_event_remove(sc, ep); 624133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 625133808Spjd g_raid3_event_free(ep); 626133808Spjd else { 627133808Spjd ep->e_error = ECANCELED; 628133808Spjd ep->e_flags |= G_RAID3_EVENT_DONE; 629133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); 630133808Spjd mtx_lock(&sc->sc_events_mtx); 631133808Spjd wakeup(ep); 632133808Spjd mtx_unlock(&sc->sc_events_mtx); 633133808Spjd } 634133808Spjd } 635133808Spjd callout_drain(&sc->sc_callout); 636133808Spjd cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer); 637156612Spjd g_topology_lock(); 638133808Spjd if (cp != NULL) 639133808Spjd g_raid3_disconnect_consumer(sc, cp); 640133808Spjd g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 641156612Spjd G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name); 642156612Spjd g_wither_geom(gp, ENXIO); 643156612Spjd g_topology_unlock(); 644160203Spjd if (!g_raid3_use_malloc) { 645160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); 646160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); 647160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); 648160203Spjd } 649133808Spjd mtx_destroy(&sc->sc_queue_mtx); 650133808Spjd mtx_destroy(&sc->sc_events_mtx); 651156612Spjd sx_xunlock(&sc->sc_lock); 652156612Spjd sx_destroy(&sc->sc_lock); 653133808Spjd} 654133808Spjd 655133808Spjdstatic void 656133808Spjdg_raid3_orphan(struct g_consumer *cp) 657133808Spjd{ 658133808Spjd struct g_raid3_disk *disk; 659133808Spjd 660133808Spjd g_topology_assert(); 661133808Spjd 662133808Spjd disk = cp->private; 663133808Spjd if (disk == NULL) 664133808Spjd return; 665139671Spjd disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID; 666133808Spjd g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, 667133808Spjd G_RAID3_EVENT_DONTWAIT); 668133808Spjd} 669133808Spjd 670133808Spjdstatic int 671133808Spjdg_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 672133808Spjd{ 673133808Spjd struct g_raid3_softc *sc; 674133808Spjd struct g_consumer *cp; 675133808Spjd off_t offset, length; 676133808Spjd u_char *sector; 677139144Spjd int error = 0; 678133808Spjd 679156612Spjd g_topology_assert_not(); 680156612Spjd sc = disk->d_softc; 681156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 682133808Spjd 683133808Spjd cp = disk->d_consumer; 684133808Spjd KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 685133808Spjd KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); 686156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 687139144Spjd ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, 688139144Spjd cp->acw, cp->ace)); 689133808Spjd length = cp->provider->sectorsize; 690133808Spjd offset = cp->provider->mediasize - length; 691133808Spjd sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO); 692139144Spjd if (md != NULL) 693139144Spjd raid3_metadata_encode(md, sector); 694139144Spjd error = g_write_data(cp, offset, sector, length); 695133808Spjd free(sector, M_RAID3); 696133808Spjd if (error != 0) { 697162832Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 698162832Spjd G_RAID3_DEBUG(0, "Cannot write metadata on %s " 699162832Spjd "(device=%s, error=%d).", 700162832Spjd g_raid3_get_diskname(disk), sc->sc_name, error); 701162832Spjd disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 702162832Spjd } else { 703162832Spjd G_RAID3_DEBUG(1, "Cannot write metadata on %s " 704162832Spjd "(device=%s, error=%d).", 705162832Spjd g_raid3_get_diskname(disk), sc->sc_name, error); 706162832Spjd } 707162832Spjd if (g_raid3_disconnect_on_failure && 708155546Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 709162832Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 710162832Spjd g_raid3_event_send(disk, 711162832Spjd G_RAID3_DISK_STATE_DISCONNECTED, 712162832Spjd G_RAID3_EVENT_DONTWAIT); 713162832Spjd } 714133808Spjd } 715133808Spjd return (error); 716133808Spjd} 717133808Spjd 718133808Spjdint 719133808Spjdg_raid3_clear_metadata(struct g_raid3_disk *disk) 720133808Spjd{ 721133808Spjd int error; 722133808Spjd 723156612Spjd g_topology_assert_not(); 724156612Spjd sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); 725156612Spjd 726133808Spjd error = g_raid3_write_metadata(disk, NULL); 727133808Spjd if (error == 0) { 728133808Spjd G_RAID3_DEBUG(2, "Metadata on %s cleared.", 729133808Spjd g_raid3_get_diskname(disk)); 730133808Spjd } else { 731133808Spjd G_RAID3_DEBUG(0, 732133808Spjd "Cannot clear metadata on disk %s (error=%d).", 733133808Spjd g_raid3_get_diskname(disk), error); 734133808Spjd } 735133808Spjd return (error); 736133808Spjd} 737133808Spjd 738133808Spjdvoid 739133808Spjdg_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 740133808Spjd{ 741133808Spjd struct g_raid3_softc *sc; 742142727Spjd struct g_provider *pp; 743133808Spjd 744133808Spjd sc = disk->d_softc; 745133808Spjd strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic)); 746133808Spjd md->md_version = G_RAID3_VERSION; 747133808Spjd strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 748133808Spjd md->md_id = sc->sc_id; 749133808Spjd md->md_all = sc->sc_ndisks; 750139295Spjd md->md_genid = sc->sc_genid; 751133808Spjd md->md_mediasize = sc->sc_mediasize; 752133808Spjd md->md_sectorsize = sc->sc_sectorsize; 753133808Spjd md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK); 754133808Spjd md->md_no = disk->d_no; 755133808Spjd md->md_syncid = disk->d_sync.ds_syncid; 756133808Spjd md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK); 757157838Spjd if (disk->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 758133808Spjd md->md_sync_offset = 0; 759157838Spjd else { 760157838Spjd md->md_sync_offset = 761157838Spjd disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1); 762157838Spjd } 763142727Spjd if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL) 764142727Spjd pp = disk->d_consumer->provider; 765142727Spjd else 766142727Spjd pp = NULL; 767142727Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL) 768142727Spjd strlcpy(md->md_provider, pp->name, sizeof(md->md_provider)); 769142727Spjd else 770133808Spjd bzero(md->md_provider, sizeof(md->md_provider)); 771142727Spjd if (pp != NULL) 772142727Spjd md->md_provsize = pp->mediasize; 773142727Spjd else 774142727Spjd md->md_provsize = 0; 775133808Spjd} 776133808Spjd 777133808Spjdvoid 778133808Spjdg_raid3_update_metadata(struct g_raid3_disk *disk) 779133808Spjd{ 780156612Spjd struct g_raid3_softc *sc; 781133808Spjd struct g_raid3_metadata md; 782133808Spjd int error; 783133808Spjd 784156612Spjd g_topology_assert_not(); 785156612Spjd sc = disk->d_softc; 786156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 787156612Spjd 788133808Spjd g_raid3_fill_metadata(disk, &md); 789133808Spjd error = g_raid3_write_metadata(disk, &md); 790133808Spjd if (error == 0) { 791133808Spjd G_RAID3_DEBUG(2, "Metadata on %s updated.", 792133808Spjd g_raid3_get_diskname(disk)); 793133808Spjd } else { 794133808Spjd G_RAID3_DEBUG(0, 795133808Spjd "Cannot update metadata on disk %s (error=%d).", 796133808Spjd g_raid3_get_diskname(disk), error); 797133808Spjd } 798133808Spjd} 799133808Spjd 800133808Spjdstatic void 801139144Spjdg_raid3_bump_syncid(struct g_raid3_softc *sc) 802133808Spjd{ 803133808Spjd struct g_raid3_disk *disk; 804133808Spjd u_int n; 805133808Spjd 806156612Spjd g_topology_assert_not(); 807156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 808133808Spjd KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 809133808Spjd ("%s called with no active disks (device=%s).", __func__, 810133808Spjd sc->sc_name)); 811133808Spjd 812133808Spjd sc->sc_syncid++; 813139295Spjd G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 814139295Spjd sc->sc_syncid); 815133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 816133808Spjd disk = &sc->sc_disks[n]; 817133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 818133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 819133808Spjd disk->d_sync.ds_syncid = sc->sc_syncid; 820133808Spjd g_raid3_update_metadata(disk); 821133808Spjd } 822133808Spjd } 823133808Spjd} 824133808Spjd 825137258Spjdstatic void 826139295Spjdg_raid3_bump_genid(struct g_raid3_softc *sc) 827139295Spjd{ 828139295Spjd struct g_raid3_disk *disk; 829139295Spjd u_int n; 830139295Spjd 831156612Spjd g_topology_assert_not(); 832156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 833139295Spjd KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 834139295Spjd ("%s called with no active disks (device=%s).", __func__, 835139295Spjd sc->sc_name)); 836139295Spjd 837139295Spjd sc->sc_genid++; 838139295Spjd G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 839139295Spjd sc->sc_genid); 840139295Spjd for (n = 0; n < sc->sc_ndisks; n++) { 841139295Spjd disk = &sc->sc_disks[n]; 842139295Spjd if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 843139295Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 844139295Spjd disk->d_genid = sc->sc_genid; 845139295Spjd g_raid3_update_metadata(disk); 846139295Spjd } 847139295Spjd } 848139295Spjd} 849139295Spjd 850155540Spjdstatic int 851156612Spjdg_raid3_idle(struct g_raid3_softc *sc, int acw) 852137258Spjd{ 853137258Spjd struct g_raid3_disk *disk; 854137258Spjd u_int i; 855155540Spjd int timeout; 856137258Spjd 857156612Spjd g_topology_assert_not(); 858156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 859156612Spjd 860155540Spjd if (sc->sc_provider == NULL) 861155540Spjd return (0); 862163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 863163888Spjd return (0); 864155540Spjd if (sc->sc_idle) 865155540Spjd return (0); 866155540Spjd if (sc->sc_writes > 0) 867155540Spjd return (0); 868156612Spjd if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { 869155581Spjd timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write); 870245444Smav if (!g_raid3_shutdown && timeout > 0) 871155540Spjd return (timeout); 872155540Spjd } 873137258Spjd sc->sc_idle = 1; 874137258Spjd for (i = 0; i < sc->sc_ndisks; i++) { 875137258Spjd disk = &sc->sc_disks[i]; 876137258Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 877137258Spjd continue; 878137258Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 879137258Spjd g_raid3_get_diskname(disk), sc->sc_name); 880137258Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 881137258Spjd g_raid3_update_metadata(disk); 882137258Spjd } 883155540Spjd return (0); 884137258Spjd} 885137258Spjd 886137258Spjdstatic void 887137258Spjdg_raid3_unidle(struct g_raid3_softc *sc) 888137258Spjd{ 889137258Spjd struct g_raid3_disk *disk; 890137258Spjd u_int i; 891137258Spjd 892156612Spjd g_topology_assert_not(); 893156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 894156612Spjd 895163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 896163888Spjd return; 897137258Spjd sc->sc_idle = 0; 898155581Spjd sc->sc_last_write = time_uptime; 899137258Spjd for (i = 0; i < sc->sc_ndisks; i++) { 900137258Spjd disk = &sc->sc_disks[i]; 901137258Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 902137258Spjd continue; 903137258Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 904137258Spjd g_raid3_get_diskname(disk), sc->sc_name); 905137258Spjd disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 906137258Spjd g_raid3_update_metadata(disk); 907137258Spjd } 908137258Spjd} 909137258Spjd 910155174Spjd/* 911133808Spjd * Treat bio_driver1 field in parent bio as list head and field bio_caller1 912133808Spjd * in child bio as pointer to the next element on the list. 913133808Spjd */ 914133808Spjd#define G_RAID3_HEAD_BIO(pbp) (pbp)->bio_driver1 915133808Spjd 916133808Spjd#define G_RAID3_NEXT_BIO(cbp) (cbp)->bio_caller1 917133808Spjd 918133808Spjd#define G_RAID3_FOREACH_BIO(pbp, bp) \ 919133808Spjd for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL; \ 920133808Spjd (bp) = G_RAID3_NEXT_BIO(bp)) 921133808Spjd 922133808Spjd#define G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp) \ 923133808Spjd for ((bp) = G_RAID3_HEAD_BIO(pbp); \ 924133808Spjd (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1); \ 925133808Spjd (bp) = (tmpbp)) 926133808Spjd 927133808Spjdstatic void 928133808Spjdg_raid3_init_bio(struct bio *pbp) 929133808Spjd{ 930133808Spjd 931133808Spjd G_RAID3_HEAD_BIO(pbp) = NULL; 932133808Spjd} 933133808Spjd 934133808Spjdstatic void 935134168Spjdg_raid3_remove_bio(struct bio *cbp) 936134168Spjd{ 937134168Spjd struct bio *pbp, *bp; 938134168Spjd 939134168Spjd pbp = cbp->bio_parent; 940134168Spjd if (G_RAID3_HEAD_BIO(pbp) == cbp) 941134168Spjd G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 942134168Spjd else { 943134168Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 944134168Spjd if (G_RAID3_NEXT_BIO(bp) == cbp) { 945134168Spjd G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 946134168Spjd break; 947134168Spjd } 948134168Spjd } 949134168Spjd } 950134168Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 951134168Spjd} 952134168Spjd 953134168Spjdstatic void 954134168Spjdg_raid3_replace_bio(struct bio *sbp, struct bio *dbp) 955134168Spjd{ 956134168Spjd struct bio *pbp, *bp; 957134168Spjd 958134168Spjd g_raid3_remove_bio(sbp); 959134168Spjd pbp = dbp->bio_parent; 960134168Spjd G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); 961134168Spjd if (G_RAID3_HEAD_BIO(pbp) == dbp) 962134168Spjd G_RAID3_HEAD_BIO(pbp) = sbp; 963134168Spjd else { 964134168Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 965134168Spjd if (G_RAID3_NEXT_BIO(bp) == dbp) { 966134168Spjd G_RAID3_NEXT_BIO(bp) = sbp; 967134168Spjd break; 968134168Spjd } 969134168Spjd } 970134168Spjd } 971134168Spjd G_RAID3_NEXT_BIO(dbp) = NULL; 972134168Spjd} 973134168Spjd 974134168Spjdstatic void 975133808Spjdg_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) 976133808Spjd{ 977133808Spjd struct bio *bp, *pbp; 978133808Spjd size_t size; 979133808Spjd 980133808Spjd pbp = cbp->bio_parent; 981133808Spjd pbp->bio_children--; 982133808Spjd KASSERT(cbp->bio_data != NULL, ("NULL bio_data")); 983133808Spjd size = pbp->bio_length / (sc->sc_ndisks - 1); 984160203Spjd g_raid3_free(sc, cbp->bio_data, size); 985133808Spjd if (G_RAID3_HEAD_BIO(pbp) == cbp) { 986133808Spjd G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 987133808Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 988133808Spjd g_destroy_bio(cbp); 989133808Spjd } else { 990133808Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 991133808Spjd if (G_RAID3_NEXT_BIO(bp) == cbp) 992133808Spjd break; 993133808Spjd } 994134168Spjd if (bp != NULL) { 995134168Spjd KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, 996134168Spjd ("NULL bp->bio_driver1")); 997134168Spjd G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 998134168Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 999134168Spjd } 1000133808Spjd g_destroy_bio(cbp); 1001133808Spjd } 1002133808Spjd} 1003133808Spjd 1004133808Spjdstatic struct bio * 1005133808Spjdg_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp) 1006133808Spjd{ 1007133808Spjd struct bio *bp, *cbp; 1008133808Spjd size_t size; 1009156612Spjd int memflag; 1010133808Spjd 1011133808Spjd cbp = g_clone_bio(pbp); 1012133808Spjd if (cbp == NULL) 1013133808Spjd return (NULL); 1014133808Spjd size = pbp->bio_length / (sc->sc_ndisks - 1); 1015156612Spjd if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 1016156612Spjd memflag = M_WAITOK; 1017156612Spjd else 1018156612Spjd memflag = M_NOWAIT; 1019160203Spjd cbp->bio_data = g_raid3_alloc(sc, size, memflag); 1020133808Spjd if (cbp->bio_data == NULL) { 1021133808Spjd pbp->bio_children--; 1022133808Spjd g_destroy_bio(cbp); 1023133808Spjd return (NULL); 1024133808Spjd } 1025133808Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 1026133808Spjd if (G_RAID3_HEAD_BIO(pbp) == NULL) 1027133808Spjd G_RAID3_HEAD_BIO(pbp) = cbp; 1028133808Spjd else { 1029133808Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 1030133808Spjd if (G_RAID3_NEXT_BIO(bp) == NULL) { 1031133808Spjd G_RAID3_NEXT_BIO(bp) = cbp; 1032133808Spjd break; 1033133808Spjd } 1034133808Spjd } 1035133808Spjd } 1036133808Spjd return (cbp); 1037133808Spjd} 1038133808Spjd 1039133808Spjdstatic void 1040133808Spjdg_raid3_scatter(struct bio *pbp) 1041133808Spjd{ 1042133808Spjd struct g_raid3_softc *sc; 1043133808Spjd struct g_raid3_disk *disk; 1044158290Spjd struct bio *bp, *cbp, *tmpbp; 1045133808Spjd off_t atom, cadd, padd, left; 1046201545Smav int first; 1047133808Spjd 1048133808Spjd sc = pbp->bio_to->geom->softc; 1049133808Spjd bp = NULL; 1050133808Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1051133808Spjd /* 1052133808Spjd * Find bio for which we should calculate data. 1053133808Spjd */ 1054133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1055133808Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1056133808Spjd bp = cbp; 1057133808Spjd break; 1058133808Spjd } 1059133808Spjd } 1060133808Spjd KASSERT(bp != NULL, ("NULL parity bio.")); 1061133808Spjd } 1062133808Spjd atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1063133808Spjd cadd = padd = 0; 1064133808Spjd for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1065133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1066133808Spjd if (cbp == bp) 1067133808Spjd continue; 1068133808Spjd bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom); 1069133808Spjd padd += atom; 1070133808Spjd } 1071133808Spjd cadd += atom; 1072133808Spjd } 1073133808Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1074133808Spjd /* 1075133808Spjd * Calculate parity. 1076133808Spjd */ 1077201545Smav first = 1; 1078133808Spjd G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1079133808Spjd if (cbp == bp) 1080133808Spjd continue; 1081201545Smav if (first) { 1082201545Smav bcopy(cbp->bio_data, bp->bio_data, 1083201545Smav bp->bio_length); 1084201545Smav first = 0; 1085201545Smav } else { 1086201545Smav g_raid3_xor(cbp->bio_data, bp->bio_data, 1087201545Smav bp->bio_length); 1088201545Smav } 1089133808Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0) 1090133808Spjd g_raid3_destroy_bio(sc, cbp); 1091133808Spjd } 1092133808Spjd } 1093158290Spjd G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1094133808Spjd struct g_consumer *cp; 1095133808Spjd 1096133808Spjd disk = cbp->bio_caller2; 1097133808Spjd cp = disk->d_consumer; 1098133808Spjd cbp->bio_to = cp->provider; 1099133808Spjd G_RAID3_LOGREQ(3, cbp, "Sending request."); 1100156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1101139144Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1102139144Spjd cp->acr, cp->acw, cp->ace)); 1103137256Spjd cp->index++; 1104155540Spjd sc->sc_writes++; 1105133808Spjd g_io_request(cbp, cp); 1106133808Spjd } 1107133808Spjd} 1108133808Spjd 1109133808Spjdstatic void 1110133808Spjdg_raid3_gather(struct bio *pbp) 1111133808Spjd{ 1112133808Spjd struct g_raid3_softc *sc; 1113133808Spjd struct g_raid3_disk *disk; 1114134124Spjd struct bio *xbp, *fbp, *cbp; 1115133808Spjd off_t atom, cadd, padd, left; 1116133808Spjd 1117133808Spjd sc = pbp->bio_to->geom->softc; 1118134124Spjd /* 1119134124Spjd * Find bio for which we have to calculate data. 1120134124Spjd * While going through this path, check if all requests 1121134124Spjd * succeeded, if not, deny whole request. 1122134124Spjd * If we're in COMPLETE mode, we allow one request to fail, 1123134124Spjd * so if we find one, we're sending it to the parity consumer. 1124134124Spjd * If there are more failed requests, we deny whole request. 1125134124Spjd */ 1126134124Spjd xbp = fbp = NULL; 1127134124Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1128134124Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1129134124Spjd KASSERT(xbp == NULL, ("More than one parity bio.")); 1130134124Spjd xbp = cbp; 1131134124Spjd } 1132134124Spjd if (cbp->bio_error == 0) 1133134124Spjd continue; 1134133808Spjd /* 1135134124Spjd * Found failed request. 1136133808Spjd */ 1137134124Spjd if (fbp == NULL) { 1138134124Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { 1139133808Spjd /* 1140134124Spjd * We are already in degraded mode, so we can't 1141134124Spjd * accept any failures. 1142133808Spjd */ 1143134124Spjd if (pbp->bio_error == 0) 1144155544Spjd pbp->bio_error = cbp->bio_error; 1145134124Spjd } else { 1146134124Spjd fbp = cbp; 1147133808Spjd } 1148134124Spjd } else { 1149133808Spjd /* 1150134124Spjd * Next failed request, that's too many. 1151133808Spjd */ 1152134124Spjd if (pbp->bio_error == 0) 1153134124Spjd pbp->bio_error = fbp->bio_error; 1154134124Spjd } 1155155546Spjd disk = cbp->bio_caller2; 1156155546Spjd if (disk == NULL) 1157155546Spjd continue; 1158155546Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 1159155546Spjd disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 1160155546Spjd G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", 1161155546Spjd cbp->bio_error); 1162155546Spjd } else { 1163155546Spjd G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", 1164155546Spjd cbp->bio_error); 1165155546Spjd } 1166155546Spjd if (g_raid3_disconnect_on_failure && 1167155546Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1168155546Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1169155546Spjd g_raid3_event_send(disk, 1170155546Spjd G_RAID3_DISK_STATE_DISCONNECTED, 1171155546Spjd G_RAID3_EVENT_DONTWAIT); 1172155546Spjd } 1173134124Spjd } 1174134124Spjd if (pbp->bio_error != 0) 1175134124Spjd goto finish; 1176134168Spjd if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1177134168Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; 1178134168Spjd if (xbp != fbp) 1179134168Spjd g_raid3_replace_bio(xbp, fbp); 1180134168Spjd g_raid3_destroy_bio(sc, fbp); 1181134168Spjd } else if (fbp != NULL) { 1182134124Spjd struct g_consumer *cp; 1183134124Spjd 1184134124Spjd /* 1185134124Spjd * One request failed, so send the same request to 1186134124Spjd * the parity consumer. 1187134124Spjd */ 1188134124Spjd disk = pbp->bio_driver2; 1189134124Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1190134124Spjd pbp->bio_error = fbp->bio_error; 1191133808Spjd goto finish; 1192133808Spjd } 1193134124Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1194134124Spjd pbp->bio_inbed--; 1195134124Spjd fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); 1196134124Spjd if (disk->d_no == sc->sc_ndisks - 1) 1197134124Spjd fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1198134124Spjd fbp->bio_error = 0; 1199134124Spjd fbp->bio_completed = 0; 1200134124Spjd fbp->bio_children = 0; 1201134124Spjd fbp->bio_inbed = 0; 1202134124Spjd cp = disk->d_consumer; 1203134124Spjd fbp->bio_caller2 = disk; 1204134124Spjd fbp->bio_to = cp->provider; 1205134124Spjd G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); 1206156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1207134124Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1208134124Spjd cp->acr, cp->acw, cp->ace)); 1209137256Spjd cp->index++; 1210134124Spjd g_io_request(fbp, cp); 1211134124Spjd return; 1212134124Spjd } 1213134124Spjd if (xbp != NULL) { 1214133808Spjd /* 1215133808Spjd * Calculate parity. 1216133808Spjd */ 1217133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1218133808Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) 1219133808Spjd continue; 1220201545Smav g_raid3_xor(cbp->bio_data, xbp->bio_data, 1221134124Spjd xbp->bio_length); 1222133808Spjd } 1223134124Spjd xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; 1224134168Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1225134168Spjd if (!g_raid3_is_zero(xbp)) { 1226134168Spjd g_raid3_parity_mismatch++; 1227134168Spjd pbp->bio_error = EIO; 1228134168Spjd goto finish; 1229134168Spjd } 1230134168Spjd g_raid3_destroy_bio(sc, xbp); 1231134168Spjd } 1232133808Spjd } 1233133808Spjd atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1234133808Spjd cadd = padd = 0; 1235133808Spjd for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1236133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1237133808Spjd bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom); 1238133808Spjd pbp->bio_completed += atom; 1239133808Spjd padd += atom; 1240133808Spjd } 1241133808Spjd cadd += atom; 1242133808Spjd } 1243133808Spjdfinish: 1244133808Spjd if (pbp->bio_error == 0) 1245133808Spjd G_RAID3_LOGREQ(3, pbp, "Request finished."); 1246134303Spjd else { 1247134303Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) 1248134303Spjd G_RAID3_LOGREQ(1, pbp, "Verification error."); 1249134303Spjd else 1250134303Spjd G_RAID3_LOGREQ(0, pbp, "Request failed."); 1251134303Spjd } 1252134168Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; 1253133808Spjd while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1254133808Spjd g_raid3_destroy_bio(sc, cbp); 1255155906Spjd g_io_deliver(pbp, pbp->bio_error); 1256133808Spjd} 1257133808Spjd 1258133808Spjdstatic void 1259133808Spjdg_raid3_done(struct bio *bp) 1260133808Spjd{ 1261133808Spjd struct g_raid3_softc *sc; 1262133808Spjd 1263133808Spjd sc = bp->bio_from->geom->softc; 1264155174Spjd bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR; 1265133808Spjd G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error); 1266133808Spjd mtx_lock(&sc->sc_queue_mtx); 1267133808Spjd bioq_insert_head(&sc->sc_queue, bp); 1268201567Smav mtx_unlock(&sc->sc_queue_mtx); 1269133808Spjd wakeup(sc); 1270133808Spjd wakeup(&sc->sc_queue); 1271133808Spjd} 1272133808Spjd 1273133808Spjdstatic void 1274133808Spjdg_raid3_regular_request(struct bio *cbp) 1275133808Spjd{ 1276133808Spjd struct g_raid3_softc *sc; 1277133808Spjd struct g_raid3_disk *disk; 1278133808Spjd struct bio *pbp; 1279133808Spjd 1280133808Spjd g_topology_assert_not(); 1281133808Spjd 1282133808Spjd pbp = cbp->bio_parent; 1283133808Spjd sc = pbp->bio_to->geom->softc; 1284155540Spjd cbp->bio_from->index--; 1285155540Spjd if (cbp->bio_cmd == BIO_WRITE) 1286155540Spjd sc->sc_writes--; 1287133808Spjd disk = cbp->bio_from->private; 1288133808Spjd if (disk == NULL) { 1289133808Spjd g_topology_lock(); 1290133808Spjd g_raid3_kill_consumer(sc, cbp->bio_from); 1291133808Spjd g_topology_unlock(); 1292133808Spjd } 1293133808Spjd 1294133808Spjd G_RAID3_LOGREQ(3, cbp, "Request finished."); 1295133808Spjd pbp->bio_inbed++; 1296133808Spjd KASSERT(pbp->bio_inbed <= pbp->bio_children, 1297133808Spjd ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, 1298133808Spjd pbp->bio_children)); 1299133808Spjd if (pbp->bio_inbed != pbp->bio_children) 1300133808Spjd return; 1301133808Spjd switch (pbp->bio_cmd) { 1302133808Spjd case BIO_READ: 1303133808Spjd g_raid3_gather(pbp); 1304133808Spjd break; 1305133808Spjd case BIO_WRITE: 1306133808Spjd case BIO_DELETE: 1307133808Spjd { 1308133808Spjd int error = 0; 1309133808Spjd 1310133808Spjd pbp->bio_completed = pbp->bio_length; 1311133808Spjd while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) { 1312155546Spjd if (cbp->bio_error == 0) { 1313155546Spjd g_raid3_destroy_bio(sc, cbp); 1314155546Spjd continue; 1315133808Spjd } 1316155546Spjd 1317155546Spjd if (error == 0) 1318155546Spjd error = cbp->bio_error; 1319155546Spjd else if (pbp->bio_error == 0) { 1320155546Spjd /* 1321155546Spjd * Next failed request, that's too many. 1322155546Spjd */ 1323155546Spjd pbp->bio_error = error; 1324155546Spjd } 1325155546Spjd 1326155546Spjd disk = cbp->bio_caller2; 1327155546Spjd if (disk == NULL) { 1328155546Spjd g_raid3_destroy_bio(sc, cbp); 1329155546Spjd continue; 1330155546Spjd } 1331155546Spjd 1332155546Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 1333155546Spjd disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 1334155546Spjd G_RAID3_LOGREQ(0, cbp, 1335155546Spjd "Request failed (error=%d).", 1336155546Spjd cbp->bio_error); 1337155546Spjd } else { 1338155546Spjd G_RAID3_LOGREQ(1, cbp, 1339155546Spjd "Request failed (error=%d).", 1340155546Spjd cbp->bio_error); 1341155546Spjd } 1342155546Spjd if (g_raid3_disconnect_on_failure && 1343155546Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1344155546Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1345155546Spjd g_raid3_event_send(disk, 1346155546Spjd G_RAID3_DISK_STATE_DISCONNECTED, 1347155546Spjd G_RAID3_EVENT_DONTWAIT); 1348155546Spjd } 1349133808Spjd g_raid3_destroy_bio(sc, cbp); 1350133808Spjd } 1351133808Spjd if (pbp->bio_error == 0) 1352133808Spjd G_RAID3_LOGREQ(3, pbp, "Request finished."); 1353133808Spjd else 1354133808Spjd G_RAID3_LOGREQ(0, pbp, "Request failed."); 1355133808Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED; 1356133808Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY; 1357156612Spjd bioq_remove(&sc->sc_inflight, pbp); 1358156612Spjd /* Release delayed sync requests if possible. */ 1359156612Spjd g_raid3_sync_release(sc); 1360133808Spjd g_io_deliver(pbp, pbp->bio_error); 1361133808Spjd break; 1362133808Spjd } 1363133808Spjd } 1364133808Spjd} 1365133808Spjd 1366133808Spjdstatic void 1367133808Spjdg_raid3_sync_done(struct bio *bp) 1368133808Spjd{ 1369133808Spjd struct g_raid3_softc *sc; 1370133808Spjd 1371133808Spjd G_RAID3_LOGREQ(3, bp, "Synchronization request delivered."); 1372133808Spjd sc = bp->bio_from->geom->softc; 1373133808Spjd bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC; 1374133808Spjd mtx_lock(&sc->sc_queue_mtx); 1375133808Spjd bioq_insert_head(&sc->sc_queue, bp); 1376201567Smav mtx_unlock(&sc->sc_queue_mtx); 1377133808Spjd wakeup(sc); 1378133808Spjd wakeup(&sc->sc_queue); 1379133808Spjd} 1380133808Spjd 1381133808Spjdstatic void 1382163836Spjdg_raid3_flush(struct g_raid3_softc *sc, struct bio *bp) 1383163836Spjd{ 1384163836Spjd struct bio_queue_head queue; 1385163836Spjd struct g_raid3_disk *disk; 1386163836Spjd struct g_consumer *cp; 1387163836Spjd struct bio *cbp; 1388163836Spjd u_int i; 1389163836Spjd 1390163836Spjd bioq_init(&queue); 1391163836Spjd for (i = 0; i < sc->sc_ndisks; i++) { 1392163836Spjd disk = &sc->sc_disks[i]; 1393163836Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 1394163836Spjd continue; 1395163836Spjd cbp = g_clone_bio(bp); 1396163836Spjd if (cbp == NULL) { 1397163836Spjd for (cbp = bioq_first(&queue); cbp != NULL; 1398163836Spjd cbp = bioq_first(&queue)) { 1399163836Spjd bioq_remove(&queue, cbp); 1400163836Spjd g_destroy_bio(cbp); 1401163836Spjd } 1402163836Spjd if (bp->bio_error == 0) 1403163836Spjd bp->bio_error = ENOMEM; 1404163836Spjd g_io_deliver(bp, bp->bio_error); 1405163836Spjd return; 1406163836Spjd } 1407163836Spjd bioq_insert_tail(&queue, cbp); 1408163836Spjd cbp->bio_done = g_std_done; 1409163836Spjd cbp->bio_caller1 = disk; 1410163836Spjd cbp->bio_to = disk->d_consumer->provider; 1411163836Spjd } 1412163836Spjd for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 1413163836Spjd bioq_remove(&queue, cbp); 1414163836Spjd G_RAID3_LOGREQ(3, cbp, "Sending request."); 1415163836Spjd disk = cbp->bio_caller1; 1416163836Spjd cbp->bio_caller1 = NULL; 1417163836Spjd cp = disk->d_consumer; 1418163836Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1419163836Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1420163836Spjd cp->acr, cp->acw, cp->ace)); 1421163836Spjd g_io_request(cbp, disk->d_consumer); 1422163836Spjd } 1423163836Spjd} 1424163836Spjd 1425163836Spjdstatic void 1426133808Spjdg_raid3_start(struct bio *bp) 1427133808Spjd{ 1428133808Spjd struct g_raid3_softc *sc; 1429133808Spjd 1430133808Spjd sc = bp->bio_to->geom->softc; 1431133808Spjd /* 1432133808Spjd * If sc == NULL or there are no valid disks, provider's error 1433133808Spjd * should be set and g_raid3_start() should not be called at all. 1434133808Spjd */ 1435133808Spjd KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 1436133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE), 1437133808Spjd ("Provider's error should be set (error=%d)(device=%s).", 1438133808Spjd bp->bio_to->error, bp->bio_to->name)); 1439133808Spjd G_RAID3_LOGREQ(3, bp, "Request received."); 1440133808Spjd 1441133808Spjd switch (bp->bio_cmd) { 1442133808Spjd case BIO_READ: 1443133808Spjd case BIO_WRITE: 1444133808Spjd case BIO_DELETE: 1445133808Spjd break; 1446163836Spjd case BIO_FLUSH: 1447163836Spjd g_raid3_flush(sc, bp); 1448163836Spjd return; 1449133808Spjd case BIO_GETATTR: 1450133808Spjd default: 1451133808Spjd g_io_deliver(bp, EOPNOTSUPP); 1452133808Spjd return; 1453133808Spjd } 1454133808Spjd mtx_lock(&sc->sc_queue_mtx); 1455133808Spjd bioq_insert_tail(&sc->sc_queue, bp); 1456201567Smav mtx_unlock(&sc->sc_queue_mtx); 1457133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1458133808Spjd wakeup(sc); 1459133808Spjd} 1460133808Spjd 1461133808Spjd/* 1462156612Spjd * Return TRUE if the given request is colliding with a in-progress 1463156612Spjd * synchronization request. 1464133808Spjd */ 1465156612Spjdstatic int 1466156612Spjdg_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp) 1467133808Spjd{ 1468133808Spjd struct g_raid3_disk *disk; 1469156612Spjd struct bio *sbp; 1470156612Spjd off_t rstart, rend, sstart, send; 1471156612Spjd int i; 1472133808Spjd 1473133808Spjd disk = sc->sc_syncdisk; 1474156612Spjd if (disk == NULL) 1475156612Spjd return (0); 1476156612Spjd rstart = bp->bio_offset; 1477156612Spjd rend = bp->bio_offset + bp->bio_length; 1478156612Spjd for (i = 0; i < g_raid3_syncreqs; i++) { 1479156612Spjd sbp = disk->d_sync.ds_bios[i]; 1480156612Spjd if (sbp == NULL) 1481156612Spjd continue; 1482156612Spjd sstart = sbp->bio_offset; 1483156612Spjd send = sbp->bio_length; 1484156612Spjd if (sbp->bio_cmd == BIO_WRITE) { 1485156612Spjd sstart *= sc->sc_ndisks - 1; 1486156612Spjd send *= sc->sc_ndisks - 1; 1487156612Spjd } 1488156612Spjd send += sstart; 1489156612Spjd if (rend > sstart && rstart < send) 1490156612Spjd return (1); 1491156612Spjd } 1492156612Spjd return (0); 1493156612Spjd} 1494133808Spjd 1495156612Spjd/* 1496156612Spjd * Return TRUE if the given sync request is colliding with a in-progress regular 1497156612Spjd * request. 1498156612Spjd */ 1499156612Spjdstatic int 1500156612Spjdg_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp) 1501156612Spjd{ 1502156612Spjd off_t rstart, rend, sstart, send; 1503156612Spjd struct bio *bp; 1504156612Spjd 1505156612Spjd if (sc->sc_syncdisk == NULL) 1506156612Spjd return (0); 1507156612Spjd sstart = sbp->bio_offset; 1508156612Spjd send = sstart + sbp->bio_length; 1509156612Spjd TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) { 1510156612Spjd rstart = bp->bio_offset; 1511156612Spjd rend = bp->bio_offset + bp->bio_length; 1512156612Spjd if (rend > sstart && rstart < send) 1513156612Spjd return (1); 1514133808Spjd } 1515156612Spjd return (0); 1516133808Spjd} 1517133808Spjd 1518156612Spjd/* 1519156612Spjd * Puts request onto delayed queue. 1520156612Spjd */ 1521133808Spjdstatic void 1522156612Spjdg_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp) 1523156612Spjd{ 1524156612Spjd 1525163886Spjd G_RAID3_LOGREQ(2, bp, "Delaying request."); 1526163886Spjd bioq_insert_head(&sc->sc_regular_delayed, bp); 1527156612Spjd} 1528156612Spjd 1529156612Spjd/* 1530156612Spjd * Puts synchronization request onto delayed queue. 1531156612Spjd */ 1532156612Spjdstatic void 1533156612Spjdg_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp) 1534156612Spjd{ 1535156612Spjd 1536163886Spjd G_RAID3_LOGREQ(2, bp, "Delaying synchronization request."); 1537163886Spjd bioq_insert_tail(&sc->sc_sync_delayed, bp); 1538156612Spjd} 1539156612Spjd 1540156612Spjd/* 1541156612Spjd * Releases delayed regular requests which don't collide anymore with sync 1542156612Spjd * requests. 1543156612Spjd */ 1544156612Spjdstatic void 1545156612Spjdg_raid3_regular_release(struct g_raid3_softc *sc) 1546156612Spjd{ 1547163886Spjd struct bio *bp, *bp2; 1548156612Spjd 1549163886Spjd TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) { 1550163886Spjd if (g_raid3_sync_collision(sc, bp)) 1551163886Spjd continue; 1552163886Spjd bioq_remove(&sc->sc_regular_delayed, bp); 1553163886Spjd G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp); 1554156612Spjd mtx_lock(&sc->sc_queue_mtx); 1555156612Spjd bioq_insert_head(&sc->sc_queue, bp); 1556156612Spjd#if 0 1557156612Spjd /* 1558156612Spjd * wakeup() is not needed, because this function is called from 1559156612Spjd * the worker thread. 1560156612Spjd */ 1561156612Spjd wakeup(&sc->sc_queue); 1562156612Spjd#endif 1563156612Spjd mtx_unlock(&sc->sc_queue_mtx); 1564163886Spjd } 1565156612Spjd} 1566156612Spjd 1567156612Spjd/* 1568156612Spjd * Releases delayed sync requests which don't collide anymore with regular 1569156612Spjd * requests. 1570156612Spjd */ 1571156612Spjdstatic void 1572156612Spjdg_raid3_sync_release(struct g_raid3_softc *sc) 1573156612Spjd{ 1574163886Spjd struct bio *bp, *bp2; 1575156612Spjd 1576163886Spjd TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { 1577163886Spjd if (g_raid3_regular_collision(sc, bp)) 1578163886Spjd continue; 1579163886Spjd bioq_remove(&sc->sc_sync_delayed, bp); 1580163886Spjd G_RAID3_LOGREQ(2, bp, 1581163886Spjd "Releasing delayed synchronization request."); 1582163886Spjd g_io_request(bp, bp->bio_from); 1583163886Spjd } 1584156612Spjd} 1585156612Spjd 1586156612Spjd/* 1587156612Spjd * Handle synchronization requests. 1588156612Spjd * Every synchronization request is two-steps process: first, READ request is 1589156612Spjd * send to active provider and then WRITE request (with read data) to the provider 1590298808Spfg * being synchronized. When WRITE is finished, new synchronization request is 1591156612Spjd * send. 1592156612Spjd */ 1593156612Spjdstatic void 1594133808Spjdg_raid3_sync_request(struct bio *bp) 1595133808Spjd{ 1596133808Spjd struct g_raid3_softc *sc; 1597133808Spjd struct g_raid3_disk *disk; 1598133808Spjd 1599137256Spjd bp->bio_from->index--; 1600133808Spjd sc = bp->bio_from->geom->softc; 1601133808Spjd disk = bp->bio_from->private; 1602133808Spjd if (disk == NULL) { 1603156612Spjd sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 1604133808Spjd g_topology_lock(); 1605133808Spjd g_raid3_kill_consumer(sc, bp->bio_from); 1606133808Spjd g_topology_unlock(); 1607156612Spjd free(bp->bio_data, M_RAID3); 1608133808Spjd g_destroy_bio(bp); 1609156612Spjd sx_xlock(&sc->sc_lock); 1610133808Spjd return; 1611133808Spjd } 1612133808Spjd 1613133808Spjd /* 1614133808Spjd * Synchronization request. 1615133808Spjd */ 1616133808Spjd switch (bp->bio_cmd) { 1617133808Spjd case BIO_READ: 1618133808Spjd { 1619133808Spjd struct g_consumer *cp; 1620133808Spjd u_char *dst, *src; 1621133808Spjd off_t left; 1622133808Spjd u_int atom; 1623133808Spjd 1624133808Spjd if (bp->bio_error != 0) { 1625133808Spjd G_RAID3_LOGREQ(0, bp, 1626133808Spjd "Synchronization request failed (error=%d).", 1627133808Spjd bp->bio_error); 1628133808Spjd g_destroy_bio(bp); 1629133808Spjd return; 1630133808Spjd } 1631133808Spjd G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); 1632133808Spjd atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1633133808Spjd dst = src = bp->bio_data; 1634133808Spjd if (disk->d_no == sc->sc_ndisks - 1) { 1635133808Spjd u_int n; 1636133808Spjd 1637133808Spjd /* Parity component. */ 1638133808Spjd for (left = bp->bio_length; left > 0; 1639133808Spjd left -= sc->sc_sectorsize) { 1640133808Spjd bcopy(src, dst, atom); 1641133808Spjd src += atom; 1642133808Spjd for (n = 1; n < sc->sc_ndisks - 1; n++) { 1643201545Smav g_raid3_xor(src, dst, atom); 1644133808Spjd src += atom; 1645133808Spjd } 1646133808Spjd dst += atom; 1647133808Spjd } 1648133808Spjd } else { 1649133808Spjd /* Regular component. */ 1650133808Spjd src += atom * disk->d_no; 1651133808Spjd for (left = bp->bio_length; left > 0; 1652133808Spjd left -= sc->sc_sectorsize) { 1653133808Spjd bcopy(src, dst, atom); 1654133808Spjd src += sc->sc_sectorsize; 1655133808Spjd dst += atom; 1656133808Spjd } 1657133808Spjd } 1658156612Spjd bp->bio_driver1 = bp->bio_driver2 = NULL; 1659156612Spjd bp->bio_pflags = 0; 1660133808Spjd bp->bio_offset /= sc->sc_ndisks - 1; 1661133808Spjd bp->bio_length /= sc->sc_ndisks - 1; 1662133808Spjd bp->bio_cmd = BIO_WRITE; 1663133808Spjd bp->bio_cflags = 0; 1664133808Spjd bp->bio_children = bp->bio_inbed = 0; 1665133808Spjd cp = disk->d_consumer; 1666156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1667133808Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1668133808Spjd cp->acr, cp->acw, cp->ace)); 1669137256Spjd cp->index++; 1670133808Spjd g_io_request(bp, cp); 1671133808Spjd return; 1672133808Spjd } 1673133808Spjd case BIO_WRITE: 1674135863Spjd { 1675135863Spjd struct g_raid3_disk_sync *sync; 1676156612Spjd off_t boffset, moffset; 1677156612Spjd void *data; 1678156612Spjd int i; 1679135863Spjd 1680133808Spjd if (bp->bio_error != 0) { 1681133808Spjd G_RAID3_LOGREQ(0, bp, 1682133808Spjd "Synchronization request failed (error=%d).", 1683133808Spjd bp->bio_error); 1684133808Spjd g_destroy_bio(bp); 1685139671Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1686133808Spjd g_raid3_event_send(disk, 1687133808Spjd G_RAID3_DISK_STATE_DISCONNECTED, 1688133808Spjd G_RAID3_EVENT_DONTWAIT); 1689133808Spjd return; 1690133808Spjd } 1691133808Spjd G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); 1692135863Spjd sync = &disk->d_sync; 1693156612Spjd if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) || 1694156612Spjd sync->ds_consumer == NULL || 1695156612Spjd (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 1696156612Spjd /* Don't send more synchronization requests. */ 1697156612Spjd sync->ds_inflight--; 1698156612Spjd if (sync->ds_bios != NULL) { 1699156684Sru i = (int)(uintptr_t)bp->bio_caller1; 1700156612Spjd sync->ds_bios[i] = NULL; 1701156612Spjd } 1702156612Spjd free(bp->bio_data, M_RAID3); 1703156612Spjd g_destroy_bio(bp); 1704156612Spjd if (sync->ds_inflight > 0) 1705156612Spjd return; 1706156612Spjd if (sync->ds_consumer == NULL || 1707156612Spjd (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 1708156612Spjd return; 1709156612Spjd } 1710133808Spjd /* 1711133808Spjd * Disk up-to-date, activate it. 1712133808Spjd */ 1713133808Spjd g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE, 1714133808Spjd G_RAID3_EVENT_DONTWAIT); 1715133808Spjd return; 1716156612Spjd } 1717156612Spjd 1718156612Spjd /* Send next synchronization request. */ 1719156612Spjd data = bp->bio_data; 1720295707Simp g_reset_bio(bp); 1721156612Spjd bp->bio_cmd = BIO_READ; 1722156612Spjd bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1); 1723156612Spjd bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 1724156612Spjd sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 1725156612Spjd bp->bio_done = g_raid3_sync_done; 1726156612Spjd bp->bio_data = data; 1727156612Spjd bp->bio_from = sync->ds_consumer; 1728156612Spjd bp->bio_to = sc->sc_provider; 1729156612Spjd G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 1730156612Spjd sync->ds_consumer->index++; 1731156612Spjd /* 1732156612Spjd * Delay the request if it is colliding with a regular request. 1733156612Spjd */ 1734156612Spjd if (g_raid3_regular_collision(sc, bp)) 1735156612Spjd g_raid3_sync_delay(sc, bp); 1736156612Spjd else 1737156612Spjd g_io_request(bp, sync->ds_consumer); 1738156612Spjd 1739156612Spjd /* Release delayed requests if possible. */ 1740156612Spjd g_raid3_regular_release(sc); 1741156612Spjd 1742156612Spjd /* Find the smallest offset. */ 1743156612Spjd moffset = sc->sc_mediasize; 1744156612Spjd for (i = 0; i < g_raid3_syncreqs; i++) { 1745156612Spjd bp = sync->ds_bios[i]; 1746156612Spjd boffset = bp->bio_offset; 1747156612Spjd if (bp->bio_cmd == BIO_WRITE) 1748156612Spjd boffset *= sc->sc_ndisks - 1; 1749156612Spjd if (boffset < moffset) 1750156612Spjd moffset = boffset; 1751156612Spjd } 1752156612Spjd if (sync->ds_offset_done + (MAXPHYS * 100) < moffset) { 1753156612Spjd /* Update offset_done on every 100 blocks. */ 1754156612Spjd sync->ds_offset_done = moffset; 1755133808Spjd g_raid3_update_metadata(disk); 1756133808Spjd } 1757133808Spjd return; 1758135863Spjd } 1759133808Spjd default: 1760133808Spjd KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1761133808Spjd bp->bio_cmd, sc->sc_name)); 1762133808Spjd break; 1763133808Spjd } 1764133808Spjd} 1765133808Spjd 1766133808Spjdstatic int 1767133808Spjdg_raid3_register_request(struct bio *pbp) 1768133808Spjd{ 1769133808Spjd struct g_raid3_softc *sc; 1770133808Spjd struct g_raid3_disk *disk; 1771133808Spjd struct g_consumer *cp; 1772158290Spjd struct bio *cbp, *tmpbp; 1773133808Spjd off_t offset, length; 1774133839Sobrien u_int n, ndisks; 1775134168Spjd int round_robin, verify; 1776133808Spjd 1777133839Sobrien ndisks = 0; 1778133808Spjd sc = pbp->bio_to->geom->softc; 1779133808Spjd if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 && 1780133808Spjd sc->sc_syncdisk == NULL) { 1781133808Spjd g_io_deliver(pbp, EIO); 1782133808Spjd return (0); 1783133808Spjd } 1784133808Spjd g_raid3_init_bio(pbp); 1785133808Spjd length = pbp->bio_length / (sc->sc_ndisks - 1); 1786133808Spjd offset = pbp->bio_offset / (sc->sc_ndisks - 1); 1787134168Spjd round_robin = verify = 0; 1788133808Spjd switch (pbp->bio_cmd) { 1789133808Spjd case BIO_READ: 1790134168Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 1791134168Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1792134168Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY; 1793134168Spjd verify = 1; 1794134168Spjd ndisks = sc->sc_ndisks; 1795134168Spjd } else { 1796134168Spjd verify = 0; 1797134168Spjd ndisks = sc->sc_ndisks - 1; 1798134168Spjd } 1799134168Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 && 1800134168Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1801134168Spjd round_robin = 1; 1802134168Spjd } else { 1803134168Spjd round_robin = 0; 1804134168Spjd } 1805134168Spjd KASSERT(!round_robin || !verify, 1806134168Spjd ("ROUND-ROBIN and VERIFY are mutually exclusive.")); 1807134124Spjd pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; 1808133808Spjd break; 1809133808Spjd case BIO_WRITE: 1810133808Spjd case BIO_DELETE: 1811156612Spjd /* 1812156612Spjd * Delay the request if it is colliding with a synchronization 1813156612Spjd * request. 1814156612Spjd */ 1815156612Spjd if (g_raid3_sync_collision(sc, pbp)) { 1816156612Spjd g_raid3_regular_delay(sc, pbp); 1817156612Spjd return (0); 1818156612Spjd } 1819135863Spjd 1820137258Spjd if (sc->sc_idle) 1821137258Spjd g_raid3_unidle(sc); 1822155540Spjd else 1823155581Spjd sc->sc_last_write = time_uptime; 1824137258Spjd 1825133808Spjd ndisks = sc->sc_ndisks; 1826133808Spjd break; 1827133808Spjd } 1828133808Spjd for (n = 0; n < ndisks; n++) { 1829133808Spjd disk = &sc->sc_disks[n]; 1830133808Spjd cbp = g_raid3_clone_bio(sc, pbp); 1831133808Spjd if (cbp == NULL) { 1832133808Spjd while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1833133808Spjd g_raid3_destroy_bio(sc, cbp); 1834151822Spjd /* 1835151822Spjd * To prevent deadlock, we must run back up 1836151822Spjd * with the ENOMEM for failed requests of any 1837151822Spjd * of our consumers. Our own sync requests 1838151822Spjd * can stick around, as they are finite. 1839151822Spjd */ 1840151822Spjd if ((pbp->bio_cflags & 1841151822Spjd G_RAID3_BIO_CFLAG_REGULAR) != 0) { 1842151822Spjd g_io_deliver(pbp, ENOMEM); 1843151822Spjd return (0); 1844151822Spjd } 1845133808Spjd return (ENOMEM); 1846133808Spjd } 1847133808Spjd cbp->bio_offset = offset; 1848133808Spjd cbp->bio_length = length; 1849133808Spjd cbp->bio_done = g_raid3_done; 1850133808Spjd switch (pbp->bio_cmd) { 1851133808Spjd case BIO_READ: 1852133808Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1853133808Spjd /* 1854133808Spjd * Replace invalid component with the parity 1855133808Spjd * component. 1856133808Spjd */ 1857133808Spjd disk = &sc->sc_disks[sc->sc_ndisks - 1]; 1858133808Spjd cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1859133808Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1860134124Spjd } else if (round_robin && 1861134124Spjd disk->d_no == sc->sc_round_robin) { 1862134124Spjd /* 1863134124Spjd * In round-robin mode skip one data component 1864134124Spjd * and use parity component when reading. 1865134124Spjd */ 1866134124Spjd pbp->bio_driver2 = disk; 1867134124Spjd disk = &sc->sc_disks[sc->sc_ndisks - 1]; 1868134124Spjd cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1869134124Spjd sc->sc_round_robin++; 1870134124Spjd round_robin = 0; 1871134168Spjd } else if (verify && disk->d_no == sc->sc_ndisks - 1) { 1872134168Spjd cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1873133808Spjd } 1874133808Spjd break; 1875133808Spjd case BIO_WRITE: 1876133808Spjd case BIO_DELETE: 1877133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 1878133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 1879133808Spjd if (n == ndisks - 1) { 1880133808Spjd /* 1881133808Spjd * Active parity component, mark it as such. 1882133808Spjd */ 1883133808Spjd cbp->bio_cflags |= 1884133808Spjd G_RAID3_BIO_CFLAG_PARITY; 1885133808Spjd } 1886133808Spjd } else { 1887133808Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1888133808Spjd if (n == ndisks - 1) { 1889133808Spjd /* 1890133808Spjd * Parity component is not connected, 1891133808Spjd * so destroy its request. 1892133808Spjd */ 1893133808Spjd pbp->bio_pflags |= 1894133808Spjd G_RAID3_BIO_PFLAG_NOPARITY; 1895133808Spjd g_raid3_destroy_bio(sc, cbp); 1896133808Spjd cbp = NULL; 1897133808Spjd } else { 1898133808Spjd cbp->bio_cflags |= 1899133808Spjd G_RAID3_BIO_CFLAG_NODISK; 1900133808Spjd disk = NULL; 1901133808Spjd } 1902133808Spjd } 1903133808Spjd break; 1904133808Spjd } 1905133808Spjd if (cbp != NULL) 1906133808Spjd cbp->bio_caller2 = disk; 1907133808Spjd } 1908133808Spjd switch (pbp->bio_cmd) { 1909133808Spjd case BIO_READ: 1910134124Spjd if (round_robin) { 1911134124Spjd /* 1912134124Spjd * If we are in round-robin mode and 'round_robin' is 1913134124Spjd * still 1, it means, that we skipped parity component 1914134124Spjd * for this read and must reset sc_round_robin field. 1915134124Spjd */ 1916134124Spjd sc->sc_round_robin = 0; 1917134124Spjd } 1918158290Spjd G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1919133808Spjd disk = cbp->bio_caller2; 1920133808Spjd cp = disk->d_consumer; 1921133808Spjd cbp->bio_to = cp->provider; 1922133808Spjd G_RAID3_LOGREQ(3, cbp, "Sending request."); 1923156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1924133808Spjd ("Consumer %s not opened (r%dw%de%d).", 1925133808Spjd cp->provider->name, cp->acr, cp->acw, cp->ace)); 1926137256Spjd cp->index++; 1927133808Spjd g_io_request(cbp, cp); 1928133808Spjd } 1929133808Spjd break; 1930133808Spjd case BIO_WRITE: 1931133808Spjd case BIO_DELETE: 1932133808Spjd /* 1933156612Spjd * Put request onto inflight queue, so we can check if new 1934156612Spjd * synchronization requests don't collide with it. 1935156612Spjd */ 1936156612Spjd bioq_insert_tail(&sc->sc_inflight, pbp); 1937156612Spjd 1938156612Spjd /* 1939133808Spjd * Bump syncid on first write. 1940133808Spjd */ 1941139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) { 1942139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 1943139144Spjd g_raid3_bump_syncid(sc); 1944133808Spjd } 1945133808Spjd g_raid3_scatter(pbp); 1946133808Spjd break; 1947133808Spjd } 1948133808Spjd return (0); 1949133808Spjd} 1950133808Spjd 1951133808Spjdstatic int 1952133808Spjdg_raid3_can_destroy(struct g_raid3_softc *sc) 1953133808Spjd{ 1954133808Spjd struct g_geom *gp; 1955133808Spjd struct g_consumer *cp; 1956155174Spjd 1957133808Spjd g_topology_assert(); 1958133808Spjd gp = sc->sc_geom; 1959158114Spjd if (gp->softc == NULL) 1960158114Spjd return (1); 1961133808Spjd LIST_FOREACH(cp, &gp->consumer, consumer) { 1962133808Spjd if (g_raid3_is_busy(sc, cp)) 1963133808Spjd return (0); 1964133808Spjd } 1965133808Spjd gp = sc->sc_sync.ds_geom; 1966133808Spjd LIST_FOREACH(cp, &gp->consumer, consumer) { 1967133808Spjd if (g_raid3_is_busy(sc, cp)) 1968133808Spjd return (0); 1969133808Spjd } 1970133808Spjd G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.", 1971133808Spjd sc->sc_name); 1972133808Spjd return (1); 1973133808Spjd} 1974155174Spjd 1975133808Spjdstatic int 1976133808Spjdg_raid3_try_destroy(struct g_raid3_softc *sc) 1977133808Spjd{ 1978155174Spjd 1979156612Spjd g_topology_assert_not(); 1980156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 1981156612Spjd 1982148440Spjd if (sc->sc_rootmount != NULL) { 1983148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1984148440Spjd sc->sc_rootmount); 1985148440Spjd root_mount_rel(sc->sc_rootmount); 1986148440Spjd sc->sc_rootmount = NULL; 1987148440Spjd } 1988148440Spjd 1989139295Spjd g_topology_lock(); 1990139295Spjd if (!g_raid3_can_destroy(sc)) { 1991139295Spjd g_topology_unlock(); 1992139295Spjd return (0); 1993139295Spjd } 1994158114Spjd sc->sc_geom->softc = NULL; 1995158114Spjd sc->sc_sync.ds_geom->softc = NULL; 1996133808Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) { 1997133808Spjd g_topology_unlock(); 1998133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 1999133808Spjd &sc->sc_worker); 2000156612Spjd /* Unlock sc_lock here, as it can be destroyed after wakeup. */ 2001156612Spjd sx_xunlock(&sc->sc_lock); 2002133808Spjd wakeup(&sc->sc_worker); 2003133808Spjd sc->sc_worker = NULL; 2004133808Spjd } else { 2005156612Spjd g_topology_unlock(); 2006133808Spjd g_raid3_destroy_device(sc); 2007133808Spjd free(sc->sc_disks, M_RAID3); 2008133808Spjd free(sc, M_RAID3); 2009133808Spjd } 2010133808Spjd return (1); 2011133808Spjd} 2012133808Spjd 2013133808Spjd/* 2014133808Spjd * Worker thread. 2015133808Spjd */ 2016133808Spjdstatic void 2017133808Spjdg_raid3_worker(void *arg) 2018133808Spjd{ 2019133808Spjd struct g_raid3_softc *sc; 2020133808Spjd struct g_raid3_event *ep; 2021133808Spjd struct bio *bp; 2022155540Spjd int timeout; 2023133808Spjd 2024133808Spjd sc = arg; 2025170307Sjeff thread_lock(curthread); 2026139451Sjhb sched_prio(curthread, PRIBIO); 2027170307Sjeff thread_unlock(curthread); 2028133808Spjd 2029156612Spjd sx_xlock(&sc->sc_lock); 2030133808Spjd for (;;) { 2031133808Spjd G_RAID3_DEBUG(5, "%s: Let's see...", __func__); 2032133808Spjd /* 2033133808Spjd * First take a look at events. 2034133808Spjd * This is important to handle events before any I/O requests. 2035133808Spjd */ 2036133808Spjd ep = g_raid3_event_get(sc); 2037156612Spjd if (ep != NULL) { 2038139144Spjd g_raid3_event_remove(sc, ep); 2039133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) { 2040133808Spjd /* Update only device status. */ 2041133808Spjd G_RAID3_DEBUG(3, 2042133808Spjd "Running event for device %s.", 2043133808Spjd sc->sc_name); 2044133808Spjd ep->e_error = 0; 2045139144Spjd g_raid3_update_device(sc, 1); 2046133808Spjd } else { 2047133808Spjd /* Update disk status. */ 2048133808Spjd G_RAID3_DEBUG(3, "Running event for disk %s.", 2049133808Spjd g_raid3_get_diskname(ep->e_disk)); 2050133808Spjd ep->e_error = g_raid3_update_disk(ep->e_disk, 2051139144Spjd ep->e_state); 2052133808Spjd if (ep->e_error == 0) 2053139144Spjd g_raid3_update_device(sc, 0); 2054133808Spjd } 2055133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) { 2056133808Spjd KASSERT(ep->e_error == 0, 2057133808Spjd ("Error cannot be handled.")); 2058133808Spjd g_raid3_event_free(ep); 2059133808Spjd } else { 2060133808Spjd ep->e_flags |= G_RAID3_EVENT_DONE; 2061133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 2062133808Spjd ep); 2063133808Spjd mtx_lock(&sc->sc_events_mtx); 2064133808Spjd wakeup(ep); 2065133808Spjd mtx_unlock(&sc->sc_events_mtx); 2066133808Spjd } 2067133808Spjd if ((sc->sc_flags & 2068133808Spjd G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2069156612Spjd if (g_raid3_try_destroy(sc)) { 2070156612Spjd curthread->td_pflags &= ~TDP_GEOM; 2071156612Spjd G_RAID3_DEBUG(1, "Thread exiting."); 2072172836Sjulian kproc_exit(0); 2073156612Spjd } 2074133808Spjd } 2075133808Spjd G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__); 2076133808Spjd continue; 2077133808Spjd } 2078133808Spjd /* 2079155540Spjd * Check if we can mark array as CLEAN and if we can't take 2080155540Spjd * how much seconds should we wait. 2081155540Spjd */ 2082156612Spjd timeout = g_raid3_idle(sc, -1); 2083155540Spjd /* 2084133808Spjd * Now I/O requests. 2085133808Spjd */ 2086133808Spjd /* Get first request from the queue. */ 2087133808Spjd mtx_lock(&sc->sc_queue_mtx); 2088133808Spjd bp = bioq_first(&sc->sc_queue); 2089133808Spjd if (bp == NULL) { 2090133808Spjd if ((sc->sc_flags & 2091133808Spjd G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2092133808Spjd mtx_unlock(&sc->sc_queue_mtx); 2093156612Spjd if (g_raid3_try_destroy(sc)) { 2094156612Spjd curthread->td_pflags &= ~TDP_GEOM; 2095157134Spjd G_RAID3_DEBUG(1, "Thread exiting."); 2096172836Sjulian kproc_exit(0); 2097156612Spjd } 2098133808Spjd mtx_lock(&sc->sc_queue_mtx); 2099133808Spjd } 2100156612Spjd sx_xunlock(&sc->sc_lock); 2101158116Spjd /* 2102158116Spjd * XXX: We can miss an event here, because an event 2103158116Spjd * can be added without sx-device-lock and without 2104158116Spjd * mtx-queue-lock. Maybe I should just stop using 2105158116Spjd * dedicated mutex for events synchronization and 2106158116Spjd * stick with the queue lock? 2107158116Spjd * The event will hang here until next I/O request 2108158116Spjd * or next event is received. 2109158116Spjd */ 2110155540Spjd MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1", 2111155540Spjd timeout * hz); 2112156612Spjd sx_xlock(&sc->sc_lock); 2113155540Spjd G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); 2114133808Spjd continue; 2115133808Spjd } 2116158117Spjdprocess: 2117133808Spjd bioq_remove(&sc->sc_queue, bp); 2118133808Spjd mtx_unlock(&sc->sc_queue_mtx); 2119133808Spjd 2120162282Spjd if (bp->bio_from->geom == sc->sc_sync.ds_geom && 2121162282Spjd (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { 2122162282Spjd g_raid3_sync_request(bp); /* READ */ 2123162282Spjd } else if (bp->bio_to != sc->sc_provider) { 2124161116Spjd if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 2125161116Spjd g_raid3_regular_request(bp); 2126161116Spjd else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) 2127162282Spjd g_raid3_sync_request(bp); /* WRITE */ 2128161116Spjd else { 2129161116Spjd KASSERT(0, 2130297955Simp ("Invalid request cflags=0x%hx to=%s.", 2131161116Spjd bp->bio_cflags, bp->bio_to->name)); 2132161116Spjd } 2133161116Spjd } else if (g_raid3_register_request(bp) != 0) { 2134158117Spjd mtx_lock(&sc->sc_queue_mtx); 2135158117Spjd bioq_insert_head(&sc->sc_queue, bp); 2136158117Spjd /* 2137158117Spjd * We are short in memory, let see if there are finished 2138158117Spjd * request we can free. 2139158117Spjd */ 2140158117Spjd TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2141158117Spjd if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) 2142158117Spjd goto process; 2143133808Spjd } 2144158117Spjd /* 2145158117Spjd * No finished regular request, so at least keep 2146158117Spjd * synchronization running. 2147158117Spjd */ 2148158117Spjd TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2149158117Spjd if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) 2150158117Spjd goto process; 2151158117Spjd } 2152158117Spjd sx_xunlock(&sc->sc_lock); 2153158117Spjd MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP, 2154158117Spjd "r3:lowmem", hz / 10); 2155158117Spjd sx_xlock(&sc->sc_lock); 2156133808Spjd } 2157139144Spjd G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); 2158133808Spjd } 2159133808Spjd} 2160133808Spjd 2161133808Spjdstatic void 2162155540Spjdg_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk) 2163133808Spjd{ 2164133808Spjd 2165156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 2166163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 2167163888Spjd return; 2168155540Spjd if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { 2169155540Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 2170156612Spjd g_raid3_get_diskname(disk), sc->sc_name); 2171155540Spjd disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2172155540Spjd } else if (sc->sc_idle && 2173155540Spjd (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { 2174155540Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 2175156612Spjd g_raid3_get_diskname(disk), sc->sc_name); 2176155540Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2177133808Spjd } 2178133808Spjd} 2179133808Spjd 2180133808Spjdstatic void 2181133808Spjdg_raid3_sync_start(struct g_raid3_softc *sc) 2182133808Spjd{ 2183133808Spjd struct g_raid3_disk *disk; 2184156612Spjd struct g_consumer *cp; 2185156612Spjd struct bio *bp; 2186133808Spjd int error; 2187133808Spjd u_int n; 2188133808Spjd 2189156612Spjd g_topology_assert_not(); 2190156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 2191133808Spjd 2192133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2193133808Spjd ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2194133808Spjd sc->sc_state)); 2195133808Spjd KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", 2196133808Spjd sc->sc_name, sc->sc_state)); 2197133808Spjd disk = NULL; 2198133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2199133808Spjd if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 2200133808Spjd continue; 2201133808Spjd disk = &sc->sc_disks[n]; 2202133808Spjd break; 2203133808Spjd } 2204133808Spjd if (disk == NULL) 2205133808Spjd return; 2206133808Spjd 2207156612Spjd sx_xunlock(&sc->sc_lock); 2208156612Spjd g_topology_lock(); 2209156612Spjd cp = g_new_consumer(sc->sc_sync.ds_geom); 2210156612Spjd error = g_attach(cp, sc->sc_provider); 2211156612Spjd KASSERT(error == 0, 2212156612Spjd ("Cannot attach to %s (error=%d).", sc->sc_name, error)); 2213156612Spjd error = g_access(cp, 1, 0, 0); 2214156612Spjd KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); 2215156612Spjd g_topology_unlock(); 2216156612Spjd sx_xlock(&sc->sc_lock); 2217156612Spjd 2218133808Spjd G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 2219133808Spjd g_raid3_get_diskname(disk)); 2220163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0) 2221163888Spjd disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2222133808Spjd KASSERT(disk->d_sync.ds_consumer == NULL, 2223133808Spjd ("Sync consumer already exists (device=%s, disk=%s).", 2224133808Spjd sc->sc_name, g_raid3_get_diskname(disk))); 2225156612Spjd 2226156612Spjd disk->d_sync.ds_consumer = cp; 2227133808Spjd disk->d_sync.ds_consumer->private = disk; 2228137256Spjd disk->d_sync.ds_consumer->index = 0; 2229133808Spjd sc->sc_syncdisk = disk; 2230156612Spjd 2231156612Spjd /* 2232156612Spjd * Allocate memory for synchronization bios and initialize them. 2233156612Spjd */ 2234156612Spjd disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs, 2235156612Spjd M_RAID3, M_WAITOK); 2236156612Spjd for (n = 0; n < g_raid3_syncreqs; n++) { 2237156612Spjd bp = g_alloc_bio(); 2238156612Spjd disk->d_sync.ds_bios[n] = bp; 2239156612Spjd bp->bio_parent = NULL; 2240156612Spjd bp->bio_cmd = BIO_READ; 2241156612Spjd bp->bio_data = malloc(MAXPHYS, M_RAID3, M_WAITOK); 2242156612Spjd bp->bio_cflags = 0; 2243156612Spjd bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1); 2244156612Spjd bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 2245156612Spjd disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 2246156612Spjd bp->bio_done = g_raid3_sync_done; 2247156612Spjd bp->bio_from = disk->d_sync.ds_consumer; 2248156612Spjd bp->bio_to = sc->sc_provider; 2249156684Sru bp->bio_caller1 = (void *)(uintptr_t)n; 2250156612Spjd } 2251156612Spjd 2252156612Spjd /* Set the number of in-flight synchronization requests. */ 2253156612Spjd disk->d_sync.ds_inflight = g_raid3_syncreqs; 2254156612Spjd 2255156612Spjd /* 2256156612Spjd * Fire off first synchronization requests. 2257156612Spjd */ 2258156612Spjd for (n = 0; n < g_raid3_syncreqs; n++) { 2259156612Spjd bp = disk->d_sync.ds_bios[n]; 2260156612Spjd G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 2261156612Spjd disk->d_sync.ds_consumer->index++; 2262156612Spjd /* 2263156612Spjd * Delay the request if it is colliding with a regular request. 2264156612Spjd */ 2265156612Spjd if (g_raid3_regular_collision(sc, bp)) 2266156612Spjd g_raid3_sync_delay(sc, bp); 2267156612Spjd else 2268156612Spjd g_io_request(bp, disk->d_sync.ds_consumer); 2269156612Spjd } 2270133808Spjd} 2271133808Spjd 2272133808Spjd/* 2273133808Spjd * Stop synchronization process. 2274133808Spjd * type: 0 - synchronization finished 2275133808Spjd * 1 - synchronization stopped 2276133808Spjd */ 2277133808Spjdstatic void 2278133808Spjdg_raid3_sync_stop(struct g_raid3_softc *sc, int type) 2279133808Spjd{ 2280133808Spjd struct g_raid3_disk *disk; 2281156612Spjd struct g_consumer *cp; 2282133808Spjd 2283156612Spjd g_topology_assert_not(); 2284156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 2285156612Spjd 2286133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2287133808Spjd ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2288133808Spjd sc->sc_state)); 2289133808Spjd disk = sc->sc_syncdisk; 2290133808Spjd sc->sc_syncdisk = NULL; 2291133808Spjd KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); 2292133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2293133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2294133808Spjd g_raid3_disk_state2str(disk->d_state))); 2295133808Spjd if (disk->d_sync.ds_consumer == NULL) 2296133808Spjd return; 2297133808Spjd 2298133808Spjd if (type == 0) { 2299133808Spjd G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", 2300156612Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2301133808Spjd } else /* if (type == 1) */ { 2302133808Spjd G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 2303156612Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2304133808Spjd } 2305156612Spjd free(disk->d_sync.ds_bios, M_RAID3); 2306156612Spjd disk->d_sync.ds_bios = NULL; 2307156612Spjd cp = disk->d_sync.ds_consumer; 2308133808Spjd disk->d_sync.ds_consumer = NULL; 2309133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2310156612Spjd sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 2311156612Spjd g_topology_lock(); 2312156612Spjd g_raid3_kill_consumer(sc, cp); 2313156612Spjd g_topology_unlock(); 2314156612Spjd sx_xlock(&sc->sc_lock); 2315133808Spjd} 2316133808Spjd 2317133808Spjdstatic void 2318133808Spjdg_raid3_launch_provider(struct g_raid3_softc *sc) 2319133808Spjd{ 2320133808Spjd struct g_provider *pp; 2321200940Smav struct g_raid3_disk *disk; 2322200940Smav int n; 2323133808Spjd 2324156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 2325133808Spjd 2326156612Spjd g_topology_lock(); 2327133808Spjd pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); 2328133808Spjd pp->mediasize = sc->sc_mediasize; 2329133808Spjd pp->sectorsize = sc->sc_sectorsize; 2330200940Smav pp->stripesize = 0; 2331200940Smav pp->stripeoffset = 0; 2332200940Smav for (n = 0; n < sc->sc_ndisks; n++) { 2333200940Smav disk = &sc->sc_disks[n]; 2334200940Smav if (disk->d_consumer && disk->d_consumer->provider && 2335200940Smav disk->d_consumer->provider->stripesize > pp->stripesize) { 2336200940Smav pp->stripesize = disk->d_consumer->provider->stripesize; 2337200940Smav pp->stripeoffset = disk->d_consumer->provider->stripeoffset; 2338200940Smav } 2339200940Smav } 2340200940Smav pp->stripesize *= sc->sc_ndisks - 1; 2341200940Smav pp->stripeoffset *= sc->sc_ndisks - 1; 2342133808Spjd sc->sc_provider = pp; 2343133808Spjd g_error_provider(pp, 0); 2344156612Spjd g_topology_unlock(); 2345162188Sjmg G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name, 2346162188Sjmg g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks); 2347162835Spjd 2348133808Spjd if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) 2349133808Spjd g_raid3_sync_start(sc); 2350133808Spjd} 2351133808Spjd 2352133808Spjdstatic void 2353133808Spjdg_raid3_destroy_provider(struct g_raid3_softc *sc) 2354133808Spjd{ 2355133808Spjd struct bio *bp; 2356133808Spjd 2357156612Spjd g_topology_assert_not(); 2358133808Spjd KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 2359133808Spjd sc->sc_name)); 2360133808Spjd 2361156612Spjd g_topology_lock(); 2362133808Spjd g_error_provider(sc->sc_provider, ENXIO); 2363133808Spjd mtx_lock(&sc->sc_queue_mtx); 2364133808Spjd while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 2365133808Spjd bioq_remove(&sc->sc_queue, bp); 2366133808Spjd g_io_deliver(bp, ENXIO); 2367133808Spjd } 2368133808Spjd mtx_unlock(&sc->sc_queue_mtx); 2369133808Spjd G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 2370133808Spjd sc->sc_provider->name); 2371306764Smav g_wither_provider(sc->sc_provider, ENXIO); 2372156612Spjd g_topology_unlock(); 2373133808Spjd sc->sc_provider = NULL; 2374133808Spjd if (sc->sc_syncdisk != NULL) 2375133808Spjd g_raid3_sync_stop(sc, 1); 2376133808Spjd} 2377133808Spjd 2378133808Spjdstatic void 2379133808Spjdg_raid3_go(void *arg) 2380133808Spjd{ 2381133808Spjd struct g_raid3_softc *sc; 2382133808Spjd 2383133808Spjd sc = arg; 2384133808Spjd G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 2385133808Spjd g_raid3_event_send(sc, 0, 2386133808Spjd G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); 2387133808Spjd} 2388133808Spjd 2389133808Spjdstatic u_int 2390133808Spjdg_raid3_determine_state(struct g_raid3_disk *disk) 2391133808Spjd{ 2392133808Spjd struct g_raid3_softc *sc; 2393133808Spjd u_int state; 2394133808Spjd 2395133808Spjd sc = disk->d_softc; 2396133808Spjd if (sc->sc_syncid == disk->d_sync.ds_syncid) { 2397133808Spjd if ((disk->d_flags & 2398133808Spjd G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { 2399133808Spjd /* Disk does not need synchronization. */ 2400133808Spjd state = G_RAID3_DISK_STATE_ACTIVE; 2401133808Spjd } else { 2402133808Spjd if ((sc->sc_flags & 2403156876Spjd G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2404133808Spjd (disk->d_flags & 2405133808Spjd G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2406133808Spjd /* 2407133808Spjd * We can start synchronization from 2408133808Spjd * the stored offset. 2409133808Spjd */ 2410133808Spjd state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2411133808Spjd } else { 2412133808Spjd state = G_RAID3_DISK_STATE_STALE; 2413133808Spjd } 2414133808Spjd } 2415133808Spjd } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 2416133808Spjd /* 2417133808Spjd * Reset all synchronization data for this disk, 2418133808Spjd * because if it even was synchronized, it was 2419133808Spjd * synchronized to disks with different syncid. 2420133808Spjd */ 2421133808Spjd disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2422133808Spjd disk->d_sync.ds_offset = 0; 2423133808Spjd disk->d_sync.ds_offset_done = 0; 2424133808Spjd disk->d_sync.ds_syncid = sc->sc_syncid; 2425133808Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2426133808Spjd (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2427133808Spjd state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2428133808Spjd } else { 2429133808Spjd state = G_RAID3_DISK_STATE_STALE; 2430133808Spjd } 2431133808Spjd } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 2432133808Spjd /* 2433133808Spjd * Not good, NOT GOOD! 2434133808Spjd * It means that device was started on stale disks 2435133808Spjd * and more fresh disk just arrive. 2436160895Spjd * If there were writes, device is broken, sorry. 2437133808Spjd * I think the best choice here is don't touch 2438160964Syar * this disk and inform the user loudly. 2439133808Spjd */ 2440133808Spjd G_RAID3_DEBUG(0, "Device %s was started before the freshest " 2441133808Spjd "disk (%s) arrives!! It will not be connected to the " 2442133808Spjd "running device.", sc->sc_name, 2443133808Spjd g_raid3_get_diskname(disk)); 2444133808Spjd g_raid3_destroy_disk(disk); 2445133808Spjd state = G_RAID3_DISK_STATE_NONE; 2446133808Spjd /* Return immediately, because disk was destroyed. */ 2447133808Spjd return (state); 2448133808Spjd } 2449133808Spjd G_RAID3_DEBUG(3, "State for %s disk: %s.", 2450133808Spjd g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); 2451133808Spjd return (state); 2452133808Spjd} 2453133808Spjd 2454133808Spjd/* 2455133808Spjd * Update device state. 2456133808Spjd */ 2457133808Spjdstatic void 2458139144Spjdg_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) 2459133808Spjd{ 2460133808Spjd struct g_raid3_disk *disk; 2461133808Spjd u_int state; 2462133808Spjd 2463156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 2464133808Spjd 2465133808Spjd switch (sc->sc_state) { 2466133808Spjd case G_RAID3_DEVICE_STATE_STARTING: 2467133808Spjd { 2468139295Spjd u_int n, ndirty, ndisks, genid, syncid; 2469133808Spjd 2470133808Spjd KASSERT(sc->sc_provider == NULL, 2471133808Spjd ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 2472133808Spjd /* 2473133808Spjd * Are we ready? We are, if all disks are connected or 2474133808Spjd * one disk is missing and 'force' is true. 2475133808Spjd */ 2476133808Spjd if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { 2477133808Spjd if (!force) 2478133808Spjd callout_drain(&sc->sc_callout); 2479133808Spjd } else { 2480133808Spjd if (force) { 2481133808Spjd /* 2482133808Spjd * Timeout expired, so destroy device. 2483133808Spjd */ 2484133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2485148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", 2486148440Spjd __LINE__, sc->sc_rootmount); 2487148440Spjd root_mount_rel(sc->sc_rootmount); 2488148440Spjd sc->sc_rootmount = NULL; 2489133808Spjd } 2490133808Spjd return; 2491133808Spjd } 2492133808Spjd 2493133808Spjd /* 2494139295Spjd * Find the biggest genid. 2495139295Spjd */ 2496139295Spjd genid = 0; 2497139295Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2498139295Spjd disk = &sc->sc_disks[n]; 2499139295Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2500139295Spjd continue; 2501139295Spjd if (disk->d_genid > genid) 2502139295Spjd genid = disk->d_genid; 2503139295Spjd } 2504139295Spjd sc->sc_genid = genid; 2505139295Spjd /* 2506139295Spjd * Remove all disks without the biggest genid. 2507139295Spjd */ 2508139295Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2509139295Spjd disk = &sc->sc_disks[n]; 2510139295Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2511139295Spjd continue; 2512139295Spjd if (disk->d_genid < genid) { 2513139295Spjd G_RAID3_DEBUG(0, 2514139295Spjd "Component %s (device %s) broken, skipping.", 2515139295Spjd g_raid3_get_diskname(disk), sc->sc_name); 2516139295Spjd g_raid3_destroy_disk(disk); 2517139295Spjd } 2518139295Spjd } 2519139295Spjd 2520139295Spjd /* 2521133808Spjd * There must be at least 'sc->sc_ndisks - 1' components 2522133808Spjd * with the same syncid and without SYNCHRONIZING flag. 2523133808Spjd */ 2524133808Spjd 2525133808Spjd /* 2526133808Spjd * Find the biggest syncid, number of valid components and 2527133808Spjd * number of dirty components. 2528133808Spjd */ 2529133808Spjd ndirty = ndisks = syncid = 0; 2530133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2531133808Spjd disk = &sc->sc_disks[n]; 2532133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2533133808Spjd continue; 2534133808Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) 2535133808Spjd ndirty++; 2536133808Spjd if (disk->d_sync.ds_syncid > syncid) { 2537133808Spjd syncid = disk->d_sync.ds_syncid; 2538133808Spjd ndisks = 0; 2539133808Spjd } else if (disk->d_sync.ds_syncid < syncid) { 2540133808Spjd continue; 2541133808Spjd } 2542133808Spjd if ((disk->d_flags & 2543133808Spjd G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { 2544133808Spjd continue; 2545133808Spjd } 2546133808Spjd ndisks++; 2547133808Spjd } 2548133808Spjd /* 2549133808Spjd * Do we have enough valid components? 2550133808Spjd */ 2551133808Spjd if (ndisks + 1 < sc->sc_ndisks) { 2552133808Spjd G_RAID3_DEBUG(0, 2553133808Spjd "Device %s is broken, too few valid components.", 2554133808Spjd sc->sc_name); 2555133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2556133808Spjd return; 2557133808Spjd } 2558133808Spjd /* 2559133808Spjd * If there is one DIRTY component and all disks are present, 2560133808Spjd * mark it for synchronization. If there is more than one DIRTY 2561133808Spjd * component, mark parity component for synchronization. 2562133808Spjd */ 2563133808Spjd if (ndisks == sc->sc_ndisks && ndirty == 1) { 2564133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2565133808Spjd disk = &sc->sc_disks[n]; 2566133808Spjd if ((disk->d_flags & 2567133808Spjd G_RAID3_DISK_FLAG_DIRTY) == 0) { 2568133808Spjd continue; 2569133808Spjd } 2570133808Spjd disk->d_flags |= 2571155174Spjd G_RAID3_DISK_FLAG_SYNCHRONIZING; 2572133808Spjd } 2573133808Spjd } else if (ndisks == sc->sc_ndisks && ndirty > 1) { 2574133808Spjd disk = &sc->sc_disks[sc->sc_ndisks - 1]; 2575155174Spjd disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2576133808Spjd } 2577133808Spjd 2578133808Spjd sc->sc_syncid = syncid; 2579133808Spjd if (force) { 2580133808Spjd /* Remember to bump syncid on first write. */ 2581139671Spjd sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2582133808Spjd } 2583133808Spjd if (ndisks == sc->sc_ndisks) 2584133808Spjd state = G_RAID3_DEVICE_STATE_COMPLETE; 2585133808Spjd else /* if (ndisks == sc->sc_ndisks - 1) */ 2586133808Spjd state = G_RAID3_DEVICE_STATE_DEGRADED; 2587133808Spjd G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", 2588133808Spjd sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2589133808Spjd g_raid3_device_state2str(state)); 2590133808Spjd sc->sc_state = state; 2591133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2592133808Spjd disk = &sc->sc_disks[n]; 2593133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2594133808Spjd continue; 2595133808Spjd state = g_raid3_determine_state(disk); 2596133808Spjd g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); 2597139295Spjd if (state == G_RAID3_DISK_STATE_STALE) 2598139671Spjd sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2599133808Spjd } 2600133808Spjd break; 2601133808Spjd } 2602133808Spjd case G_RAID3_DEVICE_STATE_DEGRADED: 2603133808Spjd /* 2604139671Spjd * Genid need to be bumped immediately, so do it here. 2605133808Spjd */ 2606139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2607139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2608139295Spjd g_raid3_bump_genid(sc); 2609139295Spjd } 2610139295Spjd 2611133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2612133808Spjd return; 2613133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < 2614133808Spjd sc->sc_ndisks - 1) { 2615133808Spjd if (sc->sc_provider != NULL) 2616133808Spjd g_raid3_destroy_provider(sc); 2617133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2618133808Spjd return; 2619133808Spjd } 2620133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2621133808Spjd sc->sc_ndisks) { 2622133808Spjd state = G_RAID3_DEVICE_STATE_COMPLETE; 2623133808Spjd G_RAID3_DEBUG(1, 2624133808Spjd "Device %s state changed from %s to %s.", 2625133808Spjd sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2626133808Spjd g_raid3_device_state2str(state)); 2627133808Spjd sc->sc_state = state; 2628133808Spjd } 2629133808Spjd if (sc->sc_provider == NULL) 2630133808Spjd g_raid3_launch_provider(sc); 2631148440Spjd if (sc->sc_rootmount != NULL) { 2632148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2633148440Spjd sc->sc_rootmount); 2634148440Spjd root_mount_rel(sc->sc_rootmount); 2635148440Spjd sc->sc_rootmount = NULL; 2636148440Spjd } 2637133808Spjd break; 2638133808Spjd case G_RAID3_DEVICE_STATE_COMPLETE: 2639133808Spjd /* 2640139671Spjd * Genid need to be bumped immediately, so do it here. 2641133808Spjd */ 2642139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2643139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2644139295Spjd g_raid3_bump_genid(sc); 2645139295Spjd } 2646139295Spjd 2647133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2648133808Spjd return; 2649133808Spjd KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= 2650133808Spjd sc->sc_ndisks - 1, 2651133808Spjd ("Too few ACTIVE components in COMPLETE state (device %s).", 2652133808Spjd sc->sc_name)); 2653133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2654133808Spjd sc->sc_ndisks - 1) { 2655133808Spjd state = G_RAID3_DEVICE_STATE_DEGRADED; 2656133808Spjd G_RAID3_DEBUG(1, 2657133808Spjd "Device %s state changed from %s to %s.", 2658133808Spjd sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2659133808Spjd g_raid3_device_state2str(state)); 2660133808Spjd sc->sc_state = state; 2661133808Spjd } 2662133808Spjd if (sc->sc_provider == NULL) 2663133808Spjd g_raid3_launch_provider(sc); 2664148440Spjd if (sc->sc_rootmount != NULL) { 2665148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2666148440Spjd sc->sc_rootmount); 2667148440Spjd root_mount_rel(sc->sc_rootmount); 2668148440Spjd sc->sc_rootmount = NULL; 2669148440Spjd } 2670133808Spjd break; 2671133808Spjd default: 2672133808Spjd KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, 2673133808Spjd g_raid3_device_state2str(sc->sc_state))); 2674133808Spjd break; 2675133808Spjd } 2676133808Spjd} 2677133808Spjd 2678133808Spjd/* 2679133808Spjd * Update disk state and device state if needed. 2680133808Spjd */ 2681133808Spjd#define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ 2682133808Spjd "Disk %s state changed from %s to %s (device %s).", \ 2683133808Spjd g_raid3_get_diskname(disk), \ 2684133808Spjd g_raid3_disk_state2str(disk->d_state), \ 2685133808Spjd g_raid3_disk_state2str(state), sc->sc_name) 2686133808Spjdstatic int 2687139144Spjdg_raid3_update_disk(struct g_raid3_disk *disk, u_int state) 2688133808Spjd{ 2689133808Spjd struct g_raid3_softc *sc; 2690133808Spjd 2691156612Spjd sc = disk->d_softc; 2692156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 2693133808Spjd 2694133808Spjdagain: 2695133808Spjd G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", 2696133808Spjd g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), 2697133808Spjd g_raid3_disk_state2str(state)); 2698133808Spjd switch (state) { 2699133808Spjd case G_RAID3_DISK_STATE_NEW: 2700133808Spjd /* 2701133808Spjd * Possible scenarios: 2702133808Spjd * 1. New disk arrive. 2703133808Spjd */ 2704133808Spjd /* Previous state should be NONE. */ 2705133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, 2706133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2707133808Spjd g_raid3_disk_state2str(disk->d_state))); 2708133808Spjd DISK_STATE_CHANGED(); 2709133808Spjd 2710133808Spjd disk->d_state = state; 2711162188Sjmg G_RAID3_DEBUG(1, "Device %s: provider %s detected.", 2712133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2713133808Spjd if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) 2714133808Spjd break; 2715133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2716133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2717133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2718133808Spjd g_raid3_device_state2str(sc->sc_state), 2719133808Spjd g_raid3_get_diskname(disk), 2720133808Spjd g_raid3_disk_state2str(disk->d_state))); 2721133808Spjd state = g_raid3_determine_state(disk); 2722133808Spjd if (state != G_RAID3_DISK_STATE_NONE) 2723133808Spjd goto again; 2724133808Spjd break; 2725133808Spjd case G_RAID3_DISK_STATE_ACTIVE: 2726133808Spjd /* 2727133808Spjd * Possible scenarios: 2728133808Spjd * 1. New disk does not need synchronization. 2729133808Spjd * 2. Synchronization process finished successfully. 2730133808Spjd */ 2731133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2732133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2733133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2734133808Spjd g_raid3_device_state2str(sc->sc_state), 2735133808Spjd g_raid3_get_diskname(disk), 2736133808Spjd g_raid3_disk_state2str(disk->d_state))); 2737133808Spjd /* Previous state should be NEW or SYNCHRONIZING. */ 2738133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || 2739133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2740133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2741133808Spjd g_raid3_disk_state2str(disk->d_state))); 2742133808Spjd DISK_STATE_CHANGED(); 2743133808Spjd 2744155582Spjd if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 2745133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; 2746133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; 2747133808Spjd g_raid3_sync_stop(sc, 0); 2748133808Spjd } 2749133808Spjd disk->d_state = state; 2750133808Spjd disk->d_sync.ds_offset = 0; 2751133808Spjd disk->d_sync.ds_offset_done = 0; 2752155540Spjd g_raid3_update_idle(sc, disk); 2753155582Spjd g_raid3_update_metadata(disk); 2754162188Sjmg G_RAID3_DEBUG(1, "Device %s: provider %s activated.", 2755133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2756133808Spjd break; 2757133808Spjd case G_RAID3_DISK_STATE_STALE: 2758133808Spjd /* 2759133808Spjd * Possible scenarios: 2760133808Spjd * 1. Stale disk was connected. 2761133808Spjd */ 2762133808Spjd /* Previous state should be NEW. */ 2763133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2764133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2765133808Spjd g_raid3_disk_state2str(disk->d_state))); 2766133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2767133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2768133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2769133808Spjd g_raid3_device_state2str(sc->sc_state), 2770133808Spjd g_raid3_get_diskname(disk), 2771133808Spjd g_raid3_disk_state2str(disk->d_state))); 2772133808Spjd /* 2773133808Spjd * STALE state is only possible if device is marked 2774133808Spjd * NOAUTOSYNC. 2775133808Spjd */ 2776133808Spjd KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, 2777133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2778133808Spjd g_raid3_device_state2str(sc->sc_state), 2779133808Spjd g_raid3_get_diskname(disk), 2780133808Spjd g_raid3_disk_state2str(disk->d_state))); 2781133808Spjd DISK_STATE_CHANGED(); 2782133808Spjd 2783133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2784133808Spjd disk->d_state = state; 2785133808Spjd g_raid3_update_metadata(disk); 2786133808Spjd G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", 2787133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2788133808Spjd break; 2789133808Spjd case G_RAID3_DISK_STATE_SYNCHRONIZING: 2790133808Spjd /* 2791133808Spjd * Possible scenarios: 2792133808Spjd * 1. Disk which needs synchronization was connected. 2793133808Spjd */ 2794133808Spjd /* Previous state should be NEW. */ 2795133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2796133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2797133808Spjd g_raid3_disk_state2str(disk->d_state))); 2798133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2799133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2800133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2801133808Spjd g_raid3_device_state2str(sc->sc_state), 2802133808Spjd g_raid3_get_diskname(disk), 2803133808Spjd g_raid3_disk_state2str(disk->d_state))); 2804133808Spjd DISK_STATE_CHANGED(); 2805133808Spjd 2806133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NEW) 2807133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2808133808Spjd disk->d_state = state; 2809133808Spjd if (sc->sc_provider != NULL) { 2810133808Spjd g_raid3_sync_start(sc); 2811133808Spjd g_raid3_update_metadata(disk); 2812133808Spjd } 2813133808Spjd break; 2814133808Spjd case G_RAID3_DISK_STATE_DISCONNECTED: 2815133808Spjd /* 2816133808Spjd * Possible scenarios: 2817133808Spjd * 1. Device wasn't running yet, but disk disappear. 2818133808Spjd * 2. Disk was active and disapppear. 2819133808Spjd * 3. Disk disappear during synchronization process. 2820133808Spjd */ 2821133808Spjd if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2822133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 2823133808Spjd /* 2824133808Spjd * Previous state should be ACTIVE, STALE or 2825133808Spjd * SYNCHRONIZING. 2826133808Spjd */ 2827133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 2828133808Spjd disk->d_state == G_RAID3_DISK_STATE_STALE || 2829133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2830133808Spjd ("Wrong disk state (%s, %s).", 2831133808Spjd g_raid3_get_diskname(disk), 2832133808Spjd g_raid3_disk_state2str(disk->d_state))); 2833133808Spjd } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { 2834133808Spjd /* Previous state should be NEW. */ 2835133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2836133808Spjd ("Wrong disk state (%s, %s).", 2837133808Spjd g_raid3_get_diskname(disk), 2838133808Spjd g_raid3_disk_state2str(disk->d_state))); 2839133808Spjd /* 2840133808Spjd * Reset bumping syncid if disk disappeared in STARTING 2841133808Spjd * state. 2842133808Spjd */ 2843139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) 2844139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2845133808Spjd#ifdef INVARIANTS 2846133808Spjd } else { 2847133808Spjd KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2848133808Spjd sc->sc_name, 2849133808Spjd g_raid3_device_state2str(sc->sc_state), 2850133808Spjd g_raid3_get_diskname(disk), 2851133808Spjd g_raid3_disk_state2str(disk->d_state))); 2852133808Spjd#endif 2853133808Spjd } 2854133808Spjd DISK_STATE_CHANGED(); 2855133808Spjd G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", 2856133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2857133808Spjd 2858133808Spjd g_raid3_destroy_disk(disk); 2859133808Spjd break; 2860133808Spjd default: 2861133808Spjd KASSERT(1 == 0, ("Unknown state (%u).", state)); 2862133808Spjd break; 2863133808Spjd } 2864133808Spjd return (0); 2865133808Spjd} 2866133808Spjd#undef DISK_STATE_CHANGED 2867133808Spjd 2868139671Spjdint 2869133808Spjdg_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) 2870133808Spjd{ 2871133808Spjd struct g_provider *pp; 2872133808Spjd u_char *buf; 2873133808Spjd int error; 2874133808Spjd 2875133808Spjd g_topology_assert(); 2876133808Spjd 2877133808Spjd error = g_access(cp, 1, 0, 0); 2878133808Spjd if (error != 0) 2879133808Spjd return (error); 2880133808Spjd pp = cp->provider; 2881133808Spjd g_topology_unlock(); 2882133808Spjd /* Metadata are stored on last sector. */ 2883133808Spjd buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2884133808Spjd &error); 2885133808Spjd g_topology_lock(); 2886139144Spjd g_access(cp, -1, 0, 0); 2887152967Ssobomax if (buf == NULL) { 2888139295Spjd G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2889139295Spjd cp->provider->name, error); 2890133808Spjd return (error); 2891133808Spjd } 2892133808Spjd 2893133808Spjd /* Decode metadata. */ 2894133808Spjd error = raid3_metadata_decode(buf, md); 2895133808Spjd g_free(buf); 2896133808Spjd if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) 2897133808Spjd return (EINVAL); 2898139295Spjd if (md->md_version > G_RAID3_VERSION) { 2899139295Spjd G_RAID3_DEBUG(0, 2900139295Spjd "Kernel module is too old to handle metadata from %s.", 2901139295Spjd cp->provider->name); 2902139295Spjd return (EINVAL); 2903139295Spjd } 2904133808Spjd if (error != 0) { 2905133808Spjd G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2906133808Spjd cp->provider->name); 2907133808Spjd return (error); 2908133808Spjd } 2909217305Sae if (md->md_sectorsize > MAXPHYS) { 2910217305Sae G_RAID3_DEBUG(0, "The blocksize is too big."); 2911217305Sae return (EINVAL); 2912217305Sae } 2913133808Spjd 2914133808Spjd return (0); 2915133808Spjd} 2916133808Spjd 2917133808Spjdstatic int 2918133808Spjdg_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, 2919133808Spjd struct g_raid3_metadata *md) 2920133808Spjd{ 2921133808Spjd 2922133808Spjd if (md->md_no >= sc->sc_ndisks) { 2923133808Spjd G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", 2924133808Spjd pp->name, md->md_no); 2925133808Spjd return (EINVAL); 2926133808Spjd } 2927133808Spjd if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { 2928133808Spjd G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", 2929133808Spjd pp->name, md->md_no); 2930133808Spjd return (EEXIST); 2931133808Spjd } 2932133808Spjd if (md->md_all != sc->sc_ndisks) { 2933133808Spjd G_RAID3_DEBUG(1, 2934133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2935133808Spjd "md_all", pp->name, sc->sc_name); 2936133808Spjd return (EINVAL); 2937133808Spjd } 2938163206Spjd if ((md->md_mediasize % md->md_sectorsize) != 0) { 2939163206Spjd G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != " 2940163206Spjd "0) on disk %s (device %s), skipping.", pp->name, 2941163206Spjd sc->sc_name); 2942163206Spjd return (EINVAL); 2943163206Spjd } 2944133808Spjd if (md->md_mediasize != sc->sc_mediasize) { 2945133808Spjd G_RAID3_DEBUG(1, 2946133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2947133808Spjd "md_mediasize", pp->name, sc->sc_name); 2948133808Spjd return (EINVAL); 2949133808Spjd } 2950133808Spjd if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { 2951133808Spjd G_RAID3_DEBUG(1, 2952133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2953133808Spjd "md_mediasize", pp->name, sc->sc_name); 2954133808Spjd return (EINVAL); 2955133808Spjd } 2956133808Spjd if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) { 2957133808Spjd G_RAID3_DEBUG(1, 2958133808Spjd "Invalid size of disk %s (device %s), skipping.", pp->name, 2959133808Spjd sc->sc_name); 2960133808Spjd return (EINVAL); 2961133808Spjd } 2962133808Spjd if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { 2963133808Spjd G_RAID3_DEBUG(1, 2964133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2965133808Spjd "md_sectorsize", pp->name, sc->sc_name); 2966133808Spjd return (EINVAL); 2967133808Spjd } 2968133808Spjd if (md->md_sectorsize != sc->sc_sectorsize) { 2969133808Spjd G_RAID3_DEBUG(1, 2970133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2971133808Spjd "md_sectorsize", pp->name, sc->sc_name); 2972133808Spjd return (EINVAL); 2973133808Spjd } 2974133808Spjd if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2975133808Spjd G_RAID3_DEBUG(1, 2976133808Spjd "Invalid sector size of disk %s (device %s), skipping.", 2977133808Spjd pp->name, sc->sc_name); 2978133808Spjd return (EINVAL); 2979133808Spjd } 2980133808Spjd if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 2981133808Spjd G_RAID3_DEBUG(1, 2982133808Spjd "Invalid device flags on disk %s (device %s), skipping.", 2983133808Spjd pp->name, sc->sc_name); 2984133808Spjd return (EINVAL); 2985133808Spjd } 2986134168Spjd if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 2987134168Spjd (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { 2988134168Spjd /* 2989134168Spjd * VERIFY and ROUND-ROBIN options are mutally exclusive. 2990134168Spjd */ 2991134168Spjd G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " 2992134168Spjd "disk %s (device %s), skipping.", pp->name, sc->sc_name); 2993134168Spjd return (EINVAL); 2994134168Spjd } 2995133808Spjd if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { 2996133808Spjd G_RAID3_DEBUG(1, 2997133808Spjd "Invalid disk flags on disk %s (device %s), skipping.", 2998133808Spjd pp->name, sc->sc_name); 2999133808Spjd return (EINVAL); 3000133808Spjd } 3001133808Spjd return (0); 3002133808Spjd} 3003133808Spjd 3004139671Spjdint 3005133808Spjdg_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, 3006133808Spjd struct g_raid3_metadata *md) 3007133808Spjd{ 3008133808Spjd struct g_raid3_disk *disk; 3009133808Spjd int error; 3010133808Spjd 3011156612Spjd g_topology_assert_not(); 3012133808Spjd G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); 3013133808Spjd 3014133808Spjd error = g_raid3_check_metadata(sc, pp, md); 3015133808Spjd if (error != 0) 3016133808Spjd return (error); 3017139295Spjd if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && 3018139295Spjd md->md_genid < sc->sc_genid) { 3019139295Spjd G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", 3020139295Spjd pp->name, sc->sc_name); 3021139295Spjd return (EINVAL); 3022139295Spjd } 3023133808Spjd disk = g_raid3_init_disk(sc, pp, md, &error); 3024133808Spjd if (disk == NULL) 3025133808Spjd return (error); 3026133808Spjd error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, 3027133808Spjd G_RAID3_EVENT_WAIT); 3028139295Spjd if (error != 0) 3029139295Spjd return (error); 3030139295Spjd if (md->md_version < G_RAID3_VERSION) { 3031139295Spjd G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 3032139295Spjd pp->name, md->md_version, G_RAID3_VERSION); 3033139295Spjd g_raid3_update_metadata(disk); 3034139295Spjd } 3035139295Spjd return (0); 3036133808Spjd} 3037133808Spjd 3038157630Spjdstatic void 3039157630Spjdg_raid3_destroy_delayed(void *arg, int flag) 3040157630Spjd{ 3041157630Spjd struct g_raid3_softc *sc; 3042157630Spjd int error; 3043157630Spjd 3044157630Spjd if (flag == EV_CANCEL) { 3045157630Spjd G_RAID3_DEBUG(1, "Destroying canceled."); 3046157630Spjd return; 3047157630Spjd } 3048157630Spjd sc = arg; 3049157630Spjd g_topology_unlock(); 3050157630Spjd sx_xlock(&sc->sc_lock); 3051157630Spjd KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0, 3052157630Spjd ("DESTROY flag set on %s.", sc->sc_name)); 3053157630Spjd KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0, 3054157630Spjd ("DESTROYING flag not set on %s.", sc->sc_name)); 3055157630Spjd G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name); 3056157630Spjd error = g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT); 3057157630Spjd if (error != 0) { 3058157630Spjd G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name); 3059157630Spjd sx_xunlock(&sc->sc_lock); 3060157630Spjd } 3061157630Spjd g_topology_lock(); 3062157630Spjd} 3063157630Spjd 3064133808Spjdstatic int 3065133808Spjdg_raid3_access(struct g_provider *pp, int acr, int acw, int ace) 3066133808Spjd{ 3067133808Spjd struct g_raid3_softc *sc; 3068157630Spjd int dcr, dcw, dce, error = 0; 3069133808Spjd 3070133808Spjd g_topology_assert(); 3071133808Spjd G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 3072133808Spjd acw, ace); 3073133808Spjd 3074160081Spjd sc = pp->geom->softc; 3075160081Spjd if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0) 3076160081Spjd return (0); 3077160081Spjd KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); 3078160081Spjd 3079133808Spjd dcr = pp->acr + acr; 3080133808Spjd dcw = pp->acw + acw; 3081133808Spjd dce = pp->ace + ace; 3082133808Spjd 3083157630Spjd g_topology_unlock(); 3084157630Spjd sx_xlock(&sc->sc_lock); 3085157630Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 || 3086156612Spjd g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { 3087156612Spjd if (acr > 0 || acw > 0 || ace > 0) 3088156612Spjd error = ENXIO; 3089156612Spjd goto end; 3090133808Spjd } 3091245444Smav if (dcw == 0) 3092156612Spjd g_raid3_idle(sc, dcw); 3093157630Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) { 3094157630Spjd if (acr > 0 || acw > 0 || ace > 0) { 3095157630Spjd error = ENXIO; 3096157630Spjd goto end; 3097157630Spjd } 3098157630Spjd if (dcr == 0 && dcw == 0 && dce == 0) { 3099157630Spjd g_post_event(g_raid3_destroy_delayed, sc, M_WAITOK, 3100157630Spjd sc, NULL); 3101157630Spjd } 3102157630Spjd } 3103156612Spjdend: 3104157630Spjd sx_xunlock(&sc->sc_lock); 3105157630Spjd g_topology_lock(); 3106156612Spjd return (error); 3107133808Spjd} 3108133808Spjd 3109133808Spjdstatic struct g_geom * 3110133808Spjdg_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) 3111133808Spjd{ 3112133808Spjd struct g_raid3_softc *sc; 3113133808Spjd struct g_geom *gp; 3114133808Spjd int error, timeout; 3115133808Spjd u_int n; 3116133808Spjd 3117133808Spjd g_topology_assert(); 3118133808Spjd G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); 3119133808Spjd 3120133808Spjd /* One disk is minimum. */ 3121133808Spjd if (md->md_all < 1) 3122133808Spjd return (NULL); 3123133808Spjd /* 3124133808Spjd * Action geom. 3125133808Spjd */ 3126133808Spjd gp = g_new_geomf(mp, "%s", md->md_name); 3127133808Spjd sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); 3128133808Spjd sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, 3129133808Spjd M_WAITOK | M_ZERO); 3130133808Spjd gp->start = g_raid3_start; 3131133808Spjd gp->orphan = g_raid3_orphan; 3132133808Spjd gp->access = g_raid3_access; 3133133808Spjd gp->dumpconf = g_raid3_dumpconf; 3134133808Spjd 3135133808Spjd sc->sc_id = md->md_id; 3136133808Spjd sc->sc_mediasize = md->md_mediasize; 3137133808Spjd sc->sc_sectorsize = md->md_sectorsize; 3138133808Spjd sc->sc_ndisks = md->md_all; 3139134124Spjd sc->sc_round_robin = 0; 3140133808Spjd sc->sc_flags = md->md_mflags; 3141139295Spjd sc->sc_bump_id = 0; 3142155540Spjd sc->sc_idle = 1; 3143155581Spjd sc->sc_last_write = time_uptime; 3144155540Spjd sc->sc_writes = 0; 3145138374Spjd for (n = 0; n < sc->sc_ndisks; n++) { 3146138374Spjd sc->sc_disks[n].d_softc = sc; 3147138374Spjd sc->sc_disks[n].d_no = n; 3148133808Spjd sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; 3149138374Spjd } 3150156612Spjd sx_init(&sc->sc_lock, "graid3:lock"); 3151133808Spjd bioq_init(&sc->sc_queue); 3152133808Spjd mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); 3153156612Spjd bioq_init(&sc->sc_regular_delayed); 3154156612Spjd bioq_init(&sc->sc_inflight); 3155156612Spjd bioq_init(&sc->sc_sync_delayed); 3156133808Spjd TAILQ_INIT(&sc->sc_events); 3157133808Spjd mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); 3158283291Sjkim callout_init(&sc->sc_callout, 1); 3159133808Spjd sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; 3160133808Spjd gp->softc = sc; 3161133808Spjd sc->sc_geom = gp; 3162133808Spjd sc->sc_provider = NULL; 3163133808Spjd /* 3164133808Spjd * Synchronization geom. 3165133808Spjd */ 3166133808Spjd gp = g_new_geomf(mp, "%s.sync", md->md_name); 3167133808Spjd gp->softc = sc; 3168133808Spjd gp->orphan = g_raid3_orphan; 3169133808Spjd sc->sc_sync.ds_geom = gp; 3170156612Spjd 3171160203Spjd if (!g_raid3_use_malloc) { 3172160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 3173160203Spjd 65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3174160203Spjd UMA_ALIGN_PTR, 0); 3175160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0; 3176160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k; 3177160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_requested = 3178160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0; 3179160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 3180160203Spjd 16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3181160203Spjd UMA_ALIGN_PTR, 0); 3182160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0; 3183160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k; 3184160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_requested = 3185160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0; 3186160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 3187160203Spjd 4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3188160203Spjd UMA_ALIGN_PTR, 0); 3189160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0; 3190160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k; 3191160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_requested = 3192160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0; 3193160203Spjd } 3194156612Spjd 3195172836Sjulian error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, 3196133808Spjd "g_raid3 %s", md->md_name); 3197133808Spjd if (error != 0) { 3198133808Spjd G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", 3199133808Spjd sc->sc_name); 3200160203Spjd if (!g_raid3_use_malloc) { 3201160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); 3202160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); 3203160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); 3204160203Spjd } 3205133808Spjd g_destroy_geom(sc->sc_sync.ds_geom); 3206133808Spjd mtx_destroy(&sc->sc_events_mtx); 3207133808Spjd mtx_destroy(&sc->sc_queue_mtx); 3208156612Spjd sx_destroy(&sc->sc_lock); 3209133808Spjd g_destroy_geom(sc->sc_geom); 3210133808Spjd free(sc->sc_disks, M_RAID3); 3211133808Spjd free(sc, M_RAID3); 3212133808Spjd return (NULL); 3213133808Spjd } 3214133808Spjd 3215162188Sjmg G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).", 3216162188Sjmg sc->sc_name, sc->sc_ndisks, sc->sc_id); 3217133808Spjd 3218190878Sthompsa sc->sc_rootmount = root_mount_hold("GRAID3"); 3219148440Spjd G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 3220148440Spjd 3221133808Spjd /* 3222133808Spjd * Run timeout. 3223133808Spjd */ 3224133808Spjd timeout = atomic_load_acq_int(&g_raid3_timeout); 3225133808Spjd callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); 3226133808Spjd return (sc->sc_geom); 3227133808Spjd} 3228133808Spjd 3229133808Spjdint 3230157630Spjdg_raid3_destroy(struct g_raid3_softc *sc, int how) 3231133808Spjd{ 3232133808Spjd struct g_provider *pp; 3233133808Spjd 3234156612Spjd g_topology_assert_not(); 3235133808Spjd if (sc == NULL) 3236133808Spjd return (ENXIO); 3237156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 3238156612Spjd 3239133808Spjd pp = sc->sc_provider; 3240133808Spjd if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 3241157630Spjd switch (how) { 3242157630Spjd case G_RAID3_DESTROY_SOFT: 3243133808Spjd G_RAID3_DEBUG(1, 3244133808Spjd "Device %s is still open (r%dw%de%d).", pp->name, 3245133808Spjd pp->acr, pp->acw, pp->ace); 3246133808Spjd return (EBUSY); 3247157630Spjd case G_RAID3_DESTROY_DELAYED: 3248157630Spjd G_RAID3_DEBUG(1, 3249157630Spjd "Device %s will be destroyed on last close.", 3250157630Spjd pp->name); 3251157630Spjd if (sc->sc_syncdisk != NULL) 3252157630Spjd g_raid3_sync_stop(sc, 1); 3253157630Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING; 3254157630Spjd return (EBUSY); 3255157630Spjd case G_RAID3_DESTROY_HARD: 3256157630Spjd G_RAID3_DEBUG(1, "Device %s is still open, so it " 3257157630Spjd "can't be definitely removed.", pp->name); 3258157630Spjd break; 3259133808Spjd } 3260133808Spjd } 3261133808Spjd 3262158114Spjd g_topology_lock(); 3263158114Spjd if (sc->sc_geom->softc == NULL) { 3264158114Spjd g_topology_unlock(); 3265158114Spjd return (0); 3266158114Spjd } 3267158114Spjd sc->sc_geom->softc = NULL; 3268158114Spjd sc->sc_sync.ds_geom->softc = NULL; 3269158114Spjd g_topology_unlock(); 3270158114Spjd 3271133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 3272133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; 3273133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 3274156612Spjd sx_xunlock(&sc->sc_lock); 3275133808Spjd mtx_lock(&sc->sc_queue_mtx); 3276133808Spjd wakeup(sc); 3277133808Spjd wakeup(&sc->sc_queue); 3278133808Spjd mtx_unlock(&sc->sc_queue_mtx); 3279133808Spjd G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 3280133808Spjd while (sc->sc_worker != NULL) 3281133808Spjd tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); 3282133808Spjd G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 3283156612Spjd sx_xlock(&sc->sc_lock); 3284133808Spjd g_raid3_destroy_device(sc); 3285133808Spjd free(sc->sc_disks, M_RAID3); 3286133808Spjd free(sc, M_RAID3); 3287133808Spjd return (0); 3288133808Spjd} 3289133808Spjd 3290133808Spjdstatic void 3291133808Spjdg_raid3_taste_orphan(struct g_consumer *cp) 3292133808Spjd{ 3293133808Spjd 3294133808Spjd KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 3295133808Spjd cp->provider->name)); 3296133808Spjd} 3297133808Spjd 3298133808Spjdstatic struct g_geom * 3299133808Spjdg_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 3300133808Spjd{ 3301133808Spjd struct g_raid3_metadata md; 3302133808Spjd struct g_raid3_softc *sc; 3303133808Spjd struct g_consumer *cp; 3304133808Spjd struct g_geom *gp; 3305133808Spjd int error; 3306133808Spjd 3307133808Spjd g_topology_assert(); 3308133808Spjd g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 3309133808Spjd G_RAID3_DEBUG(2, "Tasting %s.", pp->name); 3310133808Spjd 3311133808Spjd gp = g_new_geomf(mp, "raid3:taste"); 3312133808Spjd /* This orphan function should be never called. */ 3313133808Spjd gp->orphan = g_raid3_taste_orphan; 3314133808Spjd cp = g_new_consumer(gp); 3315133808Spjd g_attach(cp, pp); 3316133808Spjd error = g_raid3_read_metadata(cp, &md); 3317133808Spjd g_detach(cp); 3318133808Spjd g_destroy_consumer(cp); 3319133808Spjd g_destroy_geom(gp); 3320133808Spjd if (error != 0) 3321133808Spjd return (NULL); 3322133808Spjd gp = NULL; 3323133808Spjd 3324221101Smav if (md.md_provider[0] != '\0' && 3325221101Smav !g_compare_names(md.md_provider, pp->name)) 3326133808Spjd return (NULL); 3327142727Spjd if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 3328142727Spjd return (NULL); 3329133808Spjd if (g_raid3_debug >= 2) 3330133808Spjd raid3_metadata_dump(&md); 3331133808Spjd 3332133808Spjd /* 3333133808Spjd * Let's check if device already exists. 3334133808Spjd */ 3335134486Spjd sc = NULL; 3336133808Spjd LIST_FOREACH(gp, &mp->geom, geom) { 3337133808Spjd sc = gp->softc; 3338133808Spjd if (sc == NULL) 3339133808Spjd continue; 3340133808Spjd if (sc->sc_sync.ds_geom == gp) 3341133808Spjd continue; 3342133808Spjd if (strcmp(md.md_name, sc->sc_name) != 0) 3343133808Spjd continue; 3344133808Spjd if (md.md_id != sc->sc_id) { 3345133808Spjd G_RAID3_DEBUG(0, "Device %s already configured.", 3346133808Spjd sc->sc_name); 3347133808Spjd return (NULL); 3348133808Spjd } 3349133808Spjd break; 3350133808Spjd } 3351133808Spjd if (gp == NULL) { 3352133808Spjd gp = g_raid3_create(mp, &md); 3353133808Spjd if (gp == NULL) { 3354133808Spjd G_RAID3_DEBUG(0, "Cannot create device %s.", 3355133808Spjd md.md_name); 3356133808Spjd return (NULL); 3357133808Spjd } 3358133808Spjd sc = gp->softc; 3359133808Spjd } 3360133808Spjd G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 3361156612Spjd g_topology_unlock(); 3362156612Spjd sx_xlock(&sc->sc_lock); 3363133808Spjd error = g_raid3_add_disk(sc, pp, &md); 3364133808Spjd if (error != 0) { 3365133808Spjd G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 3366133808Spjd pp->name, gp->name, error); 3367133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == 3368133808Spjd sc->sc_ndisks) { 3369157630Spjd g_cancel_event(sc); 3370160248Spjd g_raid3_destroy(sc, G_RAID3_DESTROY_HARD); 3371156612Spjd g_topology_lock(); 3372156612Spjd return (NULL); 3373133808Spjd } 3374156612Spjd gp = NULL; 3375133808Spjd } 3376156612Spjd sx_xunlock(&sc->sc_lock); 3377156612Spjd g_topology_lock(); 3378133808Spjd return (gp); 3379133808Spjd} 3380133808Spjd 3381133808Spjdstatic int 3382133808Spjdg_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, 3383133808Spjd struct g_geom *gp) 3384133808Spjd{ 3385156612Spjd struct g_raid3_softc *sc; 3386156612Spjd int error; 3387133808Spjd 3388156612Spjd g_topology_unlock(); 3389156612Spjd sc = gp->softc; 3390156612Spjd sx_xlock(&sc->sc_lock); 3391157630Spjd g_cancel_event(sc); 3392160248Spjd error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT); 3393156612Spjd if (error != 0) 3394156612Spjd sx_xunlock(&sc->sc_lock); 3395156612Spjd g_topology_lock(); 3396156612Spjd return (error); 3397133808Spjd} 3398133808Spjd 3399133808Spjdstatic void 3400133808Spjdg_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 3401133808Spjd struct g_consumer *cp, struct g_provider *pp) 3402133808Spjd{ 3403133808Spjd struct g_raid3_softc *sc; 3404133808Spjd 3405133808Spjd g_topology_assert(); 3406133808Spjd 3407133808Spjd sc = gp->softc; 3408133808Spjd if (sc == NULL) 3409133808Spjd return; 3410133808Spjd /* Skip synchronization geom. */ 3411133808Spjd if (gp == sc->sc_sync.ds_geom) 3412133808Spjd return; 3413133808Spjd if (pp != NULL) { 3414133808Spjd /* Nothing here. */ 3415133808Spjd } else if (cp != NULL) { 3416133808Spjd struct g_raid3_disk *disk; 3417133808Spjd 3418133808Spjd disk = cp->private; 3419133808Spjd if (disk == NULL) 3420133808Spjd return; 3421156612Spjd g_topology_unlock(); 3422156612Spjd sx_xlock(&sc->sc_lock); 3423133808Spjd sbuf_printf(sb, "%s<Type>", indent); 3424133808Spjd if (disk->d_no == sc->sc_ndisks - 1) 3425133808Spjd sbuf_printf(sb, "PARITY"); 3426133808Spjd else 3427133808Spjd sbuf_printf(sb, "DATA"); 3428133808Spjd sbuf_printf(sb, "</Type>\n"); 3429133808Spjd sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 3430133808Spjd (u_int)disk->d_no); 3431133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 3432133808Spjd sbuf_printf(sb, "%s<Synchronized>", indent); 3433156612Spjd if (disk->d_sync.ds_offset == 0) 3434133808Spjd sbuf_printf(sb, "0%%"); 3435133808Spjd else { 3436133808Spjd sbuf_printf(sb, "%u%%", 3437156612Spjd (u_int)((disk->d_sync.ds_offset * 100) / 3438134421Spjd (sc->sc_mediasize / (sc->sc_ndisks - 1)))); 3439133808Spjd } 3440133808Spjd sbuf_printf(sb, "</Synchronized>\n"); 3441240371Sglebius if (disk->d_sync.ds_offset > 0) { 3442240371Sglebius sbuf_printf(sb, "%s<BytesSynced>%jd" 3443240371Sglebius "</BytesSynced>\n", indent, 3444240371Sglebius (intmax_t)disk->d_sync.ds_offset); 3445240371Sglebius } 3446133808Spjd } 3447133808Spjd sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3448133808Spjd disk->d_sync.ds_syncid); 3449139295Spjd sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid); 3450133808Spjd sbuf_printf(sb, "%s<Flags>", indent); 3451133808Spjd if (disk->d_flags == 0) 3452133808Spjd sbuf_printf(sb, "NONE"); 3453133808Spjd else { 3454133808Spjd int first = 1; 3455133808Spjd 3456133808Spjd#define ADD_FLAG(flag, name) do { \ 3457133808Spjd if ((disk->d_flags & (flag)) != 0) { \ 3458133808Spjd if (!first) \ 3459133808Spjd sbuf_printf(sb, ", "); \ 3460133808Spjd else \ 3461133808Spjd first = 0; \ 3462133808Spjd sbuf_printf(sb, name); \ 3463133808Spjd } \ 3464133808Spjd} while (0) 3465133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); 3466133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); 3467133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, 3468133808Spjd "SYNCHRONIZING"); 3469133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3470155546Spjd ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN"); 3471133808Spjd#undef ADD_FLAG 3472133808Spjd } 3473133808Spjd sbuf_printf(sb, "</Flags>\n"); 3474133808Spjd sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3475133808Spjd g_raid3_disk_state2str(disk->d_state)); 3476156612Spjd sx_xunlock(&sc->sc_lock); 3477156612Spjd g_topology_lock(); 3478133808Spjd } else { 3479156612Spjd g_topology_unlock(); 3480156612Spjd sx_xlock(&sc->sc_lock); 3481160203Spjd if (!g_raid3_use_malloc) { 3482160203Spjd sbuf_printf(sb, 3483160203Spjd "%s<Zone4kRequested>%u</Zone4kRequested>\n", indent, 3484160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_requested); 3485160203Spjd sbuf_printf(sb, 3486160203Spjd "%s<Zone4kFailed>%u</Zone4kFailed>\n", indent, 3487160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_failed); 3488160203Spjd sbuf_printf(sb, 3489160203Spjd "%s<Zone16kRequested>%u</Zone16kRequested>\n", indent, 3490160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_requested); 3491160203Spjd sbuf_printf(sb, 3492160203Spjd "%s<Zone16kFailed>%u</Zone16kFailed>\n", indent, 3493160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_failed); 3494160203Spjd sbuf_printf(sb, 3495160203Spjd "%s<Zone64kRequested>%u</Zone64kRequested>\n", indent, 3496160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_requested); 3497160203Spjd sbuf_printf(sb, 3498160203Spjd "%s<Zone64kFailed>%u</Zone64kFailed>\n", indent, 3499160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_failed); 3500160203Spjd } 3501133808Spjd sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3502133808Spjd sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 3503139295Spjd sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3504133808Spjd sbuf_printf(sb, "%s<Flags>", indent); 3505133808Spjd if (sc->sc_flags == 0) 3506133808Spjd sbuf_printf(sb, "NONE"); 3507133808Spjd else { 3508133808Spjd int first = 1; 3509133808Spjd 3510133808Spjd#define ADD_FLAG(flag, name) do { \ 3511133808Spjd if ((sc->sc_flags & (flag)) != 0) { \ 3512133808Spjd if (!first) \ 3513133808Spjd sbuf_printf(sb, ", "); \ 3514133808Spjd else \ 3515133808Spjd first = 0; \ 3516133808Spjd sbuf_printf(sb, name); \ 3517133808Spjd } \ 3518133808Spjd} while (0) 3519163888Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); 3520133808Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3521134124Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, 3522134124Spjd "ROUND-ROBIN"); 3523134168Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); 3524133808Spjd#undef ADD_FLAG 3525133808Spjd } 3526133808Spjd sbuf_printf(sb, "</Flags>\n"); 3527133808Spjd sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3528133808Spjd sc->sc_ndisks); 3529133979Spjd sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3530133979Spjd g_raid3_device_state2str(sc->sc_state)); 3531156612Spjd sx_xunlock(&sc->sc_lock); 3532156612Spjd g_topology_lock(); 3533133808Spjd } 3534133808Spjd} 3535133808Spjd 3536137257Spjdstatic void 3537245444Smavg_raid3_shutdown_post_sync(void *arg, int howto) 3538137257Spjd{ 3539137257Spjd struct g_class *mp; 3540137257Spjd struct g_geom *gp, *gp2; 3541156612Spjd struct g_raid3_softc *sc; 3542157630Spjd int error; 3543137257Spjd 3544137257Spjd mp = arg; 3545137257Spjd g_topology_lock(); 3546245444Smav g_raid3_shutdown = 1; 3547137257Spjd LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3548156612Spjd if ((sc = gp->softc) == NULL) 3549137257Spjd continue; 3550157630Spjd /* Skip synchronization geom. */ 3551157630Spjd if (gp == sc->sc_sync.ds_geom) 3552156612Spjd continue; 3553156612Spjd g_topology_unlock(); 3554156612Spjd sx_xlock(&sc->sc_lock); 3555245444Smav g_raid3_idle(sc, -1); 3556157630Spjd g_cancel_event(sc); 3557157630Spjd error = g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED); 3558157630Spjd if (error != 0) 3559157630Spjd sx_xunlock(&sc->sc_lock); 3560156612Spjd g_topology_lock(); 3561156612Spjd } 3562156612Spjd g_topology_unlock(); 3563137257Spjd} 3564137257Spjd 3565137257Spjdstatic void 3566137257Spjdg_raid3_init(struct g_class *mp) 3567137257Spjd{ 3568137257Spjd 3569245444Smav g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, 3570245444Smav g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); 3571245444Smav if (g_raid3_post_sync == NULL) 3572137257Spjd G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); 3573137257Spjd} 3574137257Spjd 3575137257Spjdstatic void 3576137257Spjdg_raid3_fini(struct g_class *mp) 3577137257Spjd{ 3578137257Spjd 3579245444Smav if (g_raid3_post_sync != NULL) 3580245444Smav EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync); 3581137257Spjd} 3582137257Spjd 3583133808SpjdDECLARE_GEOM_CLASS(g_raid3_class, g_raid3); 3584332640SkevansMODULE_VERSION(geom_raid3, 0); 3585