/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25133808Spjd */ 26133808Spjd 27133808Spjd#include <sys/cdefs.h> 28133808Spjd__FBSDID("$FreeBSD: releng/10.3/sys/geom/raid3/g_raid3.c 245444 2013-01-15 01:27:04Z mav $"); 29133808Spjd 30133808Spjd#include <sys/param.h> 31133808Spjd#include <sys/systm.h> 32133808Spjd#include <sys/kernel.h> 33133808Spjd#include <sys/module.h> 34133808Spjd#include <sys/limits.h> 35133808Spjd#include <sys/lock.h> 36133808Spjd#include <sys/mutex.h> 37133808Spjd#include <sys/bio.h> 38223921Sae#include <sys/sbuf.h> 39133808Spjd#include <sys/sysctl.h> 40133808Spjd#include <sys/malloc.h> 41137257Spjd#include <sys/eventhandler.h> 42133808Spjd#include <vm/uma.h> 43133808Spjd#include <geom/geom.h> 44133808Spjd#include <sys/proc.h> 45133808Spjd#include <sys/kthread.h> 46139451Sjhb#include <sys/sched.h> 47133808Spjd#include <geom/raid3/g_raid3.h> 48133808Spjd 49219029SnetchildFEATURE(geom_raid3, "GEOM RAID-3 functionality"); 50133808Spjd 51151897Srwatsonstatic MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data"); 52133808Spjd 53133808SpjdSYSCTL_DECL(_kern_geom); 54227309Sedstatic SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, 55227309Sed "GEOM_RAID3 stuff"); 56133825Spjdu_int g_raid3_debug = 0; 57134528SpjdTUNABLE_INT("kern.geom.raid3.debug", &g_raid3_debug); 58133808SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RW, &g_raid3_debug, 0, 59133808Spjd "Debug level"); 60135866Spjdstatic u_int g_raid3_timeout = 4; 61137258SpjdTUNABLE_INT("kern.geom.raid3.timeout", &g_raid3_timeout); 62133808SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RW, &g_raid3_timeout, 63133808Spjd 0, "Time to wait on all raid3 components"); 64137258Spjdstatic u_int g_raid3_idletime = 5; 65137258SpjdTUNABLE_INT("kern.geom.raid3.idletime", &g_raid3_idletime); 66137258SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RW, 67137258Spjd &g_raid3_idletime, 0, "Mark components as clean when idling"); 68155546Spjdstatic u_int g_raid3_disconnect_on_failure = 1; 
69155560SpjdTUNABLE_INT("kern.geom.raid3.disconnect_on_failure", 70155560Spjd &g_raid3_disconnect_on_failure); 71155546SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RW, 72155546Spjd &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 73156876Spjdstatic u_int g_raid3_syncreqs = 2; 74156612SpjdTUNABLE_INT("kern.geom.raid3.sync_requests", &g_raid3_syncreqs); 75156612SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN, 76156612Spjd &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests."); 77160203Spjdstatic u_int g_raid3_use_malloc = 0; 78160203SpjdTUNABLE_INT("kern.geom.raid3.use_malloc", &g_raid3_use_malloc); 79160203SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN, 80160203Spjd &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9)."); 81133808Spjd 82133808Spjdstatic u_int g_raid3_n64k = 50; 83133808SpjdTUNABLE_INT("kern.geom.raid3.n64k", &g_raid3_n64k); 84133808SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RD, &g_raid3_n64k, 0, 85133808Spjd "Maximum number of 64kB allocations"); 86133808Spjdstatic u_int g_raid3_n16k = 200; 87133808SpjdTUNABLE_INT("kern.geom.raid3.n16k", &g_raid3_n16k); 88133808SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RD, &g_raid3_n16k, 0, 89133808Spjd "Maximum number of 16kB allocations"); 90133808Spjdstatic u_int g_raid3_n4k = 1200; 91133808SpjdTUNABLE_INT("kern.geom.raid3.n4k", &g_raid3_n4k); 92133808SpjdSYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RD, &g_raid3_n4k, 0, 93133808Spjd "Maximum number of 4kB allocations"); 94133808Spjd 95227309Sedstatic SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, 96133808Spjd "GEOM_RAID3 statistics"); 97134168Spjdstatic u_int g_raid3_parity_mismatch = 0; 98134168SpjdSYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, 99134168Spjd &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); 100133808Spjd 101133808Spjd#define 
MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 102133808Spjd G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 103133808Spjd msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 104133808Spjd G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 105133808Spjd} while (0) 106133808Spjd 107245444Smavstatic eventhandler_tag g_raid3_post_sync = NULL; 108245444Smavstatic int g_raid3_shutdown = 0; 109133808Spjd 110133808Spjdstatic int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp, 111133808Spjd struct g_geom *gp); 112133808Spjdstatic g_taste_t g_raid3_taste; 113137257Spjdstatic void g_raid3_init(struct g_class *mp); 114137257Spjdstatic void g_raid3_fini(struct g_class *mp); 115133808Spjd 116133808Spjdstruct g_class g_raid3_class = { 117133808Spjd .name = G_RAID3_CLASS_NAME, 118133808Spjd .version = G_VERSION, 119133808Spjd .ctlreq = g_raid3_config, 120133808Spjd .taste = g_raid3_taste, 121137257Spjd .destroy_geom = g_raid3_destroy_geom, 122137257Spjd .init = g_raid3_init, 123137257Spjd .fini = g_raid3_fini 124133808Spjd}; 125133808Spjd 126133808Spjd 127133808Spjdstatic void g_raid3_destroy_provider(struct g_raid3_softc *sc); 128139144Spjdstatic int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state); 129139144Spjdstatic void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force); 130133808Spjdstatic void g_raid3_dumpconf(struct sbuf *sb, const char *indent, 131133808Spjd struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 132133808Spjdstatic void g_raid3_sync_stop(struct g_raid3_softc *sc, int type); 133156612Spjdstatic int g_raid3_register_request(struct bio *pbp); 134156612Spjdstatic void g_raid3_sync_release(struct g_raid3_softc *sc); 135133808Spjd 136133808Spjd 137133808Spjdstatic const char * 138133808Spjdg_raid3_disk_state2str(int state) 139133808Spjd{ 140133808Spjd 141133808Spjd switch (state) { 142133808Spjd case G_RAID3_DISK_STATE_NODISK: 143133808Spjd return ("NODISK"); 144133808Spjd case 
G_RAID3_DISK_STATE_NONE: 145133808Spjd return ("NONE"); 146133808Spjd case G_RAID3_DISK_STATE_NEW: 147133808Spjd return ("NEW"); 148133808Spjd case G_RAID3_DISK_STATE_ACTIVE: 149133808Spjd return ("ACTIVE"); 150133808Spjd case G_RAID3_DISK_STATE_STALE: 151133808Spjd return ("STALE"); 152133808Spjd case G_RAID3_DISK_STATE_SYNCHRONIZING: 153133808Spjd return ("SYNCHRONIZING"); 154133808Spjd case G_RAID3_DISK_STATE_DISCONNECTED: 155133808Spjd return ("DISCONNECTED"); 156133808Spjd default: 157133808Spjd return ("INVALID"); 158133808Spjd } 159133808Spjd} 160133808Spjd 161133808Spjdstatic const char * 162133808Spjdg_raid3_device_state2str(int state) 163133808Spjd{ 164133808Spjd 165133808Spjd switch (state) { 166133808Spjd case G_RAID3_DEVICE_STATE_STARTING: 167133808Spjd return ("STARTING"); 168133808Spjd case G_RAID3_DEVICE_STATE_DEGRADED: 169133808Spjd return ("DEGRADED"); 170133808Spjd case G_RAID3_DEVICE_STATE_COMPLETE: 171133808Spjd return ("COMPLETE"); 172133808Spjd default: 173133808Spjd return ("INVALID"); 174133808Spjd } 175133808Spjd} 176133808Spjd 177133808Spjdconst char * 178133808Spjdg_raid3_get_diskname(struct g_raid3_disk *disk) 179133808Spjd{ 180133808Spjd 181133808Spjd if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 182133808Spjd return ("[unknown]"); 183133808Spjd return (disk->d_name); 184133808Spjd} 185133808Spjd 186160203Spjdstatic void * 187160203Spjdg_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags) 188160203Spjd{ 189160203Spjd void *ptr; 190200821Smav enum g_raid3_zones zone; 191160203Spjd 192200821Smav if (g_raid3_use_malloc || 193200821Smav (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) 194160203Spjd ptr = malloc(size, M_RAID3, flags); 195160203Spjd else { 196200821Smav ptr = uma_zalloc_arg(sc->sc_zones[zone].sz_zone, 197200821Smav &sc->sc_zones[zone], flags); 198200821Smav sc->sc_zones[zone].sz_requested++; 199160203Spjd if (ptr == NULL) 200200821Smav sc->sc_zones[zone].sz_failed++; 201160203Spjd } 
202160203Spjd return (ptr); 203160203Spjd} 204160203Spjd 205160203Spjdstatic void 206160203Spjdg_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size) 207160203Spjd{ 208200821Smav enum g_raid3_zones zone; 209160203Spjd 210200821Smav if (g_raid3_use_malloc || 211200821Smav (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) 212160203Spjd free(ptr, M_RAID3); 213160203Spjd else { 214200821Smav uma_zfree_arg(sc->sc_zones[zone].sz_zone, 215200821Smav ptr, &sc->sc_zones[zone]); 216160203Spjd } 217160203Spjd} 218160203Spjd 219156612Spjdstatic int 220156612Spjdg_raid3_uma_ctor(void *mem, int size, void *arg, int flags) 221156612Spjd{ 222156612Spjd struct g_raid3_zone *sz = arg; 223156612Spjd 224157222Spjd if (sz->sz_max > 0 && sz->sz_inuse == sz->sz_max) 225156612Spjd return (ENOMEM); 226156612Spjd sz->sz_inuse++; 227156612Spjd return (0); 228156612Spjd} 229156612Spjd 230156612Spjdstatic void 231156612Spjdg_raid3_uma_dtor(void *mem, int size, void *arg) 232156612Spjd{ 233156612Spjd struct g_raid3_zone *sz = arg; 234156612Spjd 235156612Spjd sz->sz_inuse--; 236156612Spjd} 237156612Spjd 238201545Smav#define g_raid3_xor(src, dst, size) \ 239201545Smav _g_raid3_xor((uint64_t *)(src), \ 240133808Spjd (uint64_t *)(dst), (size_t)size) 241133808Spjdstatic void 242201545Smav_g_raid3_xor(uint64_t *src, uint64_t *dst, size_t size) 243133808Spjd{ 244133808Spjd 245133808Spjd KASSERT((size % 128) == 0, ("Invalid size: %zu.", size)); 246133808Spjd for (; size > 0; size -= 128) { 247201545Smav *dst++ ^= (*src++); 248201545Smav *dst++ ^= (*src++); 249201545Smav *dst++ ^= (*src++); 250201545Smav *dst++ ^= (*src++); 251201545Smav *dst++ ^= (*src++); 252201545Smav *dst++ ^= (*src++); 253201545Smav *dst++ ^= (*src++); 254201545Smav *dst++ ^= (*src++); 255201545Smav *dst++ ^= (*src++); 256201545Smav *dst++ ^= (*src++); 257201545Smav *dst++ ^= (*src++); 258201545Smav *dst++ ^= (*src++); 259201545Smav *dst++ ^= (*src++); 260201545Smav *dst++ ^= (*src++); 261201545Smav *dst++ ^= (*src++); 
262201545Smav *dst++ ^= (*src++); 263133808Spjd } 264133808Spjd} 265133808Spjd 266134168Spjdstatic int 267134168Spjdg_raid3_is_zero(struct bio *bp) 268134168Spjd{ 269134168Spjd static const uint64_t zeros[] = { 270134168Spjd 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 271134168Spjd }; 272134168Spjd u_char *addr; 273134168Spjd ssize_t size; 274134168Spjd 275134168Spjd size = bp->bio_length; 276134168Spjd addr = (u_char *)bp->bio_data; 277134168Spjd for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { 278134168Spjd if (bcmp(addr, zeros, sizeof(zeros)) != 0) 279134168Spjd return (0); 280134168Spjd } 281134168Spjd return (1); 282134168Spjd} 283134168Spjd 284133808Spjd/* 285133808Spjd * --- Events handling functions --- 286133808Spjd * Events in geom_raid3 are used to maintain disks and device status 287133808Spjd * from one thread to simplify locking. 288133808Spjd */ 289133808Spjdstatic void 290133808Spjdg_raid3_event_free(struct g_raid3_event *ep) 291133808Spjd{ 292133808Spjd 293133808Spjd free(ep, M_RAID3); 294133808Spjd} 295133808Spjd 296133808Spjdint 297133808Spjdg_raid3_event_send(void *arg, int state, int flags) 298133808Spjd{ 299133808Spjd struct g_raid3_softc *sc; 300133808Spjd struct g_raid3_disk *disk; 301133808Spjd struct g_raid3_event *ep; 302133808Spjd int error; 303133808Spjd 304133808Spjd ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK); 305133808Spjd G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep); 306133808Spjd if ((flags & G_RAID3_EVENT_DEVICE) != 0) { 307133808Spjd disk = NULL; 308133808Spjd sc = arg; 309133808Spjd } else { 310133808Spjd disk = arg; 311133808Spjd sc = disk->d_softc; 312133808Spjd } 313133808Spjd ep->e_disk = disk; 314133808Spjd ep->e_state = state; 315133808Spjd ep->e_flags = flags; 316133808Spjd ep->e_error = 0; 317133808Spjd mtx_lock(&sc->sc_events_mtx); 318133808Spjd TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 319133808Spjd mtx_unlock(&sc->sc_events_mtx); 320133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", 
__func__, sc); 321133808Spjd mtx_lock(&sc->sc_queue_mtx); 322133808Spjd wakeup(sc); 323133808Spjd wakeup(&sc->sc_queue); 324133808Spjd mtx_unlock(&sc->sc_queue_mtx); 325133808Spjd if ((flags & G_RAID3_EVENT_DONTWAIT) != 0) 326133808Spjd return (0); 327156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 328133808Spjd G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 329156612Spjd sx_xunlock(&sc->sc_lock); 330133808Spjd while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) { 331133808Spjd mtx_lock(&sc->sc_events_mtx); 332133808Spjd MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event", 333133808Spjd hz * 5); 334133808Spjd } 335133808Spjd error = ep->e_error; 336133808Spjd g_raid3_event_free(ep); 337156612Spjd sx_xlock(&sc->sc_lock); 338133808Spjd return (error); 339133808Spjd} 340133808Spjd 341133808Spjdstatic struct g_raid3_event * 342133808Spjdg_raid3_event_get(struct g_raid3_softc *sc) 343133808Spjd{ 344133808Spjd struct g_raid3_event *ep; 345133808Spjd 346133808Spjd mtx_lock(&sc->sc_events_mtx); 347133808Spjd ep = TAILQ_FIRST(&sc->sc_events); 348133808Spjd mtx_unlock(&sc->sc_events_mtx); 349133808Spjd return (ep); 350133808Spjd} 351133808Spjd 352133808Spjdstatic void 353139144Spjdg_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep) 354139144Spjd{ 355139144Spjd 356139144Spjd mtx_lock(&sc->sc_events_mtx); 357139144Spjd TAILQ_REMOVE(&sc->sc_events, ep, e_next); 358139144Spjd mtx_unlock(&sc->sc_events_mtx); 359139144Spjd} 360139144Spjd 361139144Spjdstatic void 362133808Spjdg_raid3_event_cancel(struct g_raid3_disk *disk) 363133808Spjd{ 364133808Spjd struct g_raid3_softc *sc; 365133808Spjd struct g_raid3_event *ep, *tmpep; 366133808Spjd 367156612Spjd sc = disk->d_softc; 368156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 369133808Spjd 370133808Spjd mtx_lock(&sc->sc_events_mtx); 371133808Spjd TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 372133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) 373133808Spjd continue; 374133808Spjd if 
(ep->e_disk != disk) 375133808Spjd continue; 376133808Spjd TAILQ_REMOVE(&sc->sc_events, ep, e_next); 377133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 378133808Spjd g_raid3_event_free(ep); 379133808Spjd else { 380133808Spjd ep->e_error = ECANCELED; 381133808Spjd wakeup(ep); 382133808Spjd } 383133808Spjd } 384133808Spjd mtx_unlock(&sc->sc_events_mtx); 385133808Spjd} 386133808Spjd 387133808Spjd/* 388133808Spjd * Return the number of disks in the given state. 389133808Spjd * If state is equal to -1, count all connected disks. 390133808Spjd */ 391133808Spjdu_int 392133808Spjdg_raid3_ndisks(struct g_raid3_softc *sc, int state) 393133808Spjd{ 394133808Spjd struct g_raid3_disk *disk; 395133839Sobrien u_int n, ndisks; 396133808Spjd 397156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 398156612Spjd 399133839Sobrien for (n = ndisks = 0; n < sc->sc_ndisks; n++) { 400133808Spjd disk = &sc->sc_disks[n]; 401133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 402133808Spjd continue; 403133808Spjd if (state == -1 || disk->d_state == state) 404133808Spjd ndisks++; 405133808Spjd } 406133808Spjd return (ndisks); 407133808Spjd} 408133808Spjd 409133808Spjdstatic u_int 410133808Spjdg_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp) 411133808Spjd{ 412133808Spjd struct bio *bp; 413133808Spjd u_int nreqs = 0; 414133808Spjd 415133808Spjd mtx_lock(&sc->sc_queue_mtx); 416133808Spjd TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 417133808Spjd if (bp->bio_from == cp) 418133808Spjd nreqs++; 419133808Spjd } 420133808Spjd mtx_unlock(&sc->sc_queue_mtx); 421133808Spjd return (nreqs); 422133808Spjd} 423133808Spjd 424133808Spjdstatic int 425133808Spjdg_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp) 426133808Spjd{ 427133808Spjd 428137256Spjd if (cp->index > 0) { 429133808Spjd G_RAID3_DEBUG(2, 430133808Spjd "I/O requests for %s exist, can't destroy it now.", 431133808Spjd cp->provider->name); 432133808Spjd return (1); 433133808Spjd } 434133808Spjd if 
(g_raid3_nrequests(sc, cp) > 0) { 435133808Spjd G_RAID3_DEBUG(2, 436133808Spjd "I/O requests for %s in queue, can't destroy it now.", 437133808Spjd cp->provider->name); 438133808Spjd return (1); 439133808Spjd } 440133808Spjd return (0); 441133808Spjd} 442133808Spjd 443133808Spjdstatic void 444139144Spjdg_raid3_destroy_consumer(void *arg, int flags __unused) 445139144Spjd{ 446139144Spjd struct g_consumer *cp; 447139144Spjd 448156612Spjd g_topology_assert(); 449156612Spjd 450139144Spjd cp = arg; 451139144Spjd G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 452139144Spjd g_detach(cp); 453139144Spjd g_destroy_consumer(cp); 454139144Spjd} 455139144Spjd 456139144Spjdstatic void 457133808Spjdg_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 458133808Spjd{ 459139144Spjd struct g_provider *pp; 460139144Spjd int retaste_wait; 461133808Spjd 462133808Spjd g_topology_assert(); 463133808Spjd 464133808Spjd cp->private = NULL; 465133808Spjd if (g_raid3_is_busy(sc, cp)) 466133808Spjd return; 467133808Spjd G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name); 468139144Spjd pp = cp->provider; 469139144Spjd retaste_wait = 0; 470139144Spjd if (cp->acw == 1) { 471139144Spjd if ((pp->geom->flags & G_GEOM_WITHER) == 0) 472139144Spjd retaste_wait = 1; 473139144Spjd } 474139144Spjd G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 475139144Spjd -cp->acw, -cp->ace, 0); 476139144Spjd if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 477139144Spjd g_access(cp, -cp->acr, -cp->acw, -cp->ace); 478139144Spjd if (retaste_wait) { 479139144Spjd /* 480139144Spjd * After retaste event was send (inside g_access()), we can send 481139144Spjd * event to detach and destroy consumer. 482139144Spjd * A class, which has consumer to the given provider connected 483139144Spjd * will not receive retaste event for the provider. 
484139144Spjd * This is the way how I ignore retaste events when I close 485139144Spjd * consumers opened for write: I detach and destroy consumer 486139144Spjd * after retaste event is sent. 487139144Spjd */ 488139144Spjd g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL); 489139144Spjd return; 490139144Spjd } 491139144Spjd G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name); 492133808Spjd g_detach(cp); 493133808Spjd g_destroy_consumer(cp); 494133808Spjd} 495133808Spjd 496133808Spjdstatic int 497133808Spjdg_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp) 498133808Spjd{ 499144144Spjd struct g_consumer *cp; 500133808Spjd int error; 501133808Spjd 502156612Spjd g_topology_assert_not(); 503133808Spjd KASSERT(disk->d_consumer == NULL, 504133808Spjd ("Disk already connected (device %s).", disk->d_softc->sc_name)); 505133808Spjd 506156612Spjd g_topology_lock(); 507144144Spjd cp = g_new_consumer(disk->d_softc->sc_geom); 508144144Spjd error = g_attach(cp, pp); 509144144Spjd if (error != 0) { 510144144Spjd g_destroy_consumer(cp); 511156612Spjd g_topology_unlock(); 512133808Spjd return (error); 513144144Spjd } 514144144Spjd error = g_access(cp, 1, 1, 1); 515156612Spjd g_topology_unlock(); 516139144Spjd if (error != 0) { 517144144Spjd g_detach(cp); 518144144Spjd g_destroy_consumer(cp); 519139144Spjd G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).", 520139144Spjd pp->name, error); 521139144Spjd return (error); 522139144Spjd } 523144144Spjd disk->d_consumer = cp; 524144144Spjd disk->d_consumer->private = disk; 525144144Spjd disk->d_consumer->index = 0; 526133808Spjd G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk)); 527133808Spjd return (0); 528133808Spjd} 529133808Spjd 530133808Spjdstatic void 531133808Spjdg_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 532133808Spjd{ 533133808Spjd 534133808Spjd g_topology_assert(); 535133808Spjd 536133808Spjd if (cp == NULL) 537133808Spjd return; 538139144Spjd if 
(cp->provider != NULL) 539133808Spjd g_raid3_kill_consumer(sc, cp); 540139144Spjd else 541133808Spjd g_destroy_consumer(cp); 542133808Spjd} 543133808Spjd 544133808Spjd/* 545133808Spjd * Initialize disk. This means allocate memory, create consumer, attach it 546133808Spjd * to the provider and open access (r1w1e1) to it. 547133808Spjd */ 548133808Spjdstatic struct g_raid3_disk * 549133808Spjdg_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp, 550133808Spjd struct g_raid3_metadata *md, int *errorp) 551133808Spjd{ 552133808Spjd struct g_raid3_disk *disk; 553133808Spjd int error; 554133808Spjd 555133808Spjd disk = &sc->sc_disks[md->md_no]; 556133808Spjd error = g_raid3_connect_disk(disk, pp); 557144144Spjd if (error != 0) { 558144144Spjd if (errorp != NULL) 559144144Spjd *errorp = error; 560144144Spjd return (NULL); 561144144Spjd } 562133808Spjd disk->d_state = G_RAID3_DISK_STATE_NONE; 563133808Spjd disk->d_flags = md->md_dflags; 564133808Spjd if (md->md_provider[0] != '\0') 565133808Spjd disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED; 566133808Spjd disk->d_sync.ds_consumer = NULL; 567133808Spjd disk->d_sync.ds_offset = md->md_sync_offset; 568133808Spjd disk->d_sync.ds_offset_done = md->md_sync_offset; 569139295Spjd disk->d_genid = md->md_genid; 570133808Spjd disk->d_sync.ds_syncid = md->md_syncid; 571133808Spjd if (errorp != NULL) 572133808Spjd *errorp = 0; 573133808Spjd return (disk); 574133808Spjd} 575133808Spjd 576133808Spjdstatic void 577133808Spjdg_raid3_destroy_disk(struct g_raid3_disk *disk) 578133808Spjd{ 579133808Spjd struct g_raid3_softc *sc; 580133808Spjd 581156612Spjd g_topology_assert_not(); 582156612Spjd sc = disk->d_softc; 583156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 584133808Spjd 585133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 586133808Spjd return; 587133808Spjd g_raid3_event_cancel(disk); 588133808Spjd switch (disk->d_state) { 589133808Spjd case G_RAID3_DISK_STATE_SYNCHRONIZING: 590133808Spjd if (sc->sc_syncdisk != 
NULL) 591133808Spjd g_raid3_sync_stop(sc, 1); 592133808Spjd /* FALLTHROUGH */ 593133808Spjd case G_RAID3_DISK_STATE_NEW: 594133808Spjd case G_RAID3_DISK_STATE_STALE: 595133808Spjd case G_RAID3_DISK_STATE_ACTIVE: 596156612Spjd g_topology_lock(); 597133808Spjd g_raid3_disconnect_consumer(sc, disk->d_consumer); 598156612Spjd g_topology_unlock(); 599133808Spjd disk->d_consumer = NULL; 600133808Spjd break; 601133808Spjd default: 602133808Spjd KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 603133808Spjd g_raid3_get_diskname(disk), 604133808Spjd g_raid3_disk_state2str(disk->d_state))); 605133808Spjd } 606133808Spjd disk->d_state = G_RAID3_DISK_STATE_NODISK; 607133808Spjd} 608133808Spjd 609133808Spjdstatic void 610133808Spjdg_raid3_destroy_device(struct g_raid3_softc *sc) 611133808Spjd{ 612133808Spjd struct g_raid3_event *ep; 613137257Spjd struct g_raid3_disk *disk; 614133808Spjd struct g_geom *gp; 615133808Spjd struct g_consumer *cp; 616133808Spjd u_int n; 617133808Spjd 618156612Spjd g_topology_assert_not(); 619156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 620133808Spjd 621133808Spjd gp = sc->sc_geom; 622133808Spjd if (sc->sc_provider != NULL) 623133808Spjd g_raid3_destroy_provider(sc); 624137257Spjd for (n = 0; n < sc->sc_ndisks; n++) { 625137257Spjd disk = &sc->sc_disks[n]; 626139144Spjd if (disk->d_state != G_RAID3_DISK_STATE_NODISK) { 627139144Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 628139144Spjd g_raid3_update_metadata(disk); 629139144Spjd g_raid3_destroy_disk(disk); 630139144Spjd } 631137257Spjd } 632133808Spjd while ((ep = g_raid3_event_get(sc)) != NULL) { 633139144Spjd g_raid3_event_remove(sc, ep); 634133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 635133808Spjd g_raid3_event_free(ep); 636133808Spjd else { 637133808Spjd ep->e_error = ECANCELED; 638133808Spjd ep->e_flags |= G_RAID3_EVENT_DONE; 639133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); 640133808Spjd mtx_lock(&sc->sc_events_mtx); 641133808Spjd wakeup(ep); 642133808Spjd 
mtx_unlock(&sc->sc_events_mtx); 643133808Spjd } 644133808Spjd } 645133808Spjd callout_drain(&sc->sc_callout); 646133808Spjd cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer); 647156612Spjd g_topology_lock(); 648133808Spjd if (cp != NULL) 649133808Spjd g_raid3_disconnect_consumer(sc, cp); 650133808Spjd g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 651156612Spjd G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name); 652156612Spjd g_wither_geom(gp, ENXIO); 653156612Spjd g_topology_unlock(); 654160203Spjd if (!g_raid3_use_malloc) { 655160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); 656160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); 657160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); 658160203Spjd } 659133808Spjd mtx_destroy(&sc->sc_queue_mtx); 660133808Spjd mtx_destroy(&sc->sc_events_mtx); 661156612Spjd sx_xunlock(&sc->sc_lock); 662156612Spjd sx_destroy(&sc->sc_lock); 663133808Spjd} 664133808Spjd 665133808Spjdstatic void 666133808Spjdg_raid3_orphan(struct g_consumer *cp) 667133808Spjd{ 668133808Spjd struct g_raid3_disk *disk; 669133808Spjd 670133808Spjd g_topology_assert(); 671133808Spjd 672133808Spjd disk = cp->private; 673133808Spjd if (disk == NULL) 674133808Spjd return; 675139671Spjd disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID; 676133808Spjd g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, 677133808Spjd G_RAID3_EVENT_DONTWAIT); 678133808Spjd} 679133808Spjd 680133808Spjdstatic int 681133808Spjdg_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 682133808Spjd{ 683133808Spjd struct g_raid3_softc *sc; 684133808Spjd struct g_consumer *cp; 685133808Spjd off_t offset, length; 686133808Spjd u_char *sector; 687139144Spjd int error = 0; 688133808Spjd 689156612Spjd g_topology_assert_not(); 690156612Spjd sc = disk->d_softc; 691156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 692133808Spjd 693133808Spjd cp = disk->d_consumer; 694133808Spjd KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 
695133808Spjd KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); 696156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 697139144Spjd ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, 698139144Spjd cp->acw, cp->ace)); 699133808Spjd length = cp->provider->sectorsize; 700133808Spjd offset = cp->provider->mediasize - length; 701133808Spjd sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO); 702139144Spjd if (md != NULL) 703139144Spjd raid3_metadata_encode(md, sector); 704139144Spjd error = g_write_data(cp, offset, sector, length); 705133808Spjd free(sector, M_RAID3); 706133808Spjd if (error != 0) { 707162832Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 708162832Spjd G_RAID3_DEBUG(0, "Cannot write metadata on %s " 709162832Spjd "(device=%s, error=%d).", 710162832Spjd g_raid3_get_diskname(disk), sc->sc_name, error); 711162832Spjd disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 712162832Spjd } else { 713162832Spjd G_RAID3_DEBUG(1, "Cannot write metadata on %s " 714162832Spjd "(device=%s, error=%d).", 715162832Spjd g_raid3_get_diskname(disk), sc->sc_name, error); 716162832Spjd } 717162832Spjd if (g_raid3_disconnect_on_failure && 718155546Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 719162832Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 720162832Spjd g_raid3_event_send(disk, 721162832Spjd G_RAID3_DISK_STATE_DISCONNECTED, 722162832Spjd G_RAID3_EVENT_DONTWAIT); 723162832Spjd } 724133808Spjd } 725133808Spjd return (error); 726133808Spjd} 727133808Spjd 728133808Spjdint 729133808Spjdg_raid3_clear_metadata(struct g_raid3_disk *disk) 730133808Spjd{ 731133808Spjd int error; 732133808Spjd 733156612Spjd g_topology_assert_not(); 734156612Spjd sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); 735156612Spjd 736133808Spjd error = g_raid3_write_metadata(disk, NULL); 737133808Spjd if (error == 0) { 738133808Spjd G_RAID3_DEBUG(2, "Metadata on %s cleared.", 739133808Spjd g_raid3_get_diskname(disk)); 740133808Spjd } else { 
741133808Spjd G_RAID3_DEBUG(0, 742133808Spjd "Cannot clear metadata on disk %s (error=%d).", 743133808Spjd g_raid3_get_diskname(disk), error); 744133808Spjd } 745133808Spjd return (error); 746133808Spjd} 747133808Spjd 748133808Spjdvoid 749133808Spjdg_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 750133808Spjd{ 751133808Spjd struct g_raid3_softc *sc; 752142727Spjd struct g_provider *pp; 753133808Spjd 754133808Spjd sc = disk->d_softc; 755133808Spjd strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic)); 756133808Spjd md->md_version = G_RAID3_VERSION; 757133808Spjd strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 758133808Spjd md->md_id = sc->sc_id; 759133808Spjd md->md_all = sc->sc_ndisks; 760139295Spjd md->md_genid = sc->sc_genid; 761133808Spjd md->md_mediasize = sc->sc_mediasize; 762133808Spjd md->md_sectorsize = sc->sc_sectorsize; 763133808Spjd md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK); 764133808Spjd md->md_no = disk->d_no; 765133808Spjd md->md_syncid = disk->d_sync.ds_syncid; 766133808Spjd md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK); 767157838Spjd if (disk->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 768133808Spjd md->md_sync_offset = 0; 769157838Spjd else { 770157838Spjd md->md_sync_offset = 771157838Spjd disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1); 772157838Spjd } 773142727Spjd if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL) 774142727Spjd pp = disk->d_consumer->provider; 775142727Spjd else 776142727Spjd pp = NULL; 777142727Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL) 778142727Spjd strlcpy(md->md_provider, pp->name, sizeof(md->md_provider)); 779142727Spjd else 780133808Spjd bzero(md->md_provider, sizeof(md->md_provider)); 781142727Spjd if (pp != NULL) 782142727Spjd md->md_provsize = pp->mediasize; 783142727Spjd else 784142727Spjd md->md_provsize = 0; 785133808Spjd} 786133808Spjd 787133808Spjdvoid 
788133808Spjdg_raid3_update_metadata(struct g_raid3_disk *disk) 789133808Spjd{ 790156612Spjd struct g_raid3_softc *sc; 791133808Spjd struct g_raid3_metadata md; 792133808Spjd int error; 793133808Spjd 794156612Spjd g_topology_assert_not(); 795156612Spjd sc = disk->d_softc; 796156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 797156612Spjd 798133808Spjd g_raid3_fill_metadata(disk, &md); 799133808Spjd error = g_raid3_write_metadata(disk, &md); 800133808Spjd if (error == 0) { 801133808Spjd G_RAID3_DEBUG(2, "Metadata on %s updated.", 802133808Spjd g_raid3_get_diskname(disk)); 803133808Spjd } else { 804133808Spjd G_RAID3_DEBUG(0, 805133808Spjd "Cannot update metadata on disk %s (error=%d).", 806133808Spjd g_raid3_get_diskname(disk), error); 807133808Spjd } 808133808Spjd} 809133808Spjd 810133808Spjdstatic void 811139144Spjdg_raid3_bump_syncid(struct g_raid3_softc *sc) 812133808Spjd{ 813133808Spjd struct g_raid3_disk *disk; 814133808Spjd u_int n; 815133808Spjd 816156612Spjd g_topology_assert_not(); 817156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 818133808Spjd KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 819133808Spjd ("%s called with no active disks (device=%s).", __func__, 820133808Spjd sc->sc_name)); 821133808Spjd 822133808Spjd sc->sc_syncid++; 823139295Spjd G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 824139295Spjd sc->sc_syncid); 825133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 826133808Spjd disk = &sc->sc_disks[n]; 827133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 828133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 829133808Spjd disk->d_sync.ds_syncid = sc->sc_syncid; 830133808Spjd g_raid3_update_metadata(disk); 831133808Spjd } 832133808Spjd } 833133808Spjd} 834133808Spjd 835137258Spjdstatic void 836139295Spjdg_raid3_bump_genid(struct g_raid3_softc *sc) 837139295Spjd{ 838139295Spjd struct g_raid3_disk *disk; 839139295Spjd u_int n; 840139295Spjd 841156612Spjd g_topology_assert_not(); 842156612Spjd 
sx_assert(&sc->sc_lock, SX_XLOCKED); 843139295Spjd KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 844139295Spjd ("%s called with no active disks (device=%s).", __func__, 845139295Spjd sc->sc_name)); 846139295Spjd 847139295Spjd sc->sc_genid++; 848139295Spjd G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 849139295Spjd sc->sc_genid); 850139295Spjd for (n = 0; n < sc->sc_ndisks; n++) { 851139295Spjd disk = &sc->sc_disks[n]; 852139295Spjd if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 853139295Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 854139295Spjd disk->d_genid = sc->sc_genid; 855139295Spjd g_raid3_update_metadata(disk); 856139295Spjd } 857139295Spjd } 858139295Spjd} 859139295Spjd 860155540Spjdstatic int 861156612Spjdg_raid3_idle(struct g_raid3_softc *sc, int acw) 862137258Spjd{ 863137258Spjd struct g_raid3_disk *disk; 864137258Spjd u_int i; 865155540Spjd int timeout; 866137258Spjd 867156612Spjd g_topology_assert_not(); 868156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 869156612Spjd 870155540Spjd if (sc->sc_provider == NULL) 871155540Spjd return (0); 872163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 873163888Spjd return (0); 874155540Spjd if (sc->sc_idle) 875155540Spjd return (0); 876155540Spjd if (sc->sc_writes > 0) 877155540Spjd return (0); 878156612Spjd if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { 879155581Spjd timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write); 880245444Smav if (!g_raid3_shutdown && timeout > 0) 881155540Spjd return (timeout); 882155540Spjd } 883137258Spjd sc->sc_idle = 1; 884137258Spjd for (i = 0; i < sc->sc_ndisks; i++) { 885137258Spjd disk = &sc->sc_disks[i]; 886137258Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 887137258Spjd continue; 888137258Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 889137258Spjd g_raid3_get_diskname(disk), sc->sc_name); 890137258Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 891137258Spjd 
g_raid3_update_metadata(disk); 892137258Spjd } 893155540Spjd return (0); 894137258Spjd} 895137258Spjd 896137258Spjdstatic void 897137258Spjdg_raid3_unidle(struct g_raid3_softc *sc) 898137258Spjd{ 899137258Spjd struct g_raid3_disk *disk; 900137258Spjd u_int i; 901137258Spjd 902156612Spjd g_topology_assert_not(); 903156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 904156612Spjd 905163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 906163888Spjd return; 907137258Spjd sc->sc_idle = 0; 908155581Spjd sc->sc_last_write = time_uptime; 909137258Spjd for (i = 0; i < sc->sc_ndisks; i++) { 910137258Spjd disk = &sc->sc_disks[i]; 911137258Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 912137258Spjd continue; 913137258Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 914137258Spjd g_raid3_get_diskname(disk), sc->sc_name); 915137258Spjd disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 916137258Spjd g_raid3_update_metadata(disk); 917137258Spjd } 918137258Spjd} 919137258Spjd 920155174Spjd/* 921133808Spjd * Treat bio_driver1 field in parent bio as list head and field bio_caller1 922133808Spjd * in child bio as pointer to the next element on the list. 
923133808Spjd */ 924133808Spjd#define G_RAID3_HEAD_BIO(pbp) (pbp)->bio_driver1 925133808Spjd 926133808Spjd#define G_RAID3_NEXT_BIO(cbp) (cbp)->bio_caller1 927133808Spjd 928133808Spjd#define G_RAID3_FOREACH_BIO(pbp, bp) \ 929133808Spjd for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL; \ 930133808Spjd (bp) = G_RAID3_NEXT_BIO(bp)) 931133808Spjd 932133808Spjd#define G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp) \ 933133808Spjd for ((bp) = G_RAID3_HEAD_BIO(pbp); \ 934133808Spjd (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1); \ 935133808Spjd (bp) = (tmpbp)) 936133808Spjd 937133808Spjdstatic void 938133808Spjdg_raid3_init_bio(struct bio *pbp) 939133808Spjd{ 940133808Spjd 941133808Spjd G_RAID3_HEAD_BIO(pbp) = NULL; 942133808Spjd} 943133808Spjd 944133808Spjdstatic void 945134168Spjdg_raid3_remove_bio(struct bio *cbp) 946134168Spjd{ 947134168Spjd struct bio *pbp, *bp; 948134168Spjd 949134168Spjd pbp = cbp->bio_parent; 950134168Spjd if (G_RAID3_HEAD_BIO(pbp) == cbp) 951134168Spjd G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 952134168Spjd else { 953134168Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 954134168Spjd if (G_RAID3_NEXT_BIO(bp) == cbp) { 955134168Spjd G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 956134168Spjd break; 957134168Spjd } 958134168Spjd } 959134168Spjd } 960134168Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 961134168Spjd} 962134168Spjd 963134168Spjdstatic void 964134168Spjdg_raid3_replace_bio(struct bio *sbp, struct bio *dbp) 965134168Spjd{ 966134168Spjd struct bio *pbp, *bp; 967134168Spjd 968134168Spjd g_raid3_remove_bio(sbp); 969134168Spjd pbp = dbp->bio_parent; 970134168Spjd G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); 971134168Spjd if (G_RAID3_HEAD_BIO(pbp) == dbp) 972134168Spjd G_RAID3_HEAD_BIO(pbp) = sbp; 973134168Spjd else { 974134168Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 975134168Spjd if (G_RAID3_NEXT_BIO(bp) == dbp) { 976134168Spjd G_RAID3_NEXT_BIO(bp) = sbp; 977134168Spjd break; 978134168Spjd } 979134168Spjd } 980134168Spjd } 981134168Spjd G_RAID3_NEXT_BIO(dbp) = 
NULL; 982134168Spjd} 983134168Spjd 984134168Spjdstatic void 985133808Spjdg_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) 986133808Spjd{ 987133808Spjd struct bio *bp, *pbp; 988133808Spjd size_t size; 989133808Spjd 990133808Spjd pbp = cbp->bio_parent; 991133808Spjd pbp->bio_children--; 992133808Spjd KASSERT(cbp->bio_data != NULL, ("NULL bio_data")); 993133808Spjd size = pbp->bio_length / (sc->sc_ndisks - 1); 994160203Spjd g_raid3_free(sc, cbp->bio_data, size); 995133808Spjd if (G_RAID3_HEAD_BIO(pbp) == cbp) { 996133808Spjd G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 997133808Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 998133808Spjd g_destroy_bio(cbp); 999133808Spjd } else { 1000133808Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 1001133808Spjd if (G_RAID3_NEXT_BIO(bp) == cbp) 1002133808Spjd break; 1003133808Spjd } 1004134168Spjd if (bp != NULL) { 1005134168Spjd KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, 1006134168Spjd ("NULL bp->bio_driver1")); 1007134168Spjd G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 1008134168Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 1009134168Spjd } 1010133808Spjd g_destroy_bio(cbp); 1011133808Spjd } 1012133808Spjd} 1013133808Spjd 1014133808Spjdstatic struct bio * 1015133808Spjdg_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp) 1016133808Spjd{ 1017133808Spjd struct bio *bp, *cbp; 1018133808Spjd size_t size; 1019156612Spjd int memflag; 1020133808Spjd 1021133808Spjd cbp = g_clone_bio(pbp); 1022133808Spjd if (cbp == NULL) 1023133808Spjd return (NULL); 1024133808Spjd size = pbp->bio_length / (sc->sc_ndisks - 1); 1025156612Spjd if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 1026156612Spjd memflag = M_WAITOK; 1027156612Spjd else 1028156612Spjd memflag = M_NOWAIT; 1029160203Spjd cbp->bio_data = g_raid3_alloc(sc, size, memflag); 1030133808Spjd if (cbp->bio_data == NULL) { 1031133808Spjd pbp->bio_children--; 1032133808Spjd g_destroy_bio(cbp); 1033133808Spjd return (NULL); 1034133808Spjd } 1035133808Spjd G_RAID3_NEXT_BIO(cbp) = NULL; 1036133808Spjd 
if (G_RAID3_HEAD_BIO(pbp) == NULL) 1037133808Spjd G_RAID3_HEAD_BIO(pbp) = cbp; 1038133808Spjd else { 1039133808Spjd G_RAID3_FOREACH_BIO(pbp, bp) { 1040133808Spjd if (G_RAID3_NEXT_BIO(bp) == NULL) { 1041133808Spjd G_RAID3_NEXT_BIO(bp) = cbp; 1042133808Spjd break; 1043133808Spjd } 1044133808Spjd } 1045133808Spjd } 1046133808Spjd return (cbp); 1047133808Spjd} 1048133808Spjd 1049133808Spjdstatic void 1050133808Spjdg_raid3_scatter(struct bio *pbp) 1051133808Spjd{ 1052133808Spjd struct g_raid3_softc *sc; 1053133808Spjd struct g_raid3_disk *disk; 1054158290Spjd struct bio *bp, *cbp, *tmpbp; 1055133808Spjd off_t atom, cadd, padd, left; 1056201545Smav int first; 1057133808Spjd 1058133808Spjd sc = pbp->bio_to->geom->softc; 1059133808Spjd bp = NULL; 1060133808Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1061133808Spjd /* 1062133808Spjd * Find bio for which we should calculate data. 1063133808Spjd */ 1064133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1065133808Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1066133808Spjd bp = cbp; 1067133808Spjd break; 1068133808Spjd } 1069133808Spjd } 1070133808Spjd KASSERT(bp != NULL, ("NULL parity bio.")); 1071133808Spjd } 1072133808Spjd atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1073133808Spjd cadd = padd = 0; 1074133808Spjd for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1075133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1076133808Spjd if (cbp == bp) 1077133808Spjd continue; 1078133808Spjd bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom); 1079133808Spjd padd += atom; 1080133808Spjd } 1081133808Spjd cadd += atom; 1082133808Spjd } 1083133808Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1084133808Spjd /* 1085133808Spjd * Calculate parity. 
1086133808Spjd */ 1087201545Smav first = 1; 1088133808Spjd G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1089133808Spjd if (cbp == bp) 1090133808Spjd continue; 1091201545Smav if (first) { 1092201545Smav bcopy(cbp->bio_data, bp->bio_data, 1093201545Smav bp->bio_length); 1094201545Smav first = 0; 1095201545Smav } else { 1096201545Smav g_raid3_xor(cbp->bio_data, bp->bio_data, 1097201545Smav bp->bio_length); 1098201545Smav } 1099133808Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0) 1100133808Spjd g_raid3_destroy_bio(sc, cbp); 1101133808Spjd } 1102133808Spjd } 1103158290Spjd G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1104133808Spjd struct g_consumer *cp; 1105133808Spjd 1106133808Spjd disk = cbp->bio_caller2; 1107133808Spjd cp = disk->d_consumer; 1108133808Spjd cbp->bio_to = cp->provider; 1109133808Spjd G_RAID3_LOGREQ(3, cbp, "Sending request."); 1110156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1111139144Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1112139144Spjd cp->acr, cp->acw, cp->ace)); 1113137256Spjd cp->index++; 1114155540Spjd sc->sc_writes++; 1115133808Spjd g_io_request(cbp, cp); 1116133808Spjd } 1117133808Spjd} 1118133808Spjd 1119133808Spjdstatic void 1120133808Spjdg_raid3_gather(struct bio *pbp) 1121133808Spjd{ 1122133808Spjd struct g_raid3_softc *sc; 1123133808Spjd struct g_raid3_disk *disk; 1124134124Spjd struct bio *xbp, *fbp, *cbp; 1125133808Spjd off_t atom, cadd, padd, left; 1126133808Spjd 1127133808Spjd sc = pbp->bio_to->geom->softc; 1128134124Spjd /* 1129134124Spjd * Find bio for which we have to calculate data. 1130134124Spjd * While going through this path, check if all requests 1131134124Spjd * succeeded, if not, deny whole request. 1132134124Spjd * If we're in COMPLETE mode, we allow one request to fail, 1133134124Spjd * so if we find one, we're sending it to the parity consumer. 1134134124Spjd * If there are more failed requests, we deny whole request. 
1135134124Spjd */ 1136134124Spjd xbp = fbp = NULL; 1137134124Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1138134124Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1139134124Spjd KASSERT(xbp == NULL, ("More than one parity bio.")); 1140134124Spjd xbp = cbp; 1141134124Spjd } 1142134124Spjd if (cbp->bio_error == 0) 1143134124Spjd continue; 1144133808Spjd /* 1145134124Spjd * Found failed request. 1146133808Spjd */ 1147134124Spjd if (fbp == NULL) { 1148134124Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { 1149133808Spjd /* 1150134124Spjd * We are already in degraded mode, so we can't 1151134124Spjd * accept any failures. 1152133808Spjd */ 1153134124Spjd if (pbp->bio_error == 0) 1154155544Spjd pbp->bio_error = cbp->bio_error; 1155134124Spjd } else { 1156134124Spjd fbp = cbp; 1157133808Spjd } 1158134124Spjd } else { 1159133808Spjd /* 1160134124Spjd * Next failed request, that's too many. 1161133808Spjd */ 1162134124Spjd if (pbp->bio_error == 0) 1163134124Spjd pbp->bio_error = fbp->bio_error; 1164134124Spjd } 1165155546Spjd disk = cbp->bio_caller2; 1166155546Spjd if (disk == NULL) 1167155546Spjd continue; 1168155546Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 1169155546Spjd disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 1170155546Spjd G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", 1171155546Spjd cbp->bio_error); 1172155546Spjd } else { 1173155546Spjd G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", 1174155546Spjd cbp->bio_error); 1175155546Spjd } 1176155546Spjd if (g_raid3_disconnect_on_failure && 1177155546Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1178155546Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1179155546Spjd g_raid3_event_send(disk, 1180155546Spjd G_RAID3_DISK_STATE_DISCONNECTED, 1181155546Spjd G_RAID3_EVENT_DONTWAIT); 1182155546Spjd } 1183134124Spjd } 1184134124Spjd if (pbp->bio_error != 0) 1185134124Spjd goto finish; 1186134168Spjd if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 
1187134168Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; 1188134168Spjd if (xbp != fbp) 1189134168Spjd g_raid3_replace_bio(xbp, fbp); 1190134168Spjd g_raid3_destroy_bio(sc, fbp); 1191134168Spjd } else if (fbp != NULL) { 1192134124Spjd struct g_consumer *cp; 1193134124Spjd 1194134124Spjd /* 1195134124Spjd * One request failed, so send the same request to 1196134124Spjd * the parity consumer. 1197134124Spjd */ 1198134124Spjd disk = pbp->bio_driver2; 1199134124Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1200134124Spjd pbp->bio_error = fbp->bio_error; 1201133808Spjd goto finish; 1202133808Spjd } 1203134124Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1204134124Spjd pbp->bio_inbed--; 1205134124Spjd fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); 1206134124Spjd if (disk->d_no == sc->sc_ndisks - 1) 1207134124Spjd fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1208134124Spjd fbp->bio_error = 0; 1209134124Spjd fbp->bio_completed = 0; 1210134124Spjd fbp->bio_children = 0; 1211134124Spjd fbp->bio_inbed = 0; 1212134124Spjd cp = disk->d_consumer; 1213134124Spjd fbp->bio_caller2 = disk; 1214134124Spjd fbp->bio_to = cp->provider; 1215134124Spjd G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); 1216156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1217134124Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1218134124Spjd cp->acr, cp->acw, cp->ace)); 1219137256Spjd cp->index++; 1220134124Spjd g_io_request(fbp, cp); 1221134124Spjd return; 1222134124Spjd } 1223134124Spjd if (xbp != NULL) { 1224133808Spjd /* 1225133808Spjd * Calculate parity. 
1226133808Spjd */ 1227133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1228133808Spjd if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) 1229133808Spjd continue; 1230201545Smav g_raid3_xor(cbp->bio_data, xbp->bio_data, 1231134124Spjd xbp->bio_length); 1232133808Spjd } 1233134124Spjd xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; 1234134168Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1235134168Spjd if (!g_raid3_is_zero(xbp)) { 1236134168Spjd g_raid3_parity_mismatch++; 1237134168Spjd pbp->bio_error = EIO; 1238134168Spjd goto finish; 1239134168Spjd } 1240134168Spjd g_raid3_destroy_bio(sc, xbp); 1241134168Spjd } 1242133808Spjd } 1243133808Spjd atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1244133808Spjd cadd = padd = 0; 1245133808Spjd for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1246133808Spjd G_RAID3_FOREACH_BIO(pbp, cbp) { 1247133808Spjd bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom); 1248133808Spjd pbp->bio_completed += atom; 1249133808Spjd padd += atom; 1250133808Spjd } 1251133808Spjd cadd += atom; 1252133808Spjd } 1253133808Spjdfinish: 1254133808Spjd if (pbp->bio_error == 0) 1255133808Spjd G_RAID3_LOGREQ(3, pbp, "Request finished."); 1256134303Spjd else { 1257134303Spjd if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) 1258134303Spjd G_RAID3_LOGREQ(1, pbp, "Verification error."); 1259134303Spjd else 1260134303Spjd G_RAID3_LOGREQ(0, pbp, "Request failed."); 1261134303Spjd } 1262134168Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; 1263133808Spjd while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1264133808Spjd g_raid3_destroy_bio(sc, cbp); 1265155906Spjd g_io_deliver(pbp, pbp->bio_error); 1266133808Spjd} 1267133808Spjd 1268133808Spjdstatic void 1269133808Spjdg_raid3_done(struct bio *bp) 1270133808Spjd{ 1271133808Spjd struct g_raid3_softc *sc; 1272133808Spjd 1273133808Spjd sc = bp->bio_from->geom->softc; 1274155174Spjd bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR; 1275133808Spjd G_RAID3_LOGREQ(3, bp, "Regular request 
done (error=%d).", bp->bio_error); 1276133808Spjd mtx_lock(&sc->sc_queue_mtx); 1277133808Spjd bioq_insert_head(&sc->sc_queue, bp); 1278201567Smav mtx_unlock(&sc->sc_queue_mtx); 1279133808Spjd wakeup(sc); 1280133808Spjd wakeup(&sc->sc_queue); 1281133808Spjd} 1282133808Spjd 1283133808Spjdstatic void 1284133808Spjdg_raid3_regular_request(struct bio *cbp) 1285133808Spjd{ 1286133808Spjd struct g_raid3_softc *sc; 1287133808Spjd struct g_raid3_disk *disk; 1288133808Spjd struct bio *pbp; 1289133808Spjd 1290133808Spjd g_topology_assert_not(); 1291133808Spjd 1292133808Spjd pbp = cbp->bio_parent; 1293133808Spjd sc = pbp->bio_to->geom->softc; 1294155540Spjd cbp->bio_from->index--; 1295155540Spjd if (cbp->bio_cmd == BIO_WRITE) 1296155540Spjd sc->sc_writes--; 1297133808Spjd disk = cbp->bio_from->private; 1298133808Spjd if (disk == NULL) { 1299133808Spjd g_topology_lock(); 1300133808Spjd g_raid3_kill_consumer(sc, cbp->bio_from); 1301133808Spjd g_topology_unlock(); 1302133808Spjd } 1303133808Spjd 1304133808Spjd G_RAID3_LOGREQ(3, cbp, "Request finished."); 1305133808Spjd pbp->bio_inbed++; 1306133808Spjd KASSERT(pbp->bio_inbed <= pbp->bio_children, 1307133808Spjd ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, 1308133808Spjd pbp->bio_children)); 1309133808Spjd if (pbp->bio_inbed != pbp->bio_children) 1310133808Spjd return; 1311133808Spjd switch (pbp->bio_cmd) { 1312133808Spjd case BIO_READ: 1313133808Spjd g_raid3_gather(pbp); 1314133808Spjd break; 1315133808Spjd case BIO_WRITE: 1316133808Spjd case BIO_DELETE: 1317133808Spjd { 1318133808Spjd int error = 0; 1319133808Spjd 1320133808Spjd pbp->bio_completed = pbp->bio_length; 1321133808Spjd while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) { 1322155546Spjd if (cbp->bio_error == 0) { 1323155546Spjd g_raid3_destroy_bio(sc, cbp); 1324155546Spjd continue; 1325133808Spjd } 1326155546Spjd 1327155546Spjd if (error == 0) 1328155546Spjd error = cbp->bio_error; 1329155546Spjd else if (pbp->bio_error == 0) { 1330155546Spjd /* 
1331155546Spjd * Next failed request, that's too many. 1332155546Spjd */ 1333155546Spjd pbp->bio_error = error; 1334155546Spjd } 1335155546Spjd 1336155546Spjd disk = cbp->bio_caller2; 1337155546Spjd if (disk == NULL) { 1338155546Spjd g_raid3_destroy_bio(sc, cbp); 1339155546Spjd continue; 1340155546Spjd } 1341155546Spjd 1342155546Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 1343155546Spjd disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 1344155546Spjd G_RAID3_LOGREQ(0, cbp, 1345155546Spjd "Request failed (error=%d).", 1346155546Spjd cbp->bio_error); 1347155546Spjd } else { 1348155546Spjd G_RAID3_LOGREQ(1, cbp, 1349155546Spjd "Request failed (error=%d).", 1350155546Spjd cbp->bio_error); 1351155546Spjd } 1352155546Spjd if (g_raid3_disconnect_on_failure && 1353155546Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1354155546Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1355155546Spjd g_raid3_event_send(disk, 1356155546Spjd G_RAID3_DISK_STATE_DISCONNECTED, 1357155546Spjd G_RAID3_EVENT_DONTWAIT); 1358155546Spjd } 1359133808Spjd g_raid3_destroy_bio(sc, cbp); 1360133808Spjd } 1361133808Spjd if (pbp->bio_error == 0) 1362133808Spjd G_RAID3_LOGREQ(3, pbp, "Request finished."); 1363133808Spjd else 1364133808Spjd G_RAID3_LOGREQ(0, pbp, "Request failed."); 1365133808Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED; 1366133808Spjd pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY; 1367156612Spjd bioq_remove(&sc->sc_inflight, pbp); 1368156612Spjd /* Release delayed sync requests if possible. 
*/ 1369156612Spjd g_raid3_sync_release(sc); 1370133808Spjd g_io_deliver(pbp, pbp->bio_error); 1371133808Spjd break; 1372133808Spjd } 1373133808Spjd } 1374133808Spjd} 1375133808Spjd 1376133808Spjdstatic void 1377133808Spjdg_raid3_sync_done(struct bio *bp) 1378133808Spjd{ 1379133808Spjd struct g_raid3_softc *sc; 1380133808Spjd 1381133808Spjd G_RAID3_LOGREQ(3, bp, "Synchronization request delivered."); 1382133808Spjd sc = bp->bio_from->geom->softc; 1383133808Spjd bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC; 1384133808Spjd mtx_lock(&sc->sc_queue_mtx); 1385133808Spjd bioq_insert_head(&sc->sc_queue, bp); 1386201567Smav mtx_unlock(&sc->sc_queue_mtx); 1387133808Spjd wakeup(sc); 1388133808Spjd wakeup(&sc->sc_queue); 1389133808Spjd} 1390133808Spjd 1391133808Spjdstatic void 1392163836Spjdg_raid3_flush(struct g_raid3_softc *sc, struct bio *bp) 1393163836Spjd{ 1394163836Spjd struct bio_queue_head queue; 1395163836Spjd struct g_raid3_disk *disk; 1396163836Spjd struct g_consumer *cp; 1397163836Spjd struct bio *cbp; 1398163836Spjd u_int i; 1399163836Spjd 1400163836Spjd bioq_init(&queue); 1401163836Spjd for (i = 0; i < sc->sc_ndisks; i++) { 1402163836Spjd disk = &sc->sc_disks[i]; 1403163836Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 1404163836Spjd continue; 1405163836Spjd cbp = g_clone_bio(bp); 1406163836Spjd if (cbp == NULL) { 1407163836Spjd for (cbp = bioq_first(&queue); cbp != NULL; 1408163836Spjd cbp = bioq_first(&queue)) { 1409163836Spjd bioq_remove(&queue, cbp); 1410163836Spjd g_destroy_bio(cbp); 1411163836Spjd } 1412163836Spjd if (bp->bio_error == 0) 1413163836Spjd bp->bio_error = ENOMEM; 1414163836Spjd g_io_deliver(bp, bp->bio_error); 1415163836Spjd return; 1416163836Spjd } 1417163836Spjd bioq_insert_tail(&queue, cbp); 1418163836Spjd cbp->bio_done = g_std_done; 1419163836Spjd cbp->bio_caller1 = disk; 1420163836Spjd cbp->bio_to = disk->d_consumer->provider; 1421163836Spjd } 1422163836Spjd for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 
1423163836Spjd bioq_remove(&queue, cbp); 1424163836Spjd G_RAID3_LOGREQ(3, cbp, "Sending request."); 1425163836Spjd disk = cbp->bio_caller1; 1426163836Spjd cbp->bio_caller1 = NULL; 1427163836Spjd cp = disk->d_consumer; 1428163836Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1429163836Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1430163836Spjd cp->acr, cp->acw, cp->ace)); 1431163836Spjd g_io_request(cbp, disk->d_consumer); 1432163836Spjd } 1433163836Spjd} 1434163836Spjd 1435163836Spjdstatic void 1436133808Spjdg_raid3_start(struct bio *bp) 1437133808Spjd{ 1438133808Spjd struct g_raid3_softc *sc; 1439133808Spjd 1440133808Spjd sc = bp->bio_to->geom->softc; 1441133808Spjd /* 1442133808Spjd * If sc == NULL or there are no valid disks, provider's error 1443133808Spjd * should be set and g_raid3_start() should not be called at all. 1444133808Spjd */ 1445133808Spjd KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 1446133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE), 1447133808Spjd ("Provider's error should be set (error=%d)(device=%s).", 1448133808Spjd bp->bio_to->error, bp->bio_to->name)); 1449133808Spjd G_RAID3_LOGREQ(3, bp, "Request received."); 1450133808Spjd 1451133808Spjd switch (bp->bio_cmd) { 1452133808Spjd case BIO_READ: 1453133808Spjd case BIO_WRITE: 1454133808Spjd case BIO_DELETE: 1455133808Spjd break; 1456163836Spjd case BIO_FLUSH: 1457163836Spjd g_raid3_flush(sc, bp); 1458163836Spjd return; 1459133808Spjd case BIO_GETATTR: 1460133808Spjd default: 1461133808Spjd g_io_deliver(bp, EOPNOTSUPP); 1462133808Spjd return; 1463133808Spjd } 1464133808Spjd mtx_lock(&sc->sc_queue_mtx); 1465133808Spjd bioq_insert_tail(&sc->sc_queue, bp); 1466201567Smav mtx_unlock(&sc->sc_queue_mtx); 1467133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1468133808Spjd wakeup(sc); 1469133808Spjd} 1470133808Spjd 1471133808Spjd/* 1472156612Spjd * Return TRUE if the given request is colliding with a in-progress 
1473156612Spjd * synchronization request. 1474133808Spjd */ 1475156612Spjdstatic int 1476156612Spjdg_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp) 1477133808Spjd{ 1478133808Spjd struct g_raid3_disk *disk; 1479156612Spjd struct bio *sbp; 1480156612Spjd off_t rstart, rend, sstart, send; 1481156612Spjd int i; 1482133808Spjd 1483133808Spjd disk = sc->sc_syncdisk; 1484156612Spjd if (disk == NULL) 1485156612Spjd return (0); 1486156612Spjd rstart = bp->bio_offset; 1487156612Spjd rend = bp->bio_offset + bp->bio_length; 1488156612Spjd for (i = 0; i < g_raid3_syncreqs; i++) { 1489156612Spjd sbp = disk->d_sync.ds_bios[i]; 1490156612Spjd if (sbp == NULL) 1491156612Spjd continue; 1492156612Spjd sstart = sbp->bio_offset; 1493156612Spjd send = sbp->bio_length; 1494156612Spjd if (sbp->bio_cmd == BIO_WRITE) { 1495156612Spjd sstart *= sc->sc_ndisks - 1; 1496156612Spjd send *= sc->sc_ndisks - 1; 1497156612Spjd } 1498156612Spjd send += sstart; 1499156612Spjd if (rend > sstart && rstart < send) 1500156612Spjd return (1); 1501156612Spjd } 1502156612Spjd return (0); 1503156612Spjd} 1504133808Spjd 1505156612Spjd/* 1506156612Spjd * Return TRUE if the given sync request is colliding with a in-progress regular 1507156612Spjd * request. 
1508156612Spjd */ 1509156612Spjdstatic int 1510156612Spjdg_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp) 1511156612Spjd{ 1512156612Spjd off_t rstart, rend, sstart, send; 1513156612Spjd struct bio *bp; 1514156612Spjd 1515156612Spjd if (sc->sc_syncdisk == NULL) 1516156612Spjd return (0); 1517156612Spjd sstart = sbp->bio_offset; 1518156612Spjd send = sstart + sbp->bio_length; 1519156612Spjd TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) { 1520156612Spjd rstart = bp->bio_offset; 1521156612Spjd rend = bp->bio_offset + bp->bio_length; 1522156612Spjd if (rend > sstart && rstart < send) 1523156612Spjd return (1); 1524133808Spjd } 1525156612Spjd return (0); 1526133808Spjd} 1527133808Spjd 1528156612Spjd/* 1529156612Spjd * Puts request onto delayed queue. 1530156612Spjd */ 1531133808Spjdstatic void 1532156612Spjdg_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp) 1533156612Spjd{ 1534156612Spjd 1535163886Spjd G_RAID3_LOGREQ(2, bp, "Delaying request."); 1536163886Spjd bioq_insert_head(&sc->sc_regular_delayed, bp); 1537156612Spjd} 1538156612Spjd 1539156612Spjd/* 1540156612Spjd * Puts synchronization request onto delayed queue. 1541156612Spjd */ 1542156612Spjdstatic void 1543156612Spjdg_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp) 1544156612Spjd{ 1545156612Spjd 1546163886Spjd G_RAID3_LOGREQ(2, bp, "Delaying synchronization request."); 1547163886Spjd bioq_insert_tail(&sc->sc_sync_delayed, bp); 1548156612Spjd} 1549156612Spjd 1550156612Spjd/* 1551156612Spjd * Releases delayed regular requests which don't collide anymore with sync 1552156612Spjd * requests. 
1553156612Spjd */ 1554156612Spjdstatic void 1555156612Spjdg_raid3_regular_release(struct g_raid3_softc *sc) 1556156612Spjd{ 1557163886Spjd struct bio *bp, *bp2; 1558156612Spjd 1559163886Spjd TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) { 1560163886Spjd if (g_raid3_sync_collision(sc, bp)) 1561163886Spjd continue; 1562163886Spjd bioq_remove(&sc->sc_regular_delayed, bp); 1563163886Spjd G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp); 1564156612Spjd mtx_lock(&sc->sc_queue_mtx); 1565156612Spjd bioq_insert_head(&sc->sc_queue, bp); 1566156612Spjd#if 0 1567156612Spjd /* 1568156612Spjd * wakeup() is not needed, because this function is called from 1569156612Spjd * the worker thread. 1570156612Spjd */ 1571156612Spjd wakeup(&sc->sc_queue); 1572156612Spjd#endif 1573156612Spjd mtx_unlock(&sc->sc_queue_mtx); 1574163886Spjd } 1575156612Spjd} 1576156612Spjd 1577156612Spjd/* 1578156612Spjd * Releases delayed sync requests which don't collide anymore with regular 1579156612Spjd * requests. 1580156612Spjd */ 1581156612Spjdstatic void 1582156612Spjdg_raid3_sync_release(struct g_raid3_softc *sc) 1583156612Spjd{ 1584163886Spjd struct bio *bp, *bp2; 1585156612Spjd 1586163886Spjd TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { 1587163886Spjd if (g_raid3_regular_collision(sc, bp)) 1588163886Spjd continue; 1589163886Spjd bioq_remove(&sc->sc_sync_delayed, bp); 1590163886Spjd G_RAID3_LOGREQ(2, bp, 1591163886Spjd "Releasing delayed synchronization request."); 1592163886Spjd g_io_request(bp, bp->bio_from); 1593163886Spjd } 1594156612Spjd} 1595156612Spjd 1596156612Spjd/* 1597156612Spjd * Handle synchronization requests. 1598156612Spjd * Every synchronization request is two-steps process: first, READ request is 1599156612Spjd * send to active provider and then WRITE request (with read data) to the provider 1600156612Spjd * beeing synchronized. When WRITE is finished, new synchronization request is 1601156612Spjd * send. 
1602156612Spjd */ 1603156612Spjdstatic void 1604133808Spjdg_raid3_sync_request(struct bio *bp) 1605133808Spjd{ 1606133808Spjd struct g_raid3_softc *sc; 1607133808Spjd struct g_raid3_disk *disk; 1608133808Spjd 1609137256Spjd bp->bio_from->index--; 1610133808Spjd sc = bp->bio_from->geom->softc; 1611133808Spjd disk = bp->bio_from->private; 1612133808Spjd if (disk == NULL) { 1613156612Spjd sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 1614133808Spjd g_topology_lock(); 1615133808Spjd g_raid3_kill_consumer(sc, bp->bio_from); 1616133808Spjd g_topology_unlock(); 1617156612Spjd free(bp->bio_data, M_RAID3); 1618133808Spjd g_destroy_bio(bp); 1619156612Spjd sx_xlock(&sc->sc_lock); 1620133808Spjd return; 1621133808Spjd } 1622133808Spjd 1623133808Spjd /* 1624133808Spjd * Synchronization request. 1625133808Spjd */ 1626133808Spjd switch (bp->bio_cmd) { 1627133808Spjd case BIO_READ: 1628133808Spjd { 1629133808Spjd struct g_consumer *cp; 1630133808Spjd u_char *dst, *src; 1631133808Spjd off_t left; 1632133808Spjd u_int atom; 1633133808Spjd 1634133808Spjd if (bp->bio_error != 0) { 1635133808Spjd G_RAID3_LOGREQ(0, bp, 1636133808Spjd "Synchronization request failed (error=%d).", 1637133808Spjd bp->bio_error); 1638133808Spjd g_destroy_bio(bp); 1639133808Spjd return; 1640133808Spjd } 1641133808Spjd G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); 1642133808Spjd atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1643133808Spjd dst = src = bp->bio_data; 1644133808Spjd if (disk->d_no == sc->sc_ndisks - 1) { 1645133808Spjd u_int n; 1646133808Spjd 1647133808Spjd /* Parity component. 
*/ 1648133808Spjd for (left = bp->bio_length; left > 0; 1649133808Spjd left -= sc->sc_sectorsize) { 1650133808Spjd bcopy(src, dst, atom); 1651133808Spjd src += atom; 1652133808Spjd for (n = 1; n < sc->sc_ndisks - 1; n++) { 1653201545Smav g_raid3_xor(src, dst, atom); 1654133808Spjd src += atom; 1655133808Spjd } 1656133808Spjd dst += atom; 1657133808Spjd } 1658133808Spjd } else { 1659133808Spjd /* Regular component. */ 1660133808Spjd src += atom * disk->d_no; 1661133808Spjd for (left = bp->bio_length; left > 0; 1662133808Spjd left -= sc->sc_sectorsize) { 1663133808Spjd bcopy(src, dst, atom); 1664133808Spjd src += sc->sc_sectorsize; 1665133808Spjd dst += atom; 1666133808Spjd } 1667133808Spjd } 1668156612Spjd bp->bio_driver1 = bp->bio_driver2 = NULL; 1669156612Spjd bp->bio_pflags = 0; 1670133808Spjd bp->bio_offset /= sc->sc_ndisks - 1; 1671133808Spjd bp->bio_length /= sc->sc_ndisks - 1; 1672133808Spjd bp->bio_cmd = BIO_WRITE; 1673133808Spjd bp->bio_cflags = 0; 1674133808Spjd bp->bio_children = bp->bio_inbed = 0; 1675133808Spjd cp = disk->d_consumer; 1676156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1677133808Spjd ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1678133808Spjd cp->acr, cp->acw, cp->ace)); 1679137256Spjd cp->index++; 1680133808Spjd g_io_request(bp, cp); 1681133808Spjd return; 1682133808Spjd } 1683133808Spjd case BIO_WRITE: 1684135863Spjd { 1685135863Spjd struct g_raid3_disk_sync *sync; 1686156612Spjd off_t boffset, moffset; 1687156612Spjd void *data; 1688156612Spjd int i; 1689135863Spjd 1690133808Spjd if (bp->bio_error != 0) { 1691133808Spjd G_RAID3_LOGREQ(0, bp, 1692133808Spjd "Synchronization request failed (error=%d).", 1693133808Spjd bp->bio_error); 1694133808Spjd g_destroy_bio(bp); 1695139671Spjd sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1696133808Spjd g_raid3_event_send(disk, 1697133808Spjd G_RAID3_DISK_STATE_DISCONNECTED, 1698133808Spjd G_RAID3_EVENT_DONTWAIT); 1699133808Spjd return; 1700133808Spjd } 1701133808Spjd 
G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); 1702135863Spjd sync = &disk->d_sync; 1703156612Spjd if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) || 1704156612Spjd sync->ds_consumer == NULL || 1705156612Spjd (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 1706156612Spjd /* Don't send more synchronization requests. */ 1707156612Spjd sync->ds_inflight--; 1708156612Spjd if (sync->ds_bios != NULL) { 1709156684Sru i = (int)(uintptr_t)bp->bio_caller1; 1710156612Spjd sync->ds_bios[i] = NULL; 1711156612Spjd } 1712156612Spjd free(bp->bio_data, M_RAID3); 1713156612Spjd g_destroy_bio(bp); 1714156612Spjd if (sync->ds_inflight > 0) 1715156612Spjd return; 1716156612Spjd if (sync->ds_consumer == NULL || 1717156612Spjd (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 1718156612Spjd return; 1719156612Spjd } 1720133808Spjd /* 1721133808Spjd * Disk up-to-date, activate it. 1722133808Spjd */ 1723133808Spjd g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE, 1724133808Spjd G_RAID3_EVENT_DONTWAIT); 1725133808Spjd return; 1726156612Spjd } 1727156612Spjd 1728156612Spjd /* Send next synchronization request. */ 1729156612Spjd data = bp->bio_data; 1730156612Spjd bzero(bp, sizeof(*bp)); 1731156612Spjd bp->bio_cmd = BIO_READ; 1732156612Spjd bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1); 1733156612Spjd bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 1734156612Spjd sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 1735156612Spjd bp->bio_done = g_raid3_sync_done; 1736156612Spjd bp->bio_data = data; 1737156612Spjd bp->bio_from = sync->ds_consumer; 1738156612Spjd bp->bio_to = sc->sc_provider; 1739156612Spjd G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 1740156612Spjd sync->ds_consumer->index++; 1741156612Spjd /* 1742156612Spjd * Delay the request if it is colliding with a regular request. 
1743156612Spjd */ 1744156612Spjd if (g_raid3_regular_collision(sc, bp)) 1745156612Spjd g_raid3_sync_delay(sc, bp); 1746156612Spjd else 1747156612Spjd g_io_request(bp, sync->ds_consumer); 1748156612Spjd 1749156612Spjd /* Release delayed requests if possible. */ 1750156612Spjd g_raid3_regular_release(sc); 1751156612Spjd 1752156612Spjd /* Find the smallest offset. */ 1753156612Spjd moffset = sc->sc_mediasize; 1754156612Spjd for (i = 0; i < g_raid3_syncreqs; i++) { 1755156612Spjd bp = sync->ds_bios[i]; 1756156612Spjd boffset = bp->bio_offset; 1757156612Spjd if (bp->bio_cmd == BIO_WRITE) 1758156612Spjd boffset *= sc->sc_ndisks - 1; 1759156612Spjd if (boffset < moffset) 1760156612Spjd moffset = boffset; 1761156612Spjd } 1762156612Spjd if (sync->ds_offset_done + (MAXPHYS * 100) < moffset) { 1763156612Spjd /* Update offset_done on every 100 blocks. */ 1764156612Spjd sync->ds_offset_done = moffset; 1765133808Spjd g_raid3_update_metadata(disk); 1766133808Spjd } 1767133808Spjd return; 1768135863Spjd } 1769133808Spjd default: 1770133808Spjd KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1771133808Spjd bp->bio_cmd, sc->sc_name)); 1772133808Spjd break; 1773133808Spjd } 1774133808Spjd} 1775133808Spjd 1776133808Spjdstatic int 1777133808Spjdg_raid3_register_request(struct bio *pbp) 1778133808Spjd{ 1779133808Spjd struct g_raid3_softc *sc; 1780133808Spjd struct g_raid3_disk *disk; 1781133808Spjd struct g_consumer *cp; 1782158290Spjd struct bio *cbp, *tmpbp; 1783133808Spjd off_t offset, length; 1784133839Sobrien u_int n, ndisks; 1785134168Spjd int round_robin, verify; 1786133808Spjd 1787133839Sobrien ndisks = 0; 1788133808Spjd sc = pbp->bio_to->geom->softc; 1789133808Spjd if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 && 1790133808Spjd sc->sc_syncdisk == NULL) { 1791133808Spjd g_io_deliver(pbp, EIO); 1792133808Spjd return (0); 1793133808Spjd } 1794133808Spjd g_raid3_init_bio(pbp); 1795133808Spjd length = pbp->bio_length / (sc->sc_ndisks - 1); 1796133808Spjd offset = 
pbp->bio_offset / (sc->sc_ndisks - 1); 1797134168Spjd round_robin = verify = 0; 1798133808Spjd switch (pbp->bio_cmd) { 1799133808Spjd case BIO_READ: 1800134168Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 1801134168Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1802134168Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY; 1803134168Spjd verify = 1; 1804134168Spjd ndisks = sc->sc_ndisks; 1805134168Spjd } else { 1806134168Spjd verify = 0; 1807134168Spjd ndisks = sc->sc_ndisks - 1; 1808134168Spjd } 1809134168Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 && 1810134168Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1811134168Spjd round_robin = 1; 1812134168Spjd } else { 1813134168Spjd round_robin = 0; 1814134168Spjd } 1815134168Spjd KASSERT(!round_robin || !verify, 1816134168Spjd ("ROUND-ROBIN and VERIFY are mutually exclusive.")); 1817134124Spjd pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; 1818133808Spjd break; 1819133808Spjd case BIO_WRITE: 1820133808Spjd case BIO_DELETE: 1821156612Spjd /* 1822156612Spjd * Delay the request if it is colliding with a synchronization 1823156612Spjd * request. 
1824156612Spjd */ 1825156612Spjd if (g_raid3_sync_collision(sc, pbp)) { 1826156612Spjd g_raid3_regular_delay(sc, pbp); 1827156612Spjd return (0); 1828156612Spjd } 1829135863Spjd 1830137258Spjd if (sc->sc_idle) 1831137258Spjd g_raid3_unidle(sc); 1832155540Spjd else 1833155581Spjd sc->sc_last_write = time_uptime; 1834137258Spjd 1835133808Spjd ndisks = sc->sc_ndisks; 1836133808Spjd break; 1837133808Spjd } 1838133808Spjd for (n = 0; n < ndisks; n++) { 1839133808Spjd disk = &sc->sc_disks[n]; 1840133808Spjd cbp = g_raid3_clone_bio(sc, pbp); 1841133808Spjd if (cbp == NULL) { 1842133808Spjd while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1843133808Spjd g_raid3_destroy_bio(sc, cbp); 1844151822Spjd /* 1845151822Spjd * To prevent deadlock, we must run back up 1846151822Spjd * with the ENOMEM for failed requests of any 1847151822Spjd * of our consumers. Our own sync requests 1848151822Spjd * can stick around, as they are finite. 1849151822Spjd */ 1850151822Spjd if ((pbp->bio_cflags & 1851151822Spjd G_RAID3_BIO_CFLAG_REGULAR) != 0) { 1852151822Spjd g_io_deliver(pbp, ENOMEM); 1853151822Spjd return (0); 1854151822Spjd } 1855133808Spjd return (ENOMEM); 1856133808Spjd } 1857133808Spjd cbp->bio_offset = offset; 1858133808Spjd cbp->bio_length = length; 1859133808Spjd cbp->bio_done = g_raid3_done; 1860133808Spjd switch (pbp->bio_cmd) { 1861133808Spjd case BIO_READ: 1862133808Spjd if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1863133808Spjd /* 1864133808Spjd * Replace invalid component with the parity 1865133808Spjd * component. 1866133808Spjd */ 1867133808Spjd disk = &sc->sc_disks[sc->sc_ndisks - 1]; 1868133808Spjd cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1869133808Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1870134124Spjd } else if (round_robin && 1871134124Spjd disk->d_no == sc->sc_round_robin) { 1872134124Spjd /* 1873134124Spjd * In round-robin mode skip one data component 1874134124Spjd * and use parity component when reading. 
1875134124Spjd */ 1876134124Spjd pbp->bio_driver2 = disk; 1877134124Spjd disk = &sc->sc_disks[sc->sc_ndisks - 1]; 1878134124Spjd cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1879134124Spjd sc->sc_round_robin++; 1880134124Spjd round_robin = 0; 1881134168Spjd } else if (verify && disk->d_no == sc->sc_ndisks - 1) { 1882134168Spjd cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1883133808Spjd } 1884133808Spjd break; 1885133808Spjd case BIO_WRITE: 1886133808Spjd case BIO_DELETE: 1887133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 1888133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 1889133808Spjd if (n == ndisks - 1) { 1890133808Spjd /* 1891133808Spjd * Active parity component, mark it as such. 1892133808Spjd */ 1893133808Spjd cbp->bio_cflags |= 1894133808Spjd G_RAID3_BIO_CFLAG_PARITY; 1895133808Spjd } 1896133808Spjd } else { 1897133808Spjd pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1898133808Spjd if (n == ndisks - 1) { 1899133808Spjd /* 1900133808Spjd * Parity component is not connected, 1901133808Spjd * so destroy its request. 1902133808Spjd */ 1903133808Spjd pbp->bio_pflags |= 1904133808Spjd G_RAID3_BIO_PFLAG_NOPARITY; 1905133808Spjd g_raid3_destroy_bio(sc, cbp); 1906133808Spjd cbp = NULL; 1907133808Spjd } else { 1908133808Spjd cbp->bio_cflags |= 1909133808Spjd G_RAID3_BIO_CFLAG_NODISK; 1910133808Spjd disk = NULL; 1911133808Spjd } 1912133808Spjd } 1913133808Spjd break; 1914133808Spjd } 1915133808Spjd if (cbp != NULL) 1916133808Spjd cbp->bio_caller2 = disk; 1917133808Spjd } 1918133808Spjd switch (pbp->bio_cmd) { 1919133808Spjd case BIO_READ: 1920134124Spjd if (round_robin) { 1921134124Spjd /* 1922134124Spjd * If we are in round-robin mode and 'round_robin' is 1923134124Spjd * still 1, it means, that we skipped parity component 1924134124Spjd * for this read and must reset sc_round_robin field. 
1925134124Spjd */ 1926134124Spjd sc->sc_round_robin = 0; 1927134124Spjd } 1928158290Spjd G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1929133808Spjd disk = cbp->bio_caller2; 1930133808Spjd cp = disk->d_consumer; 1931133808Spjd cbp->bio_to = cp->provider; 1932133808Spjd G_RAID3_LOGREQ(3, cbp, "Sending request."); 1933156612Spjd KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1934133808Spjd ("Consumer %s not opened (r%dw%de%d).", 1935133808Spjd cp->provider->name, cp->acr, cp->acw, cp->ace)); 1936137256Spjd cp->index++; 1937133808Spjd g_io_request(cbp, cp); 1938133808Spjd } 1939133808Spjd break; 1940133808Spjd case BIO_WRITE: 1941133808Spjd case BIO_DELETE: 1942133808Spjd /* 1943156612Spjd * Put request onto inflight queue, so we can check if new 1944156612Spjd * synchronization requests don't collide with it. 1945156612Spjd */ 1946156612Spjd bioq_insert_tail(&sc->sc_inflight, pbp); 1947156612Spjd 1948156612Spjd /* 1949133808Spjd * Bump syncid on first write. 1950133808Spjd */ 1951139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) { 1952139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 1953139144Spjd g_raid3_bump_syncid(sc); 1954133808Spjd } 1955133808Spjd g_raid3_scatter(pbp); 1956133808Spjd break; 1957133808Spjd } 1958133808Spjd return (0); 1959133808Spjd} 1960133808Spjd 1961133808Spjdstatic int 1962133808Spjdg_raid3_can_destroy(struct g_raid3_softc *sc) 1963133808Spjd{ 1964133808Spjd struct g_geom *gp; 1965133808Spjd struct g_consumer *cp; 1966155174Spjd 1967133808Spjd g_topology_assert(); 1968133808Spjd gp = sc->sc_geom; 1969158114Spjd if (gp->softc == NULL) 1970158114Spjd return (1); 1971133808Spjd LIST_FOREACH(cp, &gp->consumer, consumer) { 1972133808Spjd if (g_raid3_is_busy(sc, cp)) 1973133808Spjd return (0); 1974133808Spjd } 1975133808Spjd gp = sc->sc_sync.ds_geom; 1976133808Spjd LIST_FOREACH(cp, &gp->consumer, consumer) { 1977133808Spjd if (g_raid3_is_busy(sc, cp)) 1978133808Spjd return (0); 1979133808Spjd } 1980133808Spjd G_RAID3_DEBUG(2, "No 
I/O requests for %s, it can be destroyed.", 1981133808Spjd sc->sc_name); 1982133808Spjd return (1); 1983133808Spjd} 1984155174Spjd 1985133808Spjdstatic int 1986133808Spjdg_raid3_try_destroy(struct g_raid3_softc *sc) 1987133808Spjd{ 1988155174Spjd 1989156612Spjd g_topology_assert_not(); 1990156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 1991156612Spjd 1992148440Spjd if (sc->sc_rootmount != NULL) { 1993148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1994148440Spjd sc->sc_rootmount); 1995148440Spjd root_mount_rel(sc->sc_rootmount); 1996148440Spjd sc->sc_rootmount = NULL; 1997148440Spjd } 1998148440Spjd 1999139295Spjd g_topology_lock(); 2000139295Spjd if (!g_raid3_can_destroy(sc)) { 2001139295Spjd g_topology_unlock(); 2002139295Spjd return (0); 2003139295Spjd } 2004158114Spjd sc->sc_geom->softc = NULL; 2005158114Spjd sc->sc_sync.ds_geom->softc = NULL; 2006133808Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) { 2007133808Spjd g_topology_unlock(); 2008133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 2009133808Spjd &sc->sc_worker); 2010156612Spjd /* Unlock sc_lock here, as it can be destroyed after wakeup. */ 2011156612Spjd sx_xunlock(&sc->sc_lock); 2012133808Spjd wakeup(&sc->sc_worker); 2013133808Spjd sc->sc_worker = NULL; 2014133808Spjd } else { 2015156612Spjd g_topology_unlock(); 2016133808Spjd g_raid3_destroy_device(sc); 2017133808Spjd free(sc->sc_disks, M_RAID3); 2018133808Spjd free(sc, M_RAID3); 2019133808Spjd } 2020133808Spjd return (1); 2021133808Spjd} 2022133808Spjd 2023133808Spjd/* 2024133808Spjd * Worker thread. 
2025133808Spjd */ 2026133808Spjdstatic void 2027133808Spjdg_raid3_worker(void *arg) 2028133808Spjd{ 2029133808Spjd struct g_raid3_softc *sc; 2030133808Spjd struct g_raid3_event *ep; 2031133808Spjd struct bio *bp; 2032155540Spjd int timeout; 2033133808Spjd 2034133808Spjd sc = arg; 2035170307Sjeff thread_lock(curthread); 2036139451Sjhb sched_prio(curthread, PRIBIO); 2037170307Sjeff thread_unlock(curthread); 2038133808Spjd 2039156612Spjd sx_xlock(&sc->sc_lock); 2040133808Spjd for (;;) { 2041133808Spjd G_RAID3_DEBUG(5, "%s: Let's see...", __func__); 2042133808Spjd /* 2043133808Spjd * First take a look at events. 2044133808Spjd * This is important to handle events before any I/O requests. 2045133808Spjd */ 2046133808Spjd ep = g_raid3_event_get(sc); 2047156612Spjd if (ep != NULL) { 2048139144Spjd g_raid3_event_remove(sc, ep); 2049133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) { 2050133808Spjd /* Update only device status. */ 2051133808Spjd G_RAID3_DEBUG(3, 2052133808Spjd "Running event for device %s.", 2053133808Spjd sc->sc_name); 2054133808Spjd ep->e_error = 0; 2055139144Spjd g_raid3_update_device(sc, 1); 2056133808Spjd } else { 2057133808Spjd /* Update disk status. 
*/ 2058133808Spjd G_RAID3_DEBUG(3, "Running event for disk %s.", 2059133808Spjd g_raid3_get_diskname(ep->e_disk)); 2060133808Spjd ep->e_error = g_raid3_update_disk(ep->e_disk, 2061139144Spjd ep->e_state); 2062133808Spjd if (ep->e_error == 0) 2063139144Spjd g_raid3_update_device(sc, 0); 2064133808Spjd } 2065133808Spjd if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) { 2066133808Spjd KASSERT(ep->e_error == 0, 2067133808Spjd ("Error cannot be handled.")); 2068133808Spjd g_raid3_event_free(ep); 2069133808Spjd } else { 2070133808Spjd ep->e_flags |= G_RAID3_EVENT_DONE; 2071133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 2072133808Spjd ep); 2073133808Spjd mtx_lock(&sc->sc_events_mtx); 2074133808Spjd wakeup(ep); 2075133808Spjd mtx_unlock(&sc->sc_events_mtx); 2076133808Spjd } 2077133808Spjd if ((sc->sc_flags & 2078133808Spjd G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2079156612Spjd if (g_raid3_try_destroy(sc)) { 2080156612Spjd curthread->td_pflags &= ~TDP_GEOM; 2081156612Spjd G_RAID3_DEBUG(1, "Thread exiting."); 2082172836Sjulian kproc_exit(0); 2083156612Spjd } 2084133808Spjd } 2085133808Spjd G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__); 2086133808Spjd continue; 2087133808Spjd } 2088133808Spjd /* 2089155540Spjd * Check if we can mark array as CLEAN and if we can't take 2090155540Spjd * how much seconds should we wait. 2091155540Spjd */ 2092156612Spjd timeout = g_raid3_idle(sc, -1); 2093155540Spjd /* 2094133808Spjd * Now I/O requests. 2095133808Spjd */ 2096133808Spjd /* Get first request from the queue. 
*/ 2097133808Spjd mtx_lock(&sc->sc_queue_mtx); 2098133808Spjd bp = bioq_first(&sc->sc_queue); 2099133808Spjd if (bp == NULL) { 2100133808Spjd if ((sc->sc_flags & 2101133808Spjd G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2102133808Spjd mtx_unlock(&sc->sc_queue_mtx); 2103156612Spjd if (g_raid3_try_destroy(sc)) { 2104156612Spjd curthread->td_pflags &= ~TDP_GEOM; 2105157134Spjd G_RAID3_DEBUG(1, "Thread exiting."); 2106172836Sjulian kproc_exit(0); 2107156612Spjd } 2108133808Spjd mtx_lock(&sc->sc_queue_mtx); 2109133808Spjd } 2110156612Spjd sx_xunlock(&sc->sc_lock); 2111158116Spjd /* 2112158116Spjd * XXX: We can miss an event here, because an event 2113158116Spjd * can be added without sx-device-lock and without 2114158116Spjd * mtx-queue-lock. Maybe I should just stop using 2115158116Spjd * dedicated mutex for events synchronization and 2116158116Spjd * stick with the queue lock? 2117158116Spjd * The event will hang here until next I/O request 2118158116Spjd * or next event is received. 2119158116Spjd */ 2120155540Spjd MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1", 2121155540Spjd timeout * hz); 2122156612Spjd sx_xlock(&sc->sc_lock); 2123155540Spjd G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); 2124133808Spjd continue; 2125133808Spjd } 2126158117Spjdprocess: 2127133808Spjd bioq_remove(&sc->sc_queue, bp); 2128133808Spjd mtx_unlock(&sc->sc_queue_mtx); 2129133808Spjd 2130162282Spjd if (bp->bio_from->geom == sc->sc_sync.ds_geom && 2131162282Spjd (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { 2132162282Spjd g_raid3_sync_request(bp); /* READ */ 2133162282Spjd } else if (bp->bio_to != sc->sc_provider) { 2134161116Spjd if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 2135161116Spjd g_raid3_regular_request(bp); 2136161116Spjd else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) 2137162282Spjd g_raid3_sync_request(bp); /* WRITE */ 2138161116Spjd else { 2139161116Spjd KASSERT(0, 2140161116Spjd ("Invalid request cflags=0x%hhx to=%s.", 2141161116Spjd bp->bio_cflags, 
bp->bio_to->name)); 2142161116Spjd } 2143161116Spjd } else if (g_raid3_register_request(bp) != 0) { 2144158117Spjd mtx_lock(&sc->sc_queue_mtx); 2145158117Spjd bioq_insert_head(&sc->sc_queue, bp); 2146158117Spjd /* 2147158117Spjd * We are short in memory, let see if there are finished 2148158117Spjd * request we can free. 2149158117Spjd */ 2150158117Spjd TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2151158117Spjd if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) 2152158117Spjd goto process; 2153133808Spjd } 2154158117Spjd /* 2155158117Spjd * No finished regular request, so at least keep 2156158117Spjd * synchronization running. 2157158117Spjd */ 2158158117Spjd TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2159158117Spjd if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) 2160158117Spjd goto process; 2161158117Spjd } 2162158117Spjd sx_xunlock(&sc->sc_lock); 2163158117Spjd MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP, 2164158117Spjd "r3:lowmem", hz / 10); 2165158117Spjd sx_xlock(&sc->sc_lock); 2166133808Spjd } 2167139144Spjd G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); 2168133808Spjd } 2169133808Spjd} 2170133808Spjd 2171133808Spjdstatic void 2172155540Spjdg_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk) 2173133808Spjd{ 2174133808Spjd 2175156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 2176163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 2177163888Spjd return; 2178155540Spjd if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { 2179155540Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 2180156612Spjd g_raid3_get_diskname(disk), sc->sc_name); 2181155540Spjd disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2182155540Spjd } else if (sc->sc_idle && 2183155540Spjd (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { 2184155540Spjd G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 2185156612Spjd g_raid3_get_diskname(disk), sc->sc_name); 2186155540Spjd disk->d_flags &= 
~G_RAID3_DISK_FLAG_DIRTY; 2187133808Spjd } 2188133808Spjd} 2189133808Spjd 2190133808Spjdstatic void 2191133808Spjdg_raid3_sync_start(struct g_raid3_softc *sc) 2192133808Spjd{ 2193133808Spjd struct g_raid3_disk *disk; 2194156612Spjd struct g_consumer *cp; 2195156612Spjd struct bio *bp; 2196133808Spjd int error; 2197133808Spjd u_int n; 2198133808Spjd 2199156612Spjd g_topology_assert_not(); 2200156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 2201133808Spjd 2202133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2203133808Spjd ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2204133808Spjd sc->sc_state)); 2205133808Spjd KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", 2206133808Spjd sc->sc_name, sc->sc_state)); 2207133808Spjd disk = NULL; 2208133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2209133808Spjd if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 2210133808Spjd continue; 2211133808Spjd disk = &sc->sc_disks[n]; 2212133808Spjd break; 2213133808Spjd } 2214133808Spjd if (disk == NULL) 2215133808Spjd return; 2216133808Spjd 2217156612Spjd sx_xunlock(&sc->sc_lock); 2218156612Spjd g_topology_lock(); 2219156612Spjd cp = g_new_consumer(sc->sc_sync.ds_geom); 2220156612Spjd error = g_attach(cp, sc->sc_provider); 2221156612Spjd KASSERT(error == 0, 2222156612Spjd ("Cannot attach to %s (error=%d).", sc->sc_name, error)); 2223156612Spjd error = g_access(cp, 1, 0, 0); 2224156612Spjd KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); 2225156612Spjd g_topology_unlock(); 2226156612Spjd sx_xlock(&sc->sc_lock); 2227156612Spjd 2228133808Spjd G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 2229133808Spjd g_raid3_get_diskname(disk)); 2230163888Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0) 2231163888Spjd disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2232133808Spjd KASSERT(disk->d_sync.ds_consumer == NULL, 2233133808Spjd ("Sync consumer already exists (device=%s, disk=%s).", 
2234133808Spjd sc->sc_name, g_raid3_get_diskname(disk))); 2235156612Spjd 2236156612Spjd disk->d_sync.ds_consumer = cp; 2237133808Spjd disk->d_sync.ds_consumer->private = disk; 2238137256Spjd disk->d_sync.ds_consumer->index = 0; 2239133808Spjd sc->sc_syncdisk = disk; 2240156612Spjd 2241156612Spjd /* 2242156612Spjd * Allocate memory for synchronization bios and initialize them. 2243156612Spjd */ 2244156612Spjd disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs, 2245156612Spjd M_RAID3, M_WAITOK); 2246156612Spjd for (n = 0; n < g_raid3_syncreqs; n++) { 2247156612Spjd bp = g_alloc_bio(); 2248156612Spjd disk->d_sync.ds_bios[n] = bp; 2249156612Spjd bp->bio_parent = NULL; 2250156612Spjd bp->bio_cmd = BIO_READ; 2251156612Spjd bp->bio_data = malloc(MAXPHYS, M_RAID3, M_WAITOK); 2252156612Spjd bp->bio_cflags = 0; 2253156612Spjd bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1); 2254156612Spjd bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 2255156612Spjd disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 2256156612Spjd bp->bio_done = g_raid3_sync_done; 2257156612Spjd bp->bio_from = disk->d_sync.ds_consumer; 2258156612Spjd bp->bio_to = sc->sc_provider; 2259156684Sru bp->bio_caller1 = (void *)(uintptr_t)n; 2260156612Spjd } 2261156612Spjd 2262156612Spjd /* Set the number of in-flight synchronization requests. */ 2263156612Spjd disk->d_sync.ds_inflight = g_raid3_syncreqs; 2264156612Spjd 2265156612Spjd /* 2266156612Spjd * Fire off first synchronization requests. 2267156612Spjd */ 2268156612Spjd for (n = 0; n < g_raid3_syncreqs; n++) { 2269156612Spjd bp = disk->d_sync.ds_bios[n]; 2270156612Spjd G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 2271156612Spjd disk->d_sync.ds_consumer->index++; 2272156612Spjd /* 2273156612Spjd * Delay the request if it is colliding with a regular request. 
2274156612Spjd */ 2275156612Spjd if (g_raid3_regular_collision(sc, bp)) 2276156612Spjd g_raid3_sync_delay(sc, bp); 2277156612Spjd else 2278156612Spjd g_io_request(bp, disk->d_sync.ds_consumer); 2279156612Spjd } 2280133808Spjd} 2281133808Spjd 2282133808Spjd/* 2283133808Spjd * Stop synchronization process. 2284133808Spjd * type: 0 - synchronization finished 2285133808Spjd * 1 - synchronization stopped 2286133808Spjd */ 2287133808Spjdstatic void 2288133808Spjdg_raid3_sync_stop(struct g_raid3_softc *sc, int type) 2289133808Spjd{ 2290133808Spjd struct g_raid3_disk *disk; 2291156612Spjd struct g_consumer *cp; 2292133808Spjd 2293156612Spjd g_topology_assert_not(); 2294156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 2295156612Spjd 2296133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2297133808Spjd ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2298133808Spjd sc->sc_state)); 2299133808Spjd disk = sc->sc_syncdisk; 2300133808Spjd sc->sc_syncdisk = NULL; 2301133808Spjd KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); 2302133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2303133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2304133808Spjd g_raid3_disk_state2str(disk->d_state))); 2305133808Spjd if (disk->d_sync.ds_consumer == NULL) 2306133808Spjd return; 2307133808Spjd 2308133808Spjd if (type == 0) { 2309133808Spjd G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", 2310156612Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2311133808Spjd } else /* if (type == 1) */ { 2312133808Spjd G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 2313156612Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2314133808Spjd } 2315156612Spjd free(disk->d_sync.ds_bios, M_RAID3); 2316156612Spjd disk->d_sync.ds_bios = NULL; 2317156612Spjd cp = disk->d_sync.ds_consumer; 2318133808Spjd disk->d_sync.ds_consumer = NULL; 2319133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2320156612Spjd 
sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 2321156612Spjd g_topology_lock(); 2322156612Spjd g_raid3_kill_consumer(sc, cp); 2323156612Spjd g_topology_unlock(); 2324156612Spjd sx_xlock(&sc->sc_lock); 2325133808Spjd} 2326133808Spjd 2327133808Spjdstatic void 2328133808Spjdg_raid3_launch_provider(struct g_raid3_softc *sc) 2329133808Spjd{ 2330133808Spjd struct g_provider *pp; 2331200940Smav struct g_raid3_disk *disk; 2332200940Smav int n; 2333133808Spjd 2334156612Spjd sx_assert(&sc->sc_lock, SX_LOCKED); 2335133808Spjd 2336156612Spjd g_topology_lock(); 2337133808Spjd pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); 2338133808Spjd pp->mediasize = sc->sc_mediasize; 2339133808Spjd pp->sectorsize = sc->sc_sectorsize; 2340200940Smav pp->stripesize = 0; 2341200940Smav pp->stripeoffset = 0; 2342200940Smav for (n = 0; n < sc->sc_ndisks; n++) { 2343200940Smav disk = &sc->sc_disks[n]; 2344200940Smav if (disk->d_consumer && disk->d_consumer->provider && 2345200940Smav disk->d_consumer->provider->stripesize > pp->stripesize) { 2346200940Smav pp->stripesize = disk->d_consumer->provider->stripesize; 2347200940Smav pp->stripeoffset = disk->d_consumer->provider->stripeoffset; 2348200940Smav } 2349200940Smav } 2350200940Smav pp->stripesize *= sc->sc_ndisks - 1; 2351200940Smav pp->stripeoffset *= sc->sc_ndisks - 1; 2352133808Spjd sc->sc_provider = pp; 2353133808Spjd g_error_provider(pp, 0); 2354156612Spjd g_topology_unlock(); 2355162188Sjmg G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name, 2356162188Sjmg g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks); 2357162835Spjd 2358133808Spjd if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) 2359133808Spjd g_raid3_sync_start(sc); 2360133808Spjd} 2361133808Spjd 2362133808Spjdstatic void 2363133808Spjdg_raid3_destroy_provider(struct g_raid3_softc *sc) 2364133808Spjd{ 2365133808Spjd struct bio *bp; 2366133808Spjd 2367156612Spjd g_topology_assert_not(); 2368133808Spjd KASSERT(sc->sc_provider != NULL, 
("NULL provider (device=%s).", 2369133808Spjd sc->sc_name)); 2370133808Spjd 2371156612Spjd g_topology_lock(); 2372133808Spjd g_error_provider(sc->sc_provider, ENXIO); 2373133808Spjd mtx_lock(&sc->sc_queue_mtx); 2374133808Spjd while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 2375133808Spjd bioq_remove(&sc->sc_queue, bp); 2376133808Spjd g_io_deliver(bp, ENXIO); 2377133808Spjd } 2378133808Spjd mtx_unlock(&sc->sc_queue_mtx); 2379133808Spjd G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 2380133808Spjd sc->sc_provider->name); 2381133808Spjd sc->sc_provider->flags |= G_PF_WITHER; 2382133808Spjd g_orphan_provider(sc->sc_provider, ENXIO); 2383156612Spjd g_topology_unlock(); 2384133808Spjd sc->sc_provider = NULL; 2385133808Spjd if (sc->sc_syncdisk != NULL) 2386133808Spjd g_raid3_sync_stop(sc, 1); 2387133808Spjd} 2388133808Spjd 2389133808Spjdstatic void 2390133808Spjdg_raid3_go(void *arg) 2391133808Spjd{ 2392133808Spjd struct g_raid3_softc *sc; 2393133808Spjd 2394133808Spjd sc = arg; 2395133808Spjd G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 2396133808Spjd g_raid3_event_send(sc, 0, 2397133808Spjd G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); 2398133808Spjd} 2399133808Spjd 2400133808Spjdstatic u_int 2401133808Spjdg_raid3_determine_state(struct g_raid3_disk *disk) 2402133808Spjd{ 2403133808Spjd struct g_raid3_softc *sc; 2404133808Spjd u_int state; 2405133808Spjd 2406133808Spjd sc = disk->d_softc; 2407133808Spjd if (sc->sc_syncid == disk->d_sync.ds_syncid) { 2408133808Spjd if ((disk->d_flags & 2409133808Spjd G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { 2410133808Spjd /* Disk does not need synchronization. 
*/ 2411133808Spjd state = G_RAID3_DISK_STATE_ACTIVE; 2412133808Spjd } else { 2413133808Spjd if ((sc->sc_flags & 2414156876Spjd G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2415133808Spjd (disk->d_flags & 2416133808Spjd G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2417133808Spjd /* 2418133808Spjd * We can start synchronization from 2419133808Spjd * the stored offset. 2420133808Spjd */ 2421133808Spjd state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2422133808Spjd } else { 2423133808Spjd state = G_RAID3_DISK_STATE_STALE; 2424133808Spjd } 2425133808Spjd } 2426133808Spjd } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 2427133808Spjd /* 2428133808Spjd * Reset all synchronization data for this disk, 2429133808Spjd * because if it even was synchronized, it was 2430133808Spjd * synchronized to disks with different syncid. 2431133808Spjd */ 2432133808Spjd disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2433133808Spjd disk->d_sync.ds_offset = 0; 2434133808Spjd disk->d_sync.ds_offset_done = 0; 2435133808Spjd disk->d_sync.ds_syncid = sc->sc_syncid; 2436133808Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2437133808Spjd (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2438133808Spjd state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2439133808Spjd } else { 2440133808Spjd state = G_RAID3_DISK_STATE_STALE; 2441133808Spjd } 2442133808Spjd } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 2443133808Spjd /* 2444133808Spjd * Not good, NOT GOOD! 2445133808Spjd * It means that device was started on stale disks 2446133808Spjd * and more fresh disk just arrive. 2447160895Spjd * If there were writes, device is broken, sorry. 2448133808Spjd * I think the best choice here is don't touch 2449160964Syar * this disk and inform the user loudly. 2450133808Spjd */ 2451133808Spjd G_RAID3_DEBUG(0, "Device %s was started before the freshest " 2452133808Spjd "disk (%s) arrives!! 
It will not be connected to the " 2453133808Spjd "running device.", sc->sc_name, 2454133808Spjd g_raid3_get_diskname(disk)); 2455133808Spjd g_raid3_destroy_disk(disk); 2456133808Spjd state = G_RAID3_DISK_STATE_NONE; 2457133808Spjd /* Return immediately, because disk was destroyed. */ 2458133808Spjd return (state); 2459133808Spjd } 2460133808Spjd G_RAID3_DEBUG(3, "State for %s disk: %s.", 2461133808Spjd g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); 2462133808Spjd return (state); 2463133808Spjd} 2464133808Spjd 2465133808Spjd/* 2466133808Spjd * Update device state. 2467133808Spjd */ 2468133808Spjdstatic void 2469139144Spjdg_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) 2470133808Spjd{ 2471133808Spjd struct g_raid3_disk *disk; 2472133808Spjd u_int state; 2473133808Spjd 2474156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 2475133808Spjd 2476133808Spjd switch (sc->sc_state) { 2477133808Spjd case G_RAID3_DEVICE_STATE_STARTING: 2478133808Spjd { 2479139295Spjd u_int n, ndirty, ndisks, genid, syncid; 2480133808Spjd 2481133808Spjd KASSERT(sc->sc_provider == NULL, 2482133808Spjd ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 2483133808Spjd /* 2484133808Spjd * Are we ready? We are, if all disks are connected or 2485133808Spjd * one disk is missing and 'force' is true. 2486133808Spjd */ 2487133808Spjd if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { 2488133808Spjd if (!force) 2489133808Spjd callout_drain(&sc->sc_callout); 2490133808Spjd } else { 2491133808Spjd if (force) { 2492133808Spjd /* 2493133808Spjd * Timeout expired, so destroy device. 
2494133808Spjd */ 2495133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2496148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", 2497148440Spjd __LINE__, sc->sc_rootmount); 2498148440Spjd root_mount_rel(sc->sc_rootmount); 2499148440Spjd sc->sc_rootmount = NULL; 2500133808Spjd } 2501133808Spjd return; 2502133808Spjd } 2503133808Spjd 2504133808Spjd /* 2505139295Spjd * Find the biggest genid. 2506139295Spjd */ 2507139295Spjd genid = 0; 2508139295Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2509139295Spjd disk = &sc->sc_disks[n]; 2510139295Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2511139295Spjd continue; 2512139295Spjd if (disk->d_genid > genid) 2513139295Spjd genid = disk->d_genid; 2514139295Spjd } 2515139295Spjd sc->sc_genid = genid; 2516139295Spjd /* 2517139295Spjd * Remove all disks without the biggest genid. 2518139295Spjd */ 2519139295Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2520139295Spjd disk = &sc->sc_disks[n]; 2521139295Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2522139295Spjd continue; 2523139295Spjd if (disk->d_genid < genid) { 2524139295Spjd G_RAID3_DEBUG(0, 2525139295Spjd "Component %s (device %s) broken, skipping.", 2526139295Spjd g_raid3_get_diskname(disk), sc->sc_name); 2527139295Spjd g_raid3_destroy_disk(disk); 2528139295Spjd } 2529139295Spjd } 2530139295Spjd 2531139295Spjd /* 2532133808Spjd * There must be at least 'sc->sc_ndisks - 1' components 2533133808Spjd * with the same syncid and without SYNCHRONIZING flag. 2534133808Spjd */ 2535133808Spjd 2536133808Spjd /* 2537133808Spjd * Find the biggest syncid, number of valid components and 2538133808Spjd * number of dirty components. 
2539133808Spjd */ 2540133808Spjd ndirty = ndisks = syncid = 0; 2541133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2542133808Spjd disk = &sc->sc_disks[n]; 2543133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2544133808Spjd continue; 2545133808Spjd if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) 2546133808Spjd ndirty++; 2547133808Spjd if (disk->d_sync.ds_syncid > syncid) { 2548133808Spjd syncid = disk->d_sync.ds_syncid; 2549133808Spjd ndisks = 0; 2550133808Spjd } else if (disk->d_sync.ds_syncid < syncid) { 2551133808Spjd continue; 2552133808Spjd } 2553133808Spjd if ((disk->d_flags & 2554133808Spjd G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { 2555133808Spjd continue; 2556133808Spjd } 2557133808Spjd ndisks++; 2558133808Spjd } 2559133808Spjd /* 2560133808Spjd * Do we have enough valid components? 2561133808Spjd */ 2562133808Spjd if (ndisks + 1 < sc->sc_ndisks) { 2563133808Spjd G_RAID3_DEBUG(0, 2564133808Spjd "Device %s is broken, too few valid components.", 2565133808Spjd sc->sc_name); 2566133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2567133808Spjd return; 2568133808Spjd } 2569133808Spjd /* 2570133808Spjd * If there is one DIRTY component and all disks are present, 2571133808Spjd * mark it for synchronization. If there is more than one DIRTY 2572133808Spjd * component, mark parity component for synchronization. 
2573133808Spjd */ 2574133808Spjd if (ndisks == sc->sc_ndisks && ndirty == 1) { 2575133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2576133808Spjd disk = &sc->sc_disks[n]; 2577133808Spjd if ((disk->d_flags & 2578133808Spjd G_RAID3_DISK_FLAG_DIRTY) == 0) { 2579133808Spjd continue; 2580133808Spjd } 2581133808Spjd disk->d_flags |= 2582155174Spjd G_RAID3_DISK_FLAG_SYNCHRONIZING; 2583133808Spjd } 2584133808Spjd } else if (ndisks == sc->sc_ndisks && ndirty > 1) { 2585133808Spjd disk = &sc->sc_disks[sc->sc_ndisks - 1]; 2586155174Spjd disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2587133808Spjd } 2588133808Spjd 2589133808Spjd sc->sc_syncid = syncid; 2590133808Spjd if (force) { 2591133808Spjd /* Remember to bump syncid on first write. */ 2592139671Spjd sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2593133808Spjd } 2594133808Spjd if (ndisks == sc->sc_ndisks) 2595133808Spjd state = G_RAID3_DEVICE_STATE_COMPLETE; 2596133808Spjd else /* if (ndisks == sc->sc_ndisks - 1) */ 2597133808Spjd state = G_RAID3_DEVICE_STATE_DEGRADED; 2598133808Spjd G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", 2599133808Spjd sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2600133808Spjd g_raid3_device_state2str(state)); 2601133808Spjd sc->sc_state = state; 2602133808Spjd for (n = 0; n < sc->sc_ndisks; n++) { 2603133808Spjd disk = &sc->sc_disks[n]; 2604133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2605133808Spjd continue; 2606133808Spjd state = g_raid3_determine_state(disk); 2607133808Spjd g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); 2608139295Spjd if (state == G_RAID3_DISK_STATE_STALE) 2609139671Spjd sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2610133808Spjd } 2611133808Spjd break; 2612133808Spjd } 2613133808Spjd case G_RAID3_DEVICE_STATE_DEGRADED: 2614133808Spjd /* 2615139671Spjd * Genid need to be bumped immediately, so do it here. 
2616133808Spjd */ 2617139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2618139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2619139295Spjd g_raid3_bump_genid(sc); 2620139295Spjd } 2621139295Spjd 2622133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2623133808Spjd return; 2624133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < 2625133808Spjd sc->sc_ndisks - 1) { 2626133808Spjd if (sc->sc_provider != NULL) 2627133808Spjd g_raid3_destroy_provider(sc); 2628133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2629133808Spjd return; 2630133808Spjd } 2631133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2632133808Spjd sc->sc_ndisks) { 2633133808Spjd state = G_RAID3_DEVICE_STATE_COMPLETE; 2634133808Spjd G_RAID3_DEBUG(1, 2635133808Spjd "Device %s state changed from %s to %s.", 2636133808Spjd sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2637133808Spjd g_raid3_device_state2str(state)); 2638133808Spjd sc->sc_state = state; 2639133808Spjd } 2640133808Spjd if (sc->sc_provider == NULL) 2641133808Spjd g_raid3_launch_provider(sc); 2642148440Spjd if (sc->sc_rootmount != NULL) { 2643148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2644148440Spjd sc->sc_rootmount); 2645148440Spjd root_mount_rel(sc->sc_rootmount); 2646148440Spjd sc->sc_rootmount = NULL; 2647148440Spjd } 2648133808Spjd break; 2649133808Spjd case G_RAID3_DEVICE_STATE_COMPLETE: 2650133808Spjd /* 2651139671Spjd * Genid need to be bumped immediately, so do it here. 
2652133808Spjd */ 2653139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2654139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2655139295Spjd g_raid3_bump_genid(sc); 2656139295Spjd } 2657139295Spjd 2658133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2659133808Spjd return; 2660133808Spjd KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= 2661133808Spjd sc->sc_ndisks - 1, 2662133808Spjd ("Too few ACTIVE components in COMPLETE state (device %s).", 2663133808Spjd sc->sc_name)); 2664133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2665133808Spjd sc->sc_ndisks - 1) { 2666133808Spjd state = G_RAID3_DEVICE_STATE_DEGRADED; 2667133808Spjd G_RAID3_DEBUG(1, 2668133808Spjd "Device %s state changed from %s to %s.", 2669133808Spjd sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2670133808Spjd g_raid3_device_state2str(state)); 2671133808Spjd sc->sc_state = state; 2672133808Spjd } 2673133808Spjd if (sc->sc_provider == NULL) 2674133808Spjd g_raid3_launch_provider(sc); 2675148440Spjd if (sc->sc_rootmount != NULL) { 2676148440Spjd G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2677148440Spjd sc->sc_rootmount); 2678148440Spjd root_mount_rel(sc->sc_rootmount); 2679148440Spjd sc->sc_rootmount = NULL; 2680148440Spjd } 2681133808Spjd break; 2682133808Spjd default: 2683133808Spjd KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, 2684133808Spjd g_raid3_device_state2str(sc->sc_state))); 2685133808Spjd break; 2686133808Spjd } 2687133808Spjd} 2688133808Spjd 2689133808Spjd/* 2690133808Spjd * Update disk state and device state if needed. 
2691133808Spjd */ 2692133808Spjd#define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ 2693133808Spjd "Disk %s state changed from %s to %s (device %s).", \ 2694133808Spjd g_raid3_get_diskname(disk), \ 2695133808Spjd g_raid3_disk_state2str(disk->d_state), \ 2696133808Spjd g_raid3_disk_state2str(state), sc->sc_name) 2697133808Spjdstatic int 2698139144Spjdg_raid3_update_disk(struct g_raid3_disk *disk, u_int state) 2699133808Spjd{ 2700133808Spjd struct g_raid3_softc *sc; 2701133808Spjd 2702156612Spjd sc = disk->d_softc; 2703156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 2704133808Spjd 2705133808Spjdagain: 2706133808Spjd G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", 2707133808Spjd g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), 2708133808Spjd g_raid3_disk_state2str(state)); 2709133808Spjd switch (state) { 2710133808Spjd case G_RAID3_DISK_STATE_NEW: 2711133808Spjd /* 2712133808Spjd * Possible scenarios: 2713133808Spjd * 1. New disk arrive. 2714133808Spjd */ 2715133808Spjd /* Previous state should be NONE. 
*/ 2716133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, 2717133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2718133808Spjd g_raid3_disk_state2str(disk->d_state))); 2719133808Spjd DISK_STATE_CHANGED(); 2720133808Spjd 2721133808Spjd disk->d_state = state; 2722162188Sjmg G_RAID3_DEBUG(1, "Device %s: provider %s detected.", 2723133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2724133808Spjd if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) 2725133808Spjd break; 2726133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2727133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2728133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2729133808Spjd g_raid3_device_state2str(sc->sc_state), 2730133808Spjd g_raid3_get_diskname(disk), 2731133808Spjd g_raid3_disk_state2str(disk->d_state))); 2732133808Spjd state = g_raid3_determine_state(disk); 2733133808Spjd if (state != G_RAID3_DISK_STATE_NONE) 2734133808Spjd goto again; 2735133808Spjd break; 2736133808Spjd case G_RAID3_DISK_STATE_ACTIVE: 2737133808Spjd /* 2738133808Spjd * Possible scenarios: 2739133808Spjd * 1. New disk does not need synchronization. 2740133808Spjd * 2. Synchronization process finished successfully. 2741133808Spjd */ 2742133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2743133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2744133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2745133808Spjd g_raid3_device_state2str(sc->sc_state), 2746133808Spjd g_raid3_get_diskname(disk), 2747133808Spjd g_raid3_disk_state2str(disk->d_state))); 2748133808Spjd /* Previous state should be NEW or SYNCHRONIZING. 
*/ 2749133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || 2750133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2751133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2752133808Spjd g_raid3_disk_state2str(disk->d_state))); 2753133808Spjd DISK_STATE_CHANGED(); 2754133808Spjd 2755155582Spjd if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 2756133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; 2757133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; 2758133808Spjd g_raid3_sync_stop(sc, 0); 2759133808Spjd } 2760133808Spjd disk->d_state = state; 2761133808Spjd disk->d_sync.ds_offset = 0; 2762133808Spjd disk->d_sync.ds_offset_done = 0; 2763155540Spjd g_raid3_update_idle(sc, disk); 2764155582Spjd g_raid3_update_metadata(disk); 2765162188Sjmg G_RAID3_DEBUG(1, "Device %s: provider %s activated.", 2766133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2767133808Spjd break; 2768133808Spjd case G_RAID3_DISK_STATE_STALE: 2769133808Spjd /* 2770133808Spjd * Possible scenarios: 2771133808Spjd * 1. Stale disk was connected. 2772133808Spjd */ 2773133808Spjd /* Previous state should be NEW. */ 2774133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2775133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2776133808Spjd g_raid3_disk_state2str(disk->d_state))); 2777133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2778133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2779133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2780133808Spjd g_raid3_device_state2str(sc->sc_state), 2781133808Spjd g_raid3_get_diskname(disk), 2782133808Spjd g_raid3_disk_state2str(disk->d_state))); 2783133808Spjd /* 2784133808Spjd * STALE state is only possible if device is marked 2785133808Spjd * NOAUTOSYNC. 
2786133808Spjd */ 2787133808Spjd KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, 2788133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2789133808Spjd g_raid3_device_state2str(sc->sc_state), 2790133808Spjd g_raid3_get_diskname(disk), 2791133808Spjd g_raid3_disk_state2str(disk->d_state))); 2792133808Spjd DISK_STATE_CHANGED(); 2793133808Spjd 2794133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2795133808Spjd disk->d_state = state; 2796133808Spjd g_raid3_update_metadata(disk); 2797133808Spjd G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", 2798133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2799133808Spjd break; 2800133808Spjd case G_RAID3_DISK_STATE_SYNCHRONIZING: 2801133808Spjd /* 2802133808Spjd * Possible scenarios: 2803133808Spjd * 1. Disk which needs synchronization was connected. 2804133808Spjd */ 2805133808Spjd /* Previous state should be NEW. */ 2806133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2807133808Spjd ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2808133808Spjd g_raid3_disk_state2str(disk->d_state))); 2809133808Spjd KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2810133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2811133808Spjd ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2812133808Spjd g_raid3_device_state2str(sc->sc_state), 2813133808Spjd g_raid3_get_diskname(disk), 2814133808Spjd g_raid3_disk_state2str(disk->d_state))); 2815133808Spjd DISK_STATE_CHANGED(); 2816133808Spjd 2817133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_NEW) 2818133808Spjd disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2819133808Spjd disk->d_state = state; 2820133808Spjd if (sc->sc_provider != NULL) { 2821133808Spjd g_raid3_sync_start(sc); 2822133808Spjd g_raid3_update_metadata(disk); 2823133808Spjd } 2824133808Spjd break; 2825133808Spjd case G_RAID3_DISK_STATE_DISCONNECTED: 2826133808Spjd /* 2827133808Spjd * Possible scenarios: 2828133808Spjd * 1. 
Device wasn't running yet, but disk disappear. 2829133808Spjd * 2. Disk was active and disapppear. 2830133808Spjd * 3. Disk disappear during synchronization process. 2831133808Spjd */ 2832133808Spjd if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2833133808Spjd sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 2834133808Spjd /* 2835133808Spjd * Previous state should be ACTIVE, STALE or 2836133808Spjd * SYNCHRONIZING. 2837133808Spjd */ 2838133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 2839133808Spjd disk->d_state == G_RAID3_DISK_STATE_STALE || 2840133808Spjd disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2841133808Spjd ("Wrong disk state (%s, %s).", 2842133808Spjd g_raid3_get_diskname(disk), 2843133808Spjd g_raid3_disk_state2str(disk->d_state))); 2844133808Spjd } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { 2845133808Spjd /* Previous state should be NEW. */ 2846133808Spjd KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2847133808Spjd ("Wrong disk state (%s, %s).", 2848133808Spjd g_raid3_get_diskname(disk), 2849133808Spjd g_raid3_disk_state2str(disk->d_state))); 2850133808Spjd /* 2851133808Spjd * Reset bumping syncid if disk disappeared in STARTING 2852133808Spjd * state. 
2853133808Spjd */ 2854139671Spjd if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) 2855139295Spjd sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2856133808Spjd#ifdef INVARIANTS 2857133808Spjd } else { 2858133808Spjd KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2859133808Spjd sc->sc_name, 2860133808Spjd g_raid3_device_state2str(sc->sc_state), 2861133808Spjd g_raid3_get_diskname(disk), 2862133808Spjd g_raid3_disk_state2str(disk->d_state))); 2863133808Spjd#endif 2864133808Spjd } 2865133808Spjd DISK_STATE_CHANGED(); 2866133808Spjd G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", 2867133808Spjd sc->sc_name, g_raid3_get_diskname(disk)); 2868133808Spjd 2869133808Spjd g_raid3_destroy_disk(disk); 2870133808Spjd break; 2871133808Spjd default: 2872133808Spjd KASSERT(1 == 0, ("Unknown state (%u).", state)); 2873133808Spjd break; 2874133808Spjd } 2875133808Spjd return (0); 2876133808Spjd} 2877133808Spjd#undef DISK_STATE_CHANGED 2878133808Spjd 2879139671Spjdint 2880133808Spjdg_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) 2881133808Spjd{ 2882133808Spjd struct g_provider *pp; 2883133808Spjd u_char *buf; 2884133808Spjd int error; 2885133808Spjd 2886133808Spjd g_topology_assert(); 2887133808Spjd 2888133808Spjd error = g_access(cp, 1, 0, 0); 2889133808Spjd if (error != 0) 2890133808Spjd return (error); 2891133808Spjd pp = cp->provider; 2892133808Spjd g_topology_unlock(); 2893133808Spjd /* Metadata are stored on last sector. */ 2894133808Spjd buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2895133808Spjd &error); 2896133808Spjd g_topology_lock(); 2897139144Spjd g_access(cp, -1, 0, 0); 2898152967Ssobomax if (buf == NULL) { 2899139295Spjd G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2900139295Spjd cp->provider->name, error); 2901133808Spjd return (error); 2902133808Spjd } 2903133808Spjd 2904133808Spjd /* Decode metadata. 
*/ 2905133808Spjd error = raid3_metadata_decode(buf, md); 2906133808Spjd g_free(buf); 2907133808Spjd if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) 2908133808Spjd return (EINVAL); 2909139295Spjd if (md->md_version > G_RAID3_VERSION) { 2910139295Spjd G_RAID3_DEBUG(0, 2911139295Spjd "Kernel module is too old to handle metadata from %s.", 2912139295Spjd cp->provider->name); 2913139295Spjd return (EINVAL); 2914139295Spjd } 2915133808Spjd if (error != 0) { 2916133808Spjd G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2917133808Spjd cp->provider->name); 2918133808Spjd return (error); 2919133808Spjd } 2920217305Sae if (md->md_sectorsize > MAXPHYS) { 2921217305Sae G_RAID3_DEBUG(0, "The blocksize is too big."); 2922217305Sae return (EINVAL); 2923217305Sae } 2924133808Spjd 2925133808Spjd return (0); 2926133808Spjd} 2927133808Spjd 2928133808Spjdstatic int 2929133808Spjdg_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, 2930133808Spjd struct g_raid3_metadata *md) 2931133808Spjd{ 2932133808Spjd 2933133808Spjd if (md->md_no >= sc->sc_ndisks) { 2934133808Spjd G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", 2935133808Spjd pp->name, md->md_no); 2936133808Spjd return (EINVAL); 2937133808Spjd } 2938133808Spjd if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { 2939133808Spjd G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", 2940133808Spjd pp->name, md->md_no); 2941133808Spjd return (EEXIST); 2942133808Spjd } 2943133808Spjd if (md->md_all != sc->sc_ndisks) { 2944133808Spjd G_RAID3_DEBUG(1, 2945133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2946133808Spjd "md_all", pp->name, sc->sc_name); 2947133808Spjd return (EINVAL); 2948133808Spjd } 2949163206Spjd if ((md->md_mediasize % md->md_sectorsize) != 0) { 2950163206Spjd G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != " 2951163206Spjd "0) on disk %s (device %s), skipping.", pp->name, 2952163206Spjd sc->sc_name); 2953163206Spjd 
return (EINVAL); 2954163206Spjd } 2955133808Spjd if (md->md_mediasize != sc->sc_mediasize) { 2956133808Spjd G_RAID3_DEBUG(1, 2957133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2958133808Spjd "md_mediasize", pp->name, sc->sc_name); 2959133808Spjd return (EINVAL); 2960133808Spjd } 2961133808Spjd if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { 2962133808Spjd G_RAID3_DEBUG(1, 2963133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2964133808Spjd "md_mediasize", pp->name, sc->sc_name); 2965133808Spjd return (EINVAL); 2966133808Spjd } 2967133808Spjd if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) { 2968133808Spjd G_RAID3_DEBUG(1, 2969133808Spjd "Invalid size of disk %s (device %s), skipping.", pp->name, 2970133808Spjd sc->sc_name); 2971133808Spjd return (EINVAL); 2972133808Spjd } 2973133808Spjd if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { 2974133808Spjd G_RAID3_DEBUG(1, 2975133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2976133808Spjd "md_sectorsize", pp->name, sc->sc_name); 2977133808Spjd return (EINVAL); 2978133808Spjd } 2979133808Spjd if (md->md_sectorsize != sc->sc_sectorsize) { 2980133808Spjd G_RAID3_DEBUG(1, 2981133808Spjd "Invalid '%s' field on disk %s (device %s), skipping.", 2982133808Spjd "md_sectorsize", pp->name, sc->sc_name); 2983133808Spjd return (EINVAL); 2984133808Spjd } 2985133808Spjd if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2986133808Spjd G_RAID3_DEBUG(1, 2987133808Spjd "Invalid sector size of disk %s (device %s), skipping.", 2988133808Spjd pp->name, sc->sc_name); 2989133808Spjd return (EINVAL); 2990133808Spjd } 2991133808Spjd if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 2992133808Spjd G_RAID3_DEBUG(1, 2993133808Spjd "Invalid device flags on disk %s (device %s), skipping.", 2994133808Spjd pp->name, sc->sc_name); 2995133808Spjd return (EINVAL); 2996133808Spjd } 2997134168Spjd if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 
2998134168Spjd (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { 2999134168Spjd /* 3000134168Spjd * VERIFY and ROUND-ROBIN options are mutally exclusive. 3001134168Spjd */ 3002134168Spjd G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " 3003134168Spjd "disk %s (device %s), skipping.", pp->name, sc->sc_name); 3004134168Spjd return (EINVAL); 3005134168Spjd } 3006133808Spjd if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { 3007133808Spjd G_RAID3_DEBUG(1, 3008133808Spjd "Invalid disk flags on disk %s (device %s), skipping.", 3009133808Spjd pp->name, sc->sc_name); 3010133808Spjd return (EINVAL); 3011133808Spjd } 3012133808Spjd return (0); 3013133808Spjd} 3014133808Spjd 3015139671Spjdint 3016133808Spjdg_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, 3017133808Spjd struct g_raid3_metadata *md) 3018133808Spjd{ 3019133808Spjd struct g_raid3_disk *disk; 3020133808Spjd int error; 3021133808Spjd 3022156612Spjd g_topology_assert_not(); 3023133808Spjd G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); 3024133808Spjd 3025133808Spjd error = g_raid3_check_metadata(sc, pp, md); 3026133808Spjd if (error != 0) 3027133808Spjd return (error); 3028139295Spjd if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && 3029139295Spjd md->md_genid < sc->sc_genid) { 3030139295Spjd G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", 3031139295Spjd pp->name, sc->sc_name); 3032139295Spjd return (EINVAL); 3033139295Spjd } 3034133808Spjd disk = g_raid3_init_disk(sc, pp, md, &error); 3035133808Spjd if (disk == NULL) 3036133808Spjd return (error); 3037133808Spjd error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, 3038133808Spjd G_RAID3_EVENT_WAIT); 3039139295Spjd if (error != 0) 3040139295Spjd return (error); 3041139295Spjd if (md->md_version < G_RAID3_VERSION) { 3042139295Spjd G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 3043139295Spjd pp->name, md->md_version, G_RAID3_VERSION); 3044139295Spjd g_raid3_update_metadata(disk); 3045139295Spjd 
} 3046139295Spjd return (0); 3047133808Spjd} 3048133808Spjd 3049157630Spjdstatic void 3050157630Spjdg_raid3_destroy_delayed(void *arg, int flag) 3051157630Spjd{ 3052157630Spjd struct g_raid3_softc *sc; 3053157630Spjd int error; 3054157630Spjd 3055157630Spjd if (flag == EV_CANCEL) { 3056157630Spjd G_RAID3_DEBUG(1, "Destroying canceled."); 3057157630Spjd return; 3058157630Spjd } 3059157630Spjd sc = arg; 3060157630Spjd g_topology_unlock(); 3061157630Spjd sx_xlock(&sc->sc_lock); 3062157630Spjd KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0, 3063157630Spjd ("DESTROY flag set on %s.", sc->sc_name)); 3064157630Spjd KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0, 3065157630Spjd ("DESTROYING flag not set on %s.", sc->sc_name)); 3066157630Spjd G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name); 3067157630Spjd error = g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT); 3068157630Spjd if (error != 0) { 3069157630Spjd G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name); 3070157630Spjd sx_xunlock(&sc->sc_lock); 3071157630Spjd } 3072157630Spjd g_topology_lock(); 3073157630Spjd} 3074157630Spjd 3075133808Spjdstatic int 3076133808Spjdg_raid3_access(struct g_provider *pp, int acr, int acw, int ace) 3077133808Spjd{ 3078133808Spjd struct g_raid3_softc *sc; 3079157630Spjd int dcr, dcw, dce, error = 0; 3080133808Spjd 3081133808Spjd g_topology_assert(); 3082133808Spjd G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 3083133808Spjd acw, ace); 3084133808Spjd 3085160081Spjd sc = pp->geom->softc; 3086160081Spjd if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0) 3087160081Spjd return (0); 3088160081Spjd KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); 3089160081Spjd 3090133808Spjd dcr = pp->acr + acr; 3091133808Spjd dcw = pp->acw + acw; 3092133808Spjd dce = pp->ace + ace; 3093133808Spjd 3094157630Spjd g_topology_unlock(); 3095157630Spjd sx_xlock(&sc->sc_lock); 3096157630Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 || 
3097156612Spjd g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { 3098156612Spjd if (acr > 0 || acw > 0 || ace > 0) 3099156612Spjd error = ENXIO; 3100156612Spjd goto end; 3101133808Spjd } 3102245444Smav if (dcw == 0) 3103156612Spjd g_raid3_idle(sc, dcw); 3104157630Spjd if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) { 3105157630Spjd if (acr > 0 || acw > 0 || ace > 0) { 3106157630Spjd error = ENXIO; 3107157630Spjd goto end; 3108157630Spjd } 3109157630Spjd if (dcr == 0 && dcw == 0 && dce == 0) { 3110157630Spjd g_post_event(g_raid3_destroy_delayed, sc, M_WAITOK, 3111157630Spjd sc, NULL); 3112157630Spjd } 3113157630Spjd } 3114156612Spjdend: 3115157630Spjd sx_xunlock(&sc->sc_lock); 3116157630Spjd g_topology_lock(); 3117156612Spjd return (error); 3118133808Spjd} 3119133808Spjd 3120133808Spjdstatic struct g_geom * 3121133808Spjdg_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) 3122133808Spjd{ 3123133808Spjd struct g_raid3_softc *sc; 3124133808Spjd struct g_geom *gp; 3125133808Spjd int error, timeout; 3126133808Spjd u_int n; 3127133808Spjd 3128133808Spjd g_topology_assert(); 3129133808Spjd G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); 3130133808Spjd 3131133808Spjd /* One disk is minimum. */ 3132133808Spjd if (md->md_all < 1) 3133133808Spjd return (NULL); 3134133808Spjd /* 3135133808Spjd * Action geom. 
3136133808Spjd */ 3137133808Spjd gp = g_new_geomf(mp, "%s", md->md_name); 3138133808Spjd sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); 3139133808Spjd sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, 3140133808Spjd M_WAITOK | M_ZERO); 3141133808Spjd gp->start = g_raid3_start; 3142133808Spjd gp->orphan = g_raid3_orphan; 3143133808Spjd gp->access = g_raid3_access; 3144133808Spjd gp->dumpconf = g_raid3_dumpconf; 3145133808Spjd 3146133808Spjd sc->sc_id = md->md_id; 3147133808Spjd sc->sc_mediasize = md->md_mediasize; 3148133808Spjd sc->sc_sectorsize = md->md_sectorsize; 3149133808Spjd sc->sc_ndisks = md->md_all; 3150134124Spjd sc->sc_round_robin = 0; 3151133808Spjd sc->sc_flags = md->md_mflags; 3152139295Spjd sc->sc_bump_id = 0; 3153155540Spjd sc->sc_idle = 1; 3154155581Spjd sc->sc_last_write = time_uptime; 3155155540Spjd sc->sc_writes = 0; 3156138374Spjd for (n = 0; n < sc->sc_ndisks; n++) { 3157138374Spjd sc->sc_disks[n].d_softc = sc; 3158138374Spjd sc->sc_disks[n].d_no = n; 3159133808Spjd sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; 3160138374Spjd } 3161156612Spjd sx_init(&sc->sc_lock, "graid3:lock"); 3162133808Spjd bioq_init(&sc->sc_queue); 3163133808Spjd mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); 3164156612Spjd bioq_init(&sc->sc_regular_delayed); 3165156612Spjd bioq_init(&sc->sc_inflight); 3166156612Spjd bioq_init(&sc->sc_sync_delayed); 3167133808Spjd TAILQ_INIT(&sc->sc_events); 3168133808Spjd mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); 3169133808Spjd callout_init(&sc->sc_callout, CALLOUT_MPSAFE); 3170133808Spjd sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; 3171133808Spjd gp->softc = sc; 3172133808Spjd sc->sc_geom = gp; 3173133808Spjd sc->sc_provider = NULL; 3174133808Spjd /* 3175133808Spjd * Synchronization geom. 
3176133808Spjd */ 3177133808Spjd gp = g_new_geomf(mp, "%s.sync", md->md_name); 3178133808Spjd gp->softc = sc; 3179133808Spjd gp->orphan = g_raid3_orphan; 3180133808Spjd sc->sc_sync.ds_geom = gp; 3181156612Spjd 3182160203Spjd if (!g_raid3_use_malloc) { 3183160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 3184160203Spjd 65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3185160203Spjd UMA_ALIGN_PTR, 0); 3186160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0; 3187160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k; 3188160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_requested = 3189160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0; 3190160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 3191160203Spjd 16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3192160203Spjd UMA_ALIGN_PTR, 0); 3193160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0; 3194160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k; 3195160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_requested = 3196160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0; 3197160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 3198160203Spjd 4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3199160203Spjd UMA_ALIGN_PTR, 0); 3200160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0; 3201160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k; 3202160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_requested = 3203160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0; 3204160203Spjd } 3205156612Spjd 3206172836Sjulian error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, 3207133808Spjd "g_raid3 %s", md->md_name); 3208133808Spjd if (error != 0) { 3209133808Spjd G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", 3210133808Spjd sc->sc_name); 3211160203Spjd if (!g_raid3_use_malloc) { 3212160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); 3213160203Spjd 
uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); 3214160203Spjd uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); 3215160203Spjd } 3216133808Spjd g_destroy_geom(sc->sc_sync.ds_geom); 3217133808Spjd mtx_destroy(&sc->sc_events_mtx); 3218133808Spjd mtx_destroy(&sc->sc_queue_mtx); 3219156612Spjd sx_destroy(&sc->sc_lock); 3220133808Spjd g_destroy_geom(sc->sc_geom); 3221133808Spjd free(sc->sc_disks, M_RAID3); 3222133808Spjd free(sc, M_RAID3); 3223133808Spjd return (NULL); 3224133808Spjd } 3225133808Spjd 3226162188Sjmg G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).", 3227162188Sjmg sc->sc_name, sc->sc_ndisks, sc->sc_id); 3228133808Spjd 3229190878Sthompsa sc->sc_rootmount = root_mount_hold("GRAID3"); 3230148440Spjd G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 3231148440Spjd 3232133808Spjd /* 3233133808Spjd * Run timeout. 3234133808Spjd */ 3235133808Spjd timeout = atomic_load_acq_int(&g_raid3_timeout); 3236133808Spjd callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); 3237133808Spjd return (sc->sc_geom); 3238133808Spjd} 3239133808Spjd 3240133808Spjdint 3241157630Spjdg_raid3_destroy(struct g_raid3_softc *sc, int how) 3242133808Spjd{ 3243133808Spjd struct g_provider *pp; 3244133808Spjd 3245156612Spjd g_topology_assert_not(); 3246133808Spjd if (sc == NULL) 3247133808Spjd return (ENXIO); 3248156612Spjd sx_assert(&sc->sc_lock, SX_XLOCKED); 3249156612Spjd 3250133808Spjd pp = sc->sc_provider; 3251133808Spjd if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 3252157630Spjd switch (how) { 3253157630Spjd case G_RAID3_DESTROY_SOFT: 3254133808Spjd G_RAID3_DEBUG(1, 3255133808Spjd "Device %s is still open (r%dw%de%d).", pp->name, 3256133808Spjd pp->acr, pp->acw, pp->ace); 3257133808Spjd return (EBUSY); 3258157630Spjd case G_RAID3_DESTROY_DELAYED: 3259157630Spjd G_RAID3_DEBUG(1, 3260157630Spjd "Device %s will be destroyed on last close.", 3261157630Spjd pp->name); 3262157630Spjd if (sc->sc_syncdisk != NULL) 3263157630Spjd 
g_raid3_sync_stop(sc, 1); 3264157630Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING; 3265157630Spjd return (EBUSY); 3266157630Spjd case G_RAID3_DESTROY_HARD: 3267157630Spjd G_RAID3_DEBUG(1, "Device %s is still open, so it " 3268157630Spjd "can't be definitely removed.", pp->name); 3269157630Spjd break; 3270133808Spjd } 3271133808Spjd } 3272133808Spjd 3273158114Spjd g_topology_lock(); 3274158114Spjd if (sc->sc_geom->softc == NULL) { 3275158114Spjd g_topology_unlock(); 3276158114Spjd return (0); 3277158114Spjd } 3278158114Spjd sc->sc_geom->softc = NULL; 3279158114Spjd sc->sc_sync.ds_geom->softc = NULL; 3280158114Spjd g_topology_unlock(); 3281158114Spjd 3282133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 3283133808Spjd sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; 3284133808Spjd G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 3285156612Spjd sx_xunlock(&sc->sc_lock); 3286133808Spjd mtx_lock(&sc->sc_queue_mtx); 3287133808Spjd wakeup(sc); 3288133808Spjd wakeup(&sc->sc_queue); 3289133808Spjd mtx_unlock(&sc->sc_queue_mtx); 3290133808Spjd G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 3291133808Spjd while (sc->sc_worker != NULL) 3292133808Spjd tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); 3293133808Spjd G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 3294156612Spjd sx_xlock(&sc->sc_lock); 3295133808Spjd g_raid3_destroy_device(sc); 3296133808Spjd free(sc->sc_disks, M_RAID3); 3297133808Spjd free(sc, M_RAID3); 3298133808Spjd return (0); 3299133808Spjd} 3300133808Spjd 3301133808Spjdstatic void 3302133808Spjdg_raid3_taste_orphan(struct g_consumer *cp) 3303133808Spjd{ 3304133808Spjd 3305133808Spjd KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 3306133808Spjd cp->provider->name)); 3307133808Spjd} 3308133808Spjd 3309133808Spjdstatic struct g_geom * 3310133808Spjdg_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 3311133808Spjd{ 3312133808Spjd struct g_raid3_metadata md; 3313133808Spjd struct 
g_raid3_softc *sc; 3314133808Spjd struct g_consumer *cp; 3315133808Spjd struct g_geom *gp; 3316133808Spjd int error; 3317133808Spjd 3318133808Spjd g_topology_assert(); 3319133808Spjd g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 3320133808Spjd G_RAID3_DEBUG(2, "Tasting %s.", pp->name); 3321133808Spjd 3322133808Spjd gp = g_new_geomf(mp, "raid3:taste"); 3323133808Spjd /* This orphan function should be never called. */ 3324133808Spjd gp->orphan = g_raid3_taste_orphan; 3325133808Spjd cp = g_new_consumer(gp); 3326133808Spjd g_attach(cp, pp); 3327133808Spjd error = g_raid3_read_metadata(cp, &md); 3328133808Spjd g_detach(cp); 3329133808Spjd g_destroy_consumer(cp); 3330133808Spjd g_destroy_geom(gp); 3331133808Spjd if (error != 0) 3332133808Spjd return (NULL); 3333133808Spjd gp = NULL; 3334133808Spjd 3335221101Smav if (md.md_provider[0] != '\0' && 3336221101Smav !g_compare_names(md.md_provider, pp->name)) 3337133808Spjd return (NULL); 3338142727Spjd if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 3339142727Spjd return (NULL); 3340133808Spjd if (g_raid3_debug >= 2) 3341133808Spjd raid3_metadata_dump(&md); 3342133808Spjd 3343133808Spjd /* 3344133808Spjd * Let's check if device already exists. 
3345133808Spjd */ 3346134486Spjd sc = NULL; 3347133808Spjd LIST_FOREACH(gp, &mp->geom, geom) { 3348133808Spjd sc = gp->softc; 3349133808Spjd if (sc == NULL) 3350133808Spjd continue; 3351133808Spjd if (sc->sc_sync.ds_geom == gp) 3352133808Spjd continue; 3353133808Spjd if (strcmp(md.md_name, sc->sc_name) != 0) 3354133808Spjd continue; 3355133808Spjd if (md.md_id != sc->sc_id) { 3356133808Spjd G_RAID3_DEBUG(0, "Device %s already configured.", 3357133808Spjd sc->sc_name); 3358133808Spjd return (NULL); 3359133808Spjd } 3360133808Spjd break; 3361133808Spjd } 3362133808Spjd if (gp == NULL) { 3363133808Spjd gp = g_raid3_create(mp, &md); 3364133808Spjd if (gp == NULL) { 3365133808Spjd G_RAID3_DEBUG(0, "Cannot create device %s.", 3366133808Spjd md.md_name); 3367133808Spjd return (NULL); 3368133808Spjd } 3369133808Spjd sc = gp->softc; 3370133808Spjd } 3371133808Spjd G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 3372156612Spjd g_topology_unlock(); 3373156612Spjd sx_xlock(&sc->sc_lock); 3374133808Spjd error = g_raid3_add_disk(sc, pp, &md); 3375133808Spjd if (error != 0) { 3376133808Spjd G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 3377133808Spjd pp->name, gp->name, error); 3378133808Spjd if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == 3379133808Spjd sc->sc_ndisks) { 3380157630Spjd g_cancel_event(sc); 3381160248Spjd g_raid3_destroy(sc, G_RAID3_DESTROY_HARD); 3382156612Spjd g_topology_lock(); 3383156612Spjd return (NULL); 3384133808Spjd } 3385156612Spjd gp = NULL; 3386133808Spjd } 3387156612Spjd sx_xunlock(&sc->sc_lock); 3388156612Spjd g_topology_lock(); 3389133808Spjd return (gp); 3390133808Spjd} 3391133808Spjd 3392133808Spjdstatic int 3393133808Spjdg_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, 3394133808Spjd struct g_geom *gp) 3395133808Spjd{ 3396156612Spjd struct g_raid3_softc *sc; 3397156612Spjd int error; 3398133808Spjd 3399156612Spjd g_topology_unlock(); 3400156612Spjd sc = gp->softc; 3401156612Spjd 
sx_xlock(&sc->sc_lock); 3402157630Spjd g_cancel_event(sc); 3403160248Spjd error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT); 3404156612Spjd if (error != 0) 3405156612Spjd sx_xunlock(&sc->sc_lock); 3406156612Spjd g_topology_lock(); 3407156612Spjd return (error); 3408133808Spjd} 3409133808Spjd 3410133808Spjdstatic void 3411133808Spjdg_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 3412133808Spjd struct g_consumer *cp, struct g_provider *pp) 3413133808Spjd{ 3414133808Spjd struct g_raid3_softc *sc; 3415133808Spjd 3416133808Spjd g_topology_assert(); 3417133808Spjd 3418133808Spjd sc = gp->softc; 3419133808Spjd if (sc == NULL) 3420133808Spjd return; 3421133808Spjd /* Skip synchronization geom. */ 3422133808Spjd if (gp == sc->sc_sync.ds_geom) 3423133808Spjd return; 3424133808Spjd if (pp != NULL) { 3425133808Spjd /* Nothing here. */ 3426133808Spjd } else if (cp != NULL) { 3427133808Spjd struct g_raid3_disk *disk; 3428133808Spjd 3429133808Spjd disk = cp->private; 3430133808Spjd if (disk == NULL) 3431133808Spjd return; 3432156612Spjd g_topology_unlock(); 3433156612Spjd sx_xlock(&sc->sc_lock); 3434133808Spjd sbuf_printf(sb, "%s<Type>", indent); 3435133808Spjd if (disk->d_no == sc->sc_ndisks - 1) 3436133808Spjd sbuf_printf(sb, "PARITY"); 3437133808Spjd else 3438133808Spjd sbuf_printf(sb, "DATA"); 3439133808Spjd sbuf_printf(sb, "</Type>\n"); 3440133808Spjd sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 3441133808Spjd (u_int)disk->d_no); 3442133808Spjd if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 3443133808Spjd sbuf_printf(sb, "%s<Synchronized>", indent); 3444156612Spjd if (disk->d_sync.ds_offset == 0) 3445133808Spjd sbuf_printf(sb, "0%%"); 3446133808Spjd else { 3447133808Spjd sbuf_printf(sb, "%u%%", 3448156612Spjd (u_int)((disk->d_sync.ds_offset * 100) / 3449134421Spjd (sc->sc_mediasize / (sc->sc_ndisks - 1)))); 3450133808Spjd } 3451133808Spjd sbuf_printf(sb, "</Synchronized>\n"); 3452240371Sglebius if (disk->d_sync.ds_offset > 0) { 
3453240371Sglebius sbuf_printf(sb, "%s<BytesSynced>%jd" 3454240371Sglebius "</BytesSynced>\n", indent, 3455240371Sglebius (intmax_t)disk->d_sync.ds_offset); 3456240371Sglebius } 3457133808Spjd } 3458133808Spjd sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3459133808Spjd disk->d_sync.ds_syncid); 3460139295Spjd sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid); 3461133808Spjd sbuf_printf(sb, "%s<Flags>", indent); 3462133808Spjd if (disk->d_flags == 0) 3463133808Spjd sbuf_printf(sb, "NONE"); 3464133808Spjd else { 3465133808Spjd int first = 1; 3466133808Spjd 3467133808Spjd#define ADD_FLAG(flag, name) do { \ 3468133808Spjd if ((disk->d_flags & (flag)) != 0) { \ 3469133808Spjd if (!first) \ 3470133808Spjd sbuf_printf(sb, ", "); \ 3471133808Spjd else \ 3472133808Spjd first = 0; \ 3473133808Spjd sbuf_printf(sb, name); \ 3474133808Spjd } \ 3475133808Spjd} while (0) 3476133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); 3477133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); 3478133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, 3479133808Spjd "SYNCHRONIZING"); 3480133808Spjd ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3481155546Spjd ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN"); 3482133808Spjd#undef ADD_FLAG 3483133808Spjd } 3484133808Spjd sbuf_printf(sb, "</Flags>\n"); 3485133808Spjd sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3486133808Spjd g_raid3_disk_state2str(disk->d_state)); 3487156612Spjd sx_xunlock(&sc->sc_lock); 3488156612Spjd g_topology_lock(); 3489133808Spjd } else { 3490156612Spjd g_topology_unlock(); 3491156612Spjd sx_xlock(&sc->sc_lock); 3492160203Spjd if (!g_raid3_use_malloc) { 3493160203Spjd sbuf_printf(sb, 3494160203Spjd "%s<Zone4kRequested>%u</Zone4kRequested>\n", indent, 3495160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_requested); 3496160203Spjd sbuf_printf(sb, 3497160203Spjd "%s<Zone4kFailed>%u</Zone4kFailed>\n", indent, 3498160203Spjd sc->sc_zones[G_RAID3_ZONE_4K].sz_failed); 3499160203Spjd 
sbuf_printf(sb, 3500160203Spjd "%s<Zone16kRequested>%u</Zone16kRequested>\n", indent, 3501160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_requested); 3502160203Spjd sbuf_printf(sb, 3503160203Spjd "%s<Zone16kFailed>%u</Zone16kFailed>\n", indent, 3504160203Spjd sc->sc_zones[G_RAID3_ZONE_16K].sz_failed); 3505160203Spjd sbuf_printf(sb, 3506160203Spjd "%s<Zone64kRequested>%u</Zone64kRequested>\n", indent, 3507160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_requested); 3508160203Spjd sbuf_printf(sb, 3509160203Spjd "%s<Zone64kFailed>%u</Zone64kFailed>\n", indent, 3510160203Spjd sc->sc_zones[G_RAID3_ZONE_64K].sz_failed); 3511160203Spjd } 3512133808Spjd sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3513133808Spjd sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 3514139295Spjd sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3515133808Spjd sbuf_printf(sb, "%s<Flags>", indent); 3516133808Spjd if (sc->sc_flags == 0) 3517133808Spjd sbuf_printf(sb, "NONE"); 3518133808Spjd else { 3519133808Spjd int first = 1; 3520133808Spjd 3521133808Spjd#define ADD_FLAG(flag, name) do { \ 3522133808Spjd if ((sc->sc_flags & (flag)) != 0) { \ 3523133808Spjd if (!first) \ 3524133808Spjd sbuf_printf(sb, ", "); \ 3525133808Spjd else \ 3526133808Spjd first = 0; \ 3527133808Spjd sbuf_printf(sb, name); \ 3528133808Spjd } \ 3529133808Spjd} while (0) 3530163888Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); 3531133808Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3532134124Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, 3533134124Spjd "ROUND-ROBIN"); 3534134168Spjd ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); 3535133808Spjd#undef ADD_FLAG 3536133808Spjd } 3537133808Spjd sbuf_printf(sb, "</Flags>\n"); 3538133808Spjd sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3539133808Spjd sc->sc_ndisks); 3540133979Spjd sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3541133979Spjd g_raid3_device_state2str(sc->sc_state)); 
3542156612Spjd sx_xunlock(&sc->sc_lock); 3543156612Spjd g_topology_lock(); 3544133808Spjd } 3545133808Spjd} 3546133808Spjd 3547137257Spjdstatic void 3548245444Smavg_raid3_shutdown_post_sync(void *arg, int howto) 3549137257Spjd{ 3550137257Spjd struct g_class *mp; 3551137257Spjd struct g_geom *gp, *gp2; 3552156612Spjd struct g_raid3_softc *sc; 3553157630Spjd int error; 3554137257Spjd 3555137257Spjd mp = arg; 3556137421Spjd DROP_GIANT(); 3557137257Spjd g_topology_lock(); 3558245444Smav g_raid3_shutdown = 1; 3559137257Spjd LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3560156612Spjd if ((sc = gp->softc) == NULL) 3561137257Spjd continue; 3562157630Spjd /* Skip synchronization geom. */ 3563157630Spjd if (gp == sc->sc_sync.ds_geom) 3564156612Spjd continue; 3565156612Spjd g_topology_unlock(); 3566156612Spjd sx_xlock(&sc->sc_lock); 3567245444Smav g_raid3_idle(sc, -1); 3568157630Spjd g_cancel_event(sc); 3569157630Spjd error = g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED); 3570157630Spjd if (error != 0) 3571157630Spjd sx_xunlock(&sc->sc_lock); 3572156612Spjd g_topology_lock(); 3573156612Spjd } 3574156612Spjd g_topology_unlock(); 3575156612Spjd PICKUP_GIANT(); 3576137257Spjd} 3577137257Spjd 3578137257Spjdstatic void 3579137257Spjdg_raid3_init(struct g_class *mp) 3580137257Spjd{ 3581137257Spjd 3582245444Smav g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, 3583245444Smav g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); 3584245444Smav if (g_raid3_post_sync == NULL) 3585137257Spjd G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); 3586137257Spjd} 3587137257Spjd 3588137257Spjdstatic void 3589137257Spjdg_raid3_fini(struct g_class *mp) 3590137257Spjd{ 3591137257Spjd 3592245444Smav if (g_raid3_post_sync != NULL) 3593245444Smav EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync); 3594137257Spjd} 3595137257Spjd 3596133808SpjdDECLARE_GEOM_CLASS(g_raid3_class, g_raid3); 3597