/* g_mirror.c revision 245443 */
/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 245443 2013-01-15 01:13:55Z mav $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

/* Sysctl/tunable knobs under kern.geom.mirror. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
/* Seconds to wait for all components to appear before starting degraded. */
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/* Seconds of write inactivity before components are marked clean. */
static u_int g_mirror_idletime = 5;
TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
TUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
    &g_mirror_disconnect_on_failure);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
TUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");

/* msleep() wrapper that traces sleep/wakeup at debug level 4. */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

/* Shutdown eventhandler tag and flag; set from the class init/fini path. */
static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;
static void g_mirror_init(struct g_class *mp);
static void g_mirror_fini(struct g_class *mp);

/* GEOM class registration for the mirror transformation. */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


/* Map a disk state constant to a human-readable name for logging. */
static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

/* Map a device state constant to a human-readable name for logging. */
static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

/*
 * Return the name of the disk's backing provider, or "[unknown]" when the
 * consumer is not (or no longer) attached.
 */
static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Events handling functions ---
 * Events in geom_mirror are used to maintain disks and device status
 * from one thread to simplify locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

/*
 * Queue a state-change event for the worker thread.
 *
 * 'arg' is the softc when G_MIRROR_EVENT_DEVICE is set in 'flags',
 * otherwise it is the disk.  With G_MIRROR_EVENT_DONTWAIT the event is
 * fire-and-forget; otherwise the caller (holding sc_lock exclusively)
 * drops the lock, sleeps until the worker marks the event DONE, and
 * returns the worker's error code.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	/* Kick the worker thread, which sleeps on the softc pointer. */
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	/* 5s timeout guards against a missed wakeup; DONE flag is rechecked. */
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

/* Peek (without removing) the oldest queued event, or NULL if none. */
static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

/* Unlink an event from the queue; the caller owns it afterwards. */
static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Cancel all pending events for the given disk.  Waiting senders are
 * woken with ECANCELED; fire-and-forget events are freed here.
 */
static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

/* Count queued (not yet dispatched) bios that originated from consumer cp. */
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

/*
 * Return non-zero when the consumer still has I/O in flight (cp->index
 * counts dispatched requests) or queued, and therefore must not be
 * destroyed yet.
 */
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

/* Deferred (g_post_event) detach+destroy of a consumer; see kill_consumer. */
static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

/*
 * Close and destroy a component consumer, unless it is still busy.
 * When closing drops the last writer, destruction is deferred past the
 * retaste event triggered inside g_access() (see comment below).
 */
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After retaste event was send (inside g_access()), we can send
		 * event to detach and destroy consumer.
		 * A class, which has consumer to the given provider connected
		 * will not receive retaste event for the provider.
		 * This is the way how I ignore retaste events when I close
		 * consumers opened for write: I detach and destroy consumer
		 * after retaste event is sent.
376136644Sache */ 377136644Sache g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL); 378136644Sache return; 379136644Sache } 380136644Sache G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name); 381136644Sache g_detach(cp); 382136644Sache g_destroy_consumer(cp); 383136644Sache} 384136644Sache 385136644Sachestatic int 38621308Sacheg_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) 38721308Sache{ 38821308Sache struct g_consumer *cp; 38921308Sache int error; 39021308Sache 39121308Sache g_topology_assert_not(); 39221308Sache KASSERT(disk->d_consumer == NULL, 39321308Sache ("Disk already connected (device %s).", disk->d_softc->sc_name)); 39421308Sache 39521308Sache g_topology_lock(); 39621308Sache cp = g_new_consumer(disk->d_softc->sc_geom); 39775406Sache error = g_attach(cp, pp); 39821308Sache if (error != 0) { 39921308Sache g_destroy_consumer(cp); 40021308Sache g_topology_unlock(); 40121308Sache return (error); 402119610Sache } 403119610Sache error = g_access(cp, 1, 1, 1); 40421308Sache if (error != 0) { 40521308Sache g_detach(cp); 40621308Sache g_destroy_consumer(cp); 40721308Sache g_topology_unlock(); 40821308Sache G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).", 40921308Sache pp->name, error); 41021308Sache return (error); 41121308Sache } 41221308Sache g_topology_unlock(); 41321308Sache disk->d_consumer = cp; 41421308Sache disk->d_consumer->private = disk; 41521308Sache disk->d_consumer->index = 0; 416119610Sache 41721308Sache G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk)); 418119610Sache return (0); 419119610Sache} 420119610Sache 421119610Sachestatic void 42221308Sacheg_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 42321308Sache{ 42421308Sache 42521308Sache g_topology_assert(); 42621308Sache 42721308Sache if (cp == NULL) 42821308Sache return; 42921308Sache if (cp->provider != NULL) 43021308Sache g_mirror_kill_consumer(sc, cp); 43121308Sache else 43221308Sache 
g_destroy_consumer(cp); 43321308Sache} 43421308Sache 43521308Sache/* 43621308Sache * Initialize disk. This means allocate memory, create consumer, attach it 43721308Sache * to the provider and open access (r1w1e1) to it. 43821308Sache */ 43921308Sachestatic struct g_mirror_disk * 44021308Sacheg_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, 44158310Sache struct g_mirror_metadata *md, int *errorp) 44226497Sache{ 44358310Sache struct g_mirror_disk *disk; 44426497Sache int i, error; 44526497Sache 44658310Sache disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO); 44726497Sache if (disk == NULL) { 44821308Sache error = ENOMEM; 44921308Sache goto fail; 45021308Sache } 45121308Sache disk->d_softc = sc; 45221308Sache error = g_mirror_connect_disk(disk, pp); 45321308Sache if (error != 0) 45421308Sache goto fail; 45521308Sache disk->d_id = md->md_did; 45621308Sache disk->d_state = G_MIRROR_DISK_STATE_NONE; 45721308Sache disk->d_priority = md->md_priority; 45821308Sache disk->d_flags = md->md_dflags; 45921308Sache error = g_getattr("GEOM::candelete", disk->d_consumer, &i); 46021308Sache if (error != 0) 46121308Sache goto fail; 46221308Sache if (i) 46321308Sache disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE; 46421308Sache if (md->md_provider[0] != '\0') 46521308Sache disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; 46621308Sache disk->d_sync.ds_consumer = NULL; 46721308Sache disk->d_sync.ds_offset = md->md_sync_offset; 46821308Sache disk->d_sync.ds_offset_done = md->md_sync_offset; 46921308Sache disk->d_genid = md->md_genid; 47021308Sache disk->d_sync.ds_syncid = md->md_syncid; 47121308Sache if (errorp != NULL) 47221308Sache *errorp = 0; 47321308Sache return (disk); 47421308Sachefail: 47521308Sache if (errorp != NULL) 47621308Sache *errorp = error; 47721308Sache if (disk != NULL) 47821308Sache free(disk, M_MIRROR); 47921308Sache return (NULL); 48021308Sache} 48121308Sache 48221308Sachestatic void 48321308Sacheg_mirror_destroy_disk(struct g_mirror_disk 
*disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	/* Unlink the disk and drop any events/hints still referring to it. */
	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

/*
 * Tear down the whole mirror device: destroy the provider, mark every
 * disk clean and destroy it, cancel pending events, and wither both the
 * sync geom and the main geom.  Consumes (unlocks and destroys) sc_lock.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	/* Re-fetch the head each pass: g_mirror_destroy_disk() unlinks. */
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	/* Drain the event queue, failing any waiters with ECANCELED. */
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	g_topology_unlock();
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	sx_xunlock(&sc->sc_lock);
	sx_destroy(&sc->sc_lock);
}

/*
 * Orphan callback: the underlying provider went away.  Ask the worker to
 * disconnect the disk and request a syncid bump.
 */
static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Function should return the next active disk on the list.
 * It is possible that it will be the same disk as given.
 * If there are no active disks on list, NULL is returned.
58221308Sache */ 58321308Sachestatic __inline struct g_mirror_disk * 58475406Sacheg_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 585165670Sache{ 58621308Sache struct g_mirror_disk *dp; 58721308Sache 58821308Sache for (dp = LIST_NEXT(disk, d_next); dp != disk; 58921308Sache dp = LIST_NEXT(dp, d_next)) { 59021308Sache if (dp == NULL) 591119610Sache dp = LIST_FIRST(&sc->sc_disks); 592119610Sache if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) 59321308Sache break; 59421308Sache } 59521308Sache if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 59621308Sache return (NULL); 59721308Sache return (dp); 59826497Sache} 59921308Sache 60021308Sachestatic struct g_mirror_disk * 60121308Sacheg_mirror_get_disk(struct g_mirror_softc *sc) 60221308Sache{ 60321308Sache struct g_mirror_disk *disk; 60421308Sache 60521308Sache if (sc->sc_hint == NULL) { 60621308Sache sc->sc_hint = LIST_FIRST(&sc->sc_disks); 60721308Sache if (sc->sc_hint == NULL) 60821308Sache return (NULL); 60921308Sache } 61035486Sache disk = sc->sc_hint; 61135486Sache if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) { 61235486Sache disk = g_mirror_find_next(sc, disk); 61335486Sache if (disk == NULL) 61435486Sache return (NULL); 61535486Sache } 61635486Sache sc->sc_hint = g_mirror_find_next(sc, disk); 61735486Sache return (disk); 61821308Sache} 61921308Sache 62021308Sachestatic int 62121308Sacheg_mirror_write_metadata(struct g_mirror_disk *disk, 62221308Sache struct g_mirror_metadata *md) 62321308Sache{ 62421308Sache struct g_mirror_softc *sc; 62535486Sache struct g_consumer *cp; 62635486Sache off_t offset, length; 62735486Sache u_char *sector; 62835486Sache int error = 0; 62947558Sache 63047558Sache g_topology_assert_not(); 63135486Sache sc = disk->d_softc; 63235486Sache sx_assert(&sc->sc_lock, SX_LOCKED); 63335486Sache 63435486Sache cp = disk->d_consumer; 63535486Sache KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 63635486Sache KASSERT(cp->provider != NULL, ("NULL provider (%s).", 
	    sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	/* Metadata lives in the last sector of the component. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL)
		mirror_metadata_encode(md, sector);
	error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		/* First failure logs loudly; repeats are demoted to level 1. */
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

/* Erase on-disk metadata by writing a zeroed metadata sector. */
static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

/*
 * Populate 'md' from the device state and, when disk != NULL, from that
 * disk's per-component state.  With disk == NULL a fresh component
 * identity (random did, zeroed sync state) is generated.
 */
void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		/* Record resume point only while actually synchronizing. */
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

/* Rebuild the disk's metadata from current state and write it out. */
void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

/*
 * Increment the synchronization generation and push it to every ACTIVE
 * or SYNCHRONIZING disk's metadata.
 */
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

/*
 * Increment the device generation id (used to fence off stale components)
 * and push it to every ACTIVE or SYNCHRONIZING disk's metadata.
 */
static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

/*
 * Mark active components clean after the idle period has elapsed.
 * Returns the remaining timeout (in seconds) when it is too early to go
 * idle, 0 otherwise.
 */
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	/* acw == -1 means "keep current access"; check the provider's acw. */
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

/*
 * Leave the idle state: mark all active components dirty before write
 * activity resumes (skipped when NOFAILSYNC is set).
 */
static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

/*
 * biodone callback for regular component I/O: tag the bio and hand it
 * back to the worker thread's queue.
 */
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

/*
 * Process a completed child bio of a regular request.  For reads, a
 * failure is retried on another disk by requeueing the parent; for
 * writes/deletes the parent succeeds if at least one child succeeded.
 * Failed components may be marked BROKEN and disconnected.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer already orphaned; finish tearing it down. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			/* First failure logs at level 0, repeats at 1. */
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/* Pretend the failed child never existed. */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			/* Retry the read on another active disk. */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.",
			    __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

/*
 * biodone callback for synchronization I/O: tag the bio and hand it back
 * to the worker thread's queue.
 */
static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with 'prefer' balance algorithm.
	 * If the component with the highest priority is currently disconnected
	 * we will not be able to read the dump after the reboot if it will be
	 * connected and synchronized later. Can we do something better?
1010 */ 1011 sc = bp->bio_to->geom->softc; 1012 disk = LIST_FIRST(&sc->sc_disks); 1013 1014 gkd = (struct g_kerneldump *)bp->bio_data; 1015 if (gkd->length > bp->bio_to->mediasize) 1016 gkd->length = bp->bio_to->mediasize; 1017 cbp = g_clone_bio(bp); 1018 if (cbp == NULL) { 1019 g_io_deliver(bp, ENOMEM); 1020 return; 1021 } 1022 cbp->bio_done = g_std_done; 1023 g_io_request(cbp, disk->d_consumer); 1024 G_MIRROR_DEBUG(1, "Kernel dump will go to %s.", 1025 g_mirror_get_diskname(disk)); 1026} 1027 1028static void 1029g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp) 1030{ 1031 struct bio_queue_head queue; 1032 struct g_mirror_disk *disk; 1033 struct g_consumer *cp; 1034 struct bio *cbp; 1035 1036 bioq_init(&queue); 1037 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1038 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1039 continue; 1040 cbp = g_clone_bio(bp); 1041 if (cbp == NULL) { 1042 for (cbp = bioq_first(&queue); cbp != NULL; 1043 cbp = bioq_first(&queue)) { 1044 bioq_remove(&queue, cbp); 1045 g_destroy_bio(cbp); 1046 } 1047 if (bp->bio_error == 0) 1048 bp->bio_error = ENOMEM; 1049 g_io_deliver(bp, bp->bio_error); 1050 return; 1051 } 1052 bioq_insert_tail(&queue, cbp); 1053 cbp->bio_done = g_std_done; 1054 cbp->bio_caller1 = disk; 1055 cbp->bio_to = disk->d_consumer->provider; 1056 } 1057 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 1058 bioq_remove(&queue, cbp); 1059 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1060 disk = cbp->bio_caller1; 1061 cbp->bio_caller1 = NULL; 1062 cp = disk->d_consumer; 1063 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1064 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1065 cp->acr, cp->acw, cp->ace)); 1066 g_io_request(cbp, disk->d_consumer); 1067 } 1068} 1069 1070static void 1071g_mirror_start(struct bio *bp) 1072{ 1073 struct g_mirror_softc *sc; 1074 1075 sc = bp->bio_to->geom->softc; 1076 /* 1077 * If sc == NULL or there are no valid disks, provider's error 1078 * should be 
set and g_mirror_start() should not be called at all. 1079 */ 1080 KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 1081 ("Provider's error should be set (error=%d)(mirror=%s).", 1082 bp->bio_to->error, bp->bio_to->name)); 1083 G_MIRROR_LOGREQ(3, bp, "Request received."); 1084 1085 switch (bp->bio_cmd) { 1086 case BIO_READ: 1087 case BIO_WRITE: 1088 case BIO_DELETE: 1089 break; 1090 case BIO_FLUSH: 1091 g_mirror_flush(sc, bp); 1092 return; 1093 case BIO_GETATTR: 1094 if (g_handleattr_int(bp, "GEOM::candelete", 1)) 1095 return; 1096 else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) { 1097 g_mirror_kernel_dump(bp); 1098 return; 1099 } 1100 /* FALLTHROUGH */ 1101 default: 1102 g_io_deliver(bp, EOPNOTSUPP); 1103 return; 1104 } 1105 mtx_lock(&sc->sc_queue_mtx); 1106 bioq_disksort(&sc->sc_queue, bp); 1107 mtx_unlock(&sc->sc_queue_mtx); 1108 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1109 wakeup(sc); 1110} 1111 1112/* 1113 * Return TRUE if the given request is colliding with a in-progress 1114 * synchronization request. 1115 */ 1116static int 1117g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp) 1118{ 1119 struct g_mirror_disk *disk; 1120 struct bio *sbp; 1121 off_t rstart, rend, sstart, send; 1122 int i; 1123 1124 if (sc->sc_sync.ds_ndisks == 0) 1125 return (0); 1126 rstart = bp->bio_offset; 1127 rend = bp->bio_offset + bp->bio_length; 1128 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1129 if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING) 1130 continue; 1131 for (i = 0; i < g_mirror_syncreqs; i++) { 1132 sbp = disk->d_sync.ds_bios[i]; 1133 if (sbp == NULL) 1134 continue; 1135 sstart = sbp->bio_offset; 1136 send = sbp->bio_offset + sbp->bio_length; 1137 if (rend > sstart && rstart < send) 1138 return (1); 1139 } 1140 } 1141 return (0); 1142} 1143 1144/* 1145 * Return TRUE if the given sync request is colliding with a in-progress regular 1146 * request. 
 */
static int
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	/* No disk is synchronizing - a collision is impossible. */
	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		/* Overlap test on half-open byte ranges. */
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Puts request onto delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Puts synchronization request onto delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Releases delayed regular requests which don't collide anymore with sync
 * requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
#if 0
		/*
		 * wakeup() is not needed, because this function is called from
		 * the worker thread.
		 */
		wakeup(&sc->sc_queue);
#endif
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Releases delayed sync requests which don't collide anymore with regular
 * requests.
1219 */ 1220static void 1221g_mirror_sync_release(struct g_mirror_softc *sc) 1222{ 1223 struct bio *bp, *bp2; 1224 1225 TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { 1226 if (g_mirror_regular_collision(sc, bp)) 1227 continue; 1228 bioq_remove(&sc->sc_sync_delayed, bp); 1229 G_MIRROR_LOGREQ(2, bp, 1230 "Releasing delayed synchronization request."); 1231 g_io_request(bp, bp->bio_from); 1232 } 1233} 1234 1235/* 1236 * Handle synchronization requests. 1237 * Every synchronization request is two-steps process: first, READ request is 1238 * send to active provider and then WRITE request (with read data) to the provider 1239 * beeing synchronized. When WRITE is finished, new synchronization request is 1240 * send. 1241 */ 1242static void 1243g_mirror_sync_request(struct bio *bp) 1244{ 1245 struct g_mirror_softc *sc; 1246 struct g_mirror_disk *disk; 1247 1248 bp->bio_from->index--; 1249 sc = bp->bio_from->geom->softc; 1250 disk = bp->bio_from->private; 1251 if (disk == NULL) { 1252 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 1253 g_topology_lock(); 1254 g_mirror_kill_consumer(sc, bp->bio_from); 1255 g_topology_unlock(); 1256 free(bp->bio_data, M_MIRROR); 1257 g_destroy_bio(bp); 1258 sx_xlock(&sc->sc_lock); 1259 return; 1260 } 1261 1262 /* 1263 * Synchronization request. 
1264 */ 1265 switch (bp->bio_cmd) { 1266 case BIO_READ: 1267 { 1268 struct g_consumer *cp; 1269 1270 if (bp->bio_error != 0) { 1271 G_MIRROR_LOGREQ(0, bp, 1272 "Synchronization request failed (error=%d).", 1273 bp->bio_error); 1274 g_destroy_bio(bp); 1275 return; 1276 } 1277 G_MIRROR_LOGREQ(3, bp, 1278 "Synchronization request half-finished."); 1279 bp->bio_cmd = BIO_WRITE; 1280 bp->bio_cflags = 0; 1281 cp = disk->d_consumer; 1282 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1283 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1284 cp->acr, cp->acw, cp->ace)); 1285 cp->index++; 1286 g_io_request(bp, cp); 1287 return; 1288 } 1289 case BIO_WRITE: 1290 { 1291 struct g_mirror_disk_sync *sync; 1292 off_t offset; 1293 void *data; 1294 int i; 1295 1296 if (bp->bio_error != 0) { 1297 G_MIRROR_LOGREQ(0, bp, 1298 "Synchronization request failed (error=%d).", 1299 bp->bio_error); 1300 g_destroy_bio(bp); 1301 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 1302 g_mirror_event_send(disk, 1303 G_MIRROR_DISK_STATE_DISCONNECTED, 1304 G_MIRROR_EVENT_DONTWAIT); 1305 return; 1306 } 1307 G_MIRROR_LOGREQ(3, bp, "Synchronization request finished."); 1308 sync = &disk->d_sync; 1309 if (sync->ds_offset == sc->sc_mediasize || 1310 sync->ds_consumer == NULL || 1311 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1312 /* Don't send more synchronization requests. */ 1313 sync->ds_inflight--; 1314 if (sync->ds_bios != NULL) { 1315 i = (int)(uintptr_t)bp->bio_caller1; 1316 sync->ds_bios[i] = NULL; 1317 } 1318 free(bp->bio_data, M_MIRROR); 1319 g_destroy_bio(bp); 1320 if (sync->ds_inflight > 0) 1321 return; 1322 if (sync->ds_consumer == NULL || 1323 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1324 return; 1325 } 1326 /* Disk up-to-date, activate it. */ 1327 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE, 1328 G_MIRROR_EVENT_DONTWAIT); 1329 return; 1330 } 1331 1332 /* Send next synchronization request. 
*/ 1333 data = bp->bio_data; 1334 bzero(bp, sizeof(*bp)); 1335 bp->bio_cmd = BIO_READ; 1336 bp->bio_offset = sync->ds_offset; 1337 bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 1338 sync->ds_offset += bp->bio_length; 1339 bp->bio_done = g_mirror_sync_done; 1340 bp->bio_data = data; 1341 bp->bio_from = sync->ds_consumer; 1342 bp->bio_to = sc->sc_provider; 1343 G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); 1344 sync->ds_consumer->index++; 1345 /* 1346 * Delay the request if it is colliding with a regular request. 1347 */ 1348 if (g_mirror_regular_collision(sc, bp)) 1349 g_mirror_sync_delay(sc, bp); 1350 else 1351 g_io_request(bp, sync->ds_consumer); 1352 1353 /* Release delayed requests if possible. */ 1354 g_mirror_regular_release(sc); 1355 1356 /* Find the smallest offset */ 1357 offset = sc->sc_mediasize; 1358 for (i = 0; i < g_mirror_syncreqs; i++) { 1359 bp = sync->ds_bios[i]; 1360 if (bp->bio_offset < offset) 1361 offset = bp->bio_offset; 1362 } 1363 if (sync->ds_offset_done + (MAXPHYS * 100) < offset) { 1364 /* Update offset_done on every 100 blocks. 
*/ 1365 sync->ds_offset_done = offset; 1366 g_mirror_update_metadata(disk); 1367 } 1368 return; 1369 } 1370 default: 1371 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1372 bp->bio_cmd, sc->sc_name)); 1373 break; 1374 } 1375} 1376 1377static void 1378g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp) 1379{ 1380 struct g_mirror_disk *disk; 1381 struct g_consumer *cp; 1382 struct bio *cbp; 1383 1384 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1385 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) 1386 break; 1387 } 1388 if (disk == NULL) { 1389 if (bp->bio_error == 0) 1390 bp->bio_error = ENXIO; 1391 g_io_deliver(bp, bp->bio_error); 1392 return; 1393 } 1394 cbp = g_clone_bio(bp); 1395 if (cbp == NULL) { 1396 if (bp->bio_error == 0) 1397 bp->bio_error = ENOMEM; 1398 g_io_deliver(bp, bp->bio_error); 1399 return; 1400 } 1401 /* 1402 * Fill in the component buf structure. 1403 */ 1404 cp = disk->d_consumer; 1405 cbp->bio_done = g_mirror_done; 1406 cbp->bio_to = cp->provider; 1407 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1408 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1409 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1410 cp->acw, cp->ace)); 1411 cp->index++; 1412 g_io_request(cbp, cp); 1413} 1414 1415static void 1416g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp) 1417{ 1418 struct g_mirror_disk *disk; 1419 struct g_consumer *cp; 1420 struct bio *cbp; 1421 1422 disk = g_mirror_get_disk(sc); 1423 if (disk == NULL) { 1424 if (bp->bio_error == 0) 1425 bp->bio_error = ENXIO; 1426 g_io_deliver(bp, bp->bio_error); 1427 return; 1428 } 1429 cbp = g_clone_bio(bp); 1430 if (cbp == NULL) { 1431 if (bp->bio_error == 0) 1432 bp->bio_error = ENOMEM; 1433 g_io_deliver(bp, bp->bio_error); 1434 return; 1435 } 1436 /* 1437 * Fill in the component buf structure. 
1438 */ 1439 cp = disk->d_consumer; 1440 cbp->bio_done = g_mirror_done; 1441 cbp->bio_to = cp->provider; 1442 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1443 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1444 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1445 cp->acw, cp->ace)); 1446 cp->index++; 1447 g_io_request(cbp, cp); 1448} 1449 1450#define TRACK_SIZE (1 * 1024 * 1024) 1451#define LOAD_SCALE 256 1452#define ABS(x) (((x) >= 0) ? (x) : (-(x))) 1453 1454static void 1455g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp) 1456{ 1457 struct g_mirror_disk *disk, *dp; 1458 struct g_consumer *cp; 1459 struct bio *cbp; 1460 int prio, best; 1461 1462 /* Find a disk with the smallest load. */ 1463 disk = NULL; 1464 best = INT_MAX; 1465 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 1466 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1467 continue; 1468 prio = dp->load; 1469 /* If disk head is precisely in position - highly prefer it. */ 1470 if (dp->d_last_offset == bp->bio_offset) 1471 prio -= 2 * LOAD_SCALE; 1472 else 1473 /* If disk head is close to position - prefer it. */ 1474 if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE) 1475 prio -= 1 * LOAD_SCALE; 1476 if (prio <= best) { 1477 disk = dp; 1478 best = prio; 1479 } 1480 } 1481 KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name)); 1482 cbp = g_clone_bio(bp); 1483 if (cbp == NULL) { 1484 if (bp->bio_error == 0) 1485 bp->bio_error = ENOMEM; 1486 g_io_deliver(bp, bp->bio_error); 1487 return; 1488 } 1489 /* 1490 * Fill in the component buf structure. 
1491 */ 1492 cp = disk->d_consumer; 1493 cbp->bio_done = g_mirror_done; 1494 cbp->bio_to = cp->provider; 1495 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1496 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1497 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1498 cp->acw, cp->ace)); 1499 cp->index++; 1500 /* Remember last head position */ 1501 disk->d_last_offset = bp->bio_offset + bp->bio_length; 1502 /* Update loads. */ 1503 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 1504 dp->load = (dp->d_consumer->index * LOAD_SCALE + 1505 dp->load * 7) / 8; 1506 } 1507 g_io_request(cbp, cp); 1508} 1509 1510static void 1511g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) 1512{ 1513 struct bio_queue_head queue; 1514 struct g_mirror_disk *disk; 1515 struct g_consumer *cp; 1516 struct bio *cbp; 1517 off_t left, mod, offset, slice; 1518 u_char *data; 1519 u_int ndisks; 1520 1521 if (bp->bio_length <= sc->sc_slice) { 1522 g_mirror_request_round_robin(sc, bp); 1523 return; 1524 } 1525 ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); 1526 slice = bp->bio_length / ndisks; 1527 mod = slice % sc->sc_provider->sectorsize; 1528 if (mod != 0) 1529 slice += sc->sc_provider->sectorsize - mod; 1530 /* 1531 * Allocate all bios before sending any request, so we can 1532 * return ENOMEM in nice and clean way. 
1533 */ 1534 left = bp->bio_length; 1535 offset = bp->bio_offset; 1536 data = bp->bio_data; 1537 bioq_init(&queue); 1538 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1539 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1540 continue; 1541 cbp = g_clone_bio(bp); 1542 if (cbp == NULL) { 1543 for (cbp = bioq_first(&queue); cbp != NULL; 1544 cbp = bioq_first(&queue)) { 1545 bioq_remove(&queue, cbp); 1546 g_destroy_bio(cbp); 1547 } 1548 if (bp->bio_error == 0) 1549 bp->bio_error = ENOMEM; 1550 g_io_deliver(bp, bp->bio_error); 1551 return; 1552 } 1553 bioq_insert_tail(&queue, cbp); 1554 cbp->bio_done = g_mirror_done; 1555 cbp->bio_caller1 = disk; 1556 cbp->bio_to = disk->d_consumer->provider; 1557 cbp->bio_offset = offset; 1558 cbp->bio_data = data; 1559 cbp->bio_length = MIN(left, slice); 1560 left -= cbp->bio_length; 1561 if (left == 0) 1562 break; 1563 offset += cbp->bio_length; 1564 data += cbp->bio_length; 1565 } 1566 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 1567 bioq_remove(&queue, cbp); 1568 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1569 disk = cbp->bio_caller1; 1570 cbp->bio_caller1 = NULL; 1571 cp = disk->d_consumer; 1572 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1573 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1574 cp->acr, cp->acw, cp->ace)); 1575 disk->d_consumer->index++; 1576 g_io_request(cbp, disk->d_consumer); 1577 } 1578} 1579 1580static void 1581g_mirror_register_request(struct bio *bp) 1582{ 1583 struct g_mirror_softc *sc; 1584 1585 sc = bp->bio_to->geom->softc; 1586 switch (bp->bio_cmd) { 1587 case BIO_READ: 1588 switch (sc->sc_balance) { 1589 case G_MIRROR_BALANCE_LOAD: 1590 g_mirror_request_load(sc, bp); 1591 break; 1592 case G_MIRROR_BALANCE_PREFER: 1593 g_mirror_request_prefer(sc, bp); 1594 break; 1595 case G_MIRROR_BALANCE_ROUND_ROBIN: 1596 g_mirror_request_round_robin(sc, bp); 1597 break; 1598 case G_MIRROR_BALANCE_SPLIT: 1599 g_mirror_request_split(sc, bp); 1600 break; 1601 } 1602 
return; 1603 case BIO_WRITE: 1604 case BIO_DELETE: 1605 { 1606 struct g_mirror_disk *disk; 1607 struct g_mirror_disk_sync *sync; 1608 struct bio_queue_head queue; 1609 struct g_consumer *cp; 1610 struct bio *cbp; 1611 1612 /* 1613 * Delay the request if it is colliding with a synchronization 1614 * request. 1615 */ 1616 if (g_mirror_sync_collision(sc, bp)) { 1617 g_mirror_regular_delay(sc, bp); 1618 return; 1619 } 1620 1621 if (sc->sc_idle) 1622 g_mirror_unidle(sc); 1623 else 1624 sc->sc_last_write = time_uptime; 1625 1626 /* 1627 * Allocate all bios before sending any request, so we can 1628 * return ENOMEM in nice and clean way. 1629 */ 1630 bioq_init(&queue); 1631 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1632 sync = &disk->d_sync; 1633 switch (disk->d_state) { 1634 case G_MIRROR_DISK_STATE_ACTIVE: 1635 break; 1636 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 1637 if (bp->bio_offset >= sync->ds_offset) 1638 continue; 1639 break; 1640 default: 1641 continue; 1642 } 1643 if (bp->bio_cmd == BIO_DELETE && 1644 (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0) 1645 continue; 1646 cbp = g_clone_bio(bp); 1647 if (cbp == NULL) { 1648 for (cbp = bioq_first(&queue); cbp != NULL; 1649 cbp = bioq_first(&queue)) { 1650 bioq_remove(&queue, cbp); 1651 g_destroy_bio(cbp); 1652 } 1653 if (bp->bio_error == 0) 1654 bp->bio_error = ENOMEM; 1655 g_io_deliver(bp, bp->bio_error); 1656 return; 1657 } 1658 bioq_insert_tail(&queue, cbp); 1659 cbp->bio_done = g_mirror_done; 1660 cp = disk->d_consumer; 1661 cbp->bio_caller1 = cp; 1662 cbp->bio_to = cp->provider; 1663 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1664 ("Consumer %s not opened (r%dw%de%d).", 1665 cp->provider->name, cp->acr, cp->acw, cp->ace)); 1666 } 1667 for (cbp = bioq_first(&queue); cbp != NULL; 1668 cbp = bioq_first(&queue)) { 1669 bioq_remove(&queue, cbp); 1670 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1671 cp = cbp->bio_caller1; 1672 cbp->bio_caller1 = NULL; 1673 cp->index++; 1674 sc->sc_writes++; 1675 
g_io_request(cbp, cp); 1676 } 1677 /* 1678 * Put request onto inflight queue, so we can check if new 1679 * synchronization requests don't collide with it. 1680 */ 1681 bioq_insert_tail(&sc->sc_inflight, bp); 1682 /* 1683 * Bump syncid on first write. 1684 */ 1685 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) { 1686 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; 1687 g_mirror_bump_syncid(sc); 1688 } 1689 return; 1690 } 1691 default: 1692 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1693 bp->bio_cmd, sc->sc_name)); 1694 break; 1695 } 1696} 1697 1698static int 1699g_mirror_can_destroy(struct g_mirror_softc *sc) 1700{ 1701 struct g_geom *gp; 1702 struct g_consumer *cp; 1703 1704 g_topology_assert(); 1705 gp = sc->sc_geom; 1706 if (gp->softc == NULL) 1707 return (1); 1708 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0) 1709 return (0); 1710 LIST_FOREACH(cp, &gp->consumer, consumer) { 1711 if (g_mirror_is_busy(sc, cp)) 1712 return (0); 1713 } 1714 gp = sc->sc_sync.ds_geom; 1715 LIST_FOREACH(cp, &gp->consumer, consumer) { 1716 if (g_mirror_is_busy(sc, cp)) 1717 return (0); 1718 } 1719 G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.", 1720 sc->sc_name); 1721 return (1); 1722} 1723 1724static int 1725g_mirror_try_destroy(struct g_mirror_softc *sc) 1726{ 1727 1728 if (sc->sc_rootmount != NULL) { 1729 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1730 sc->sc_rootmount); 1731 root_mount_rel(sc->sc_rootmount); 1732 sc->sc_rootmount = NULL; 1733 } 1734 g_topology_lock(); 1735 if (!g_mirror_can_destroy(sc)) { 1736 g_topology_unlock(); 1737 return (0); 1738 } 1739 sc->sc_geom->softc = NULL; 1740 sc->sc_sync.ds_geom->softc = NULL; 1741 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) { 1742 g_topology_unlock(); 1743 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1744 &sc->sc_worker); 1745 /* Unlock sc_lock here, as it can be destroyed after wakeup. 
*/ 1746 sx_xunlock(&sc->sc_lock); 1747 wakeup(&sc->sc_worker); 1748 sc->sc_worker = NULL; 1749 } else { 1750 g_topology_unlock(); 1751 g_mirror_destroy_device(sc); 1752 free(sc, M_MIRROR); 1753 } 1754 return (1); 1755} 1756 1757/* 1758 * Worker thread. 1759 */ 1760static void 1761g_mirror_worker(void *arg) 1762{ 1763 struct g_mirror_softc *sc; 1764 struct g_mirror_event *ep; 1765 struct bio *bp; 1766 int timeout; 1767 1768 sc = arg; 1769 thread_lock(curthread); 1770 sched_prio(curthread, PRIBIO); 1771 thread_unlock(curthread); 1772 1773 sx_xlock(&sc->sc_lock); 1774 for (;;) { 1775 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); 1776 /* 1777 * First take a look at events. 1778 * This is important to handle events before any I/O requests. 1779 */ 1780 ep = g_mirror_event_get(sc); 1781 if (ep != NULL) { 1782 g_mirror_event_remove(sc, ep); 1783 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { 1784 /* Update only device status. */ 1785 G_MIRROR_DEBUG(3, 1786 "Running event for device %s.", 1787 sc->sc_name); 1788 ep->e_error = 0; 1789 g_mirror_update_device(sc, 1); 1790 } else { 1791 /* Update disk status. 
*/ 1792 G_MIRROR_DEBUG(3, "Running event for disk %s.", 1793 g_mirror_get_diskname(ep->e_disk)); 1794 ep->e_error = g_mirror_update_disk(ep->e_disk, 1795 ep->e_state); 1796 if (ep->e_error == 0) 1797 g_mirror_update_device(sc, 0); 1798 } 1799 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { 1800 KASSERT(ep->e_error == 0, 1801 ("Error cannot be handled.")); 1802 g_mirror_event_free(ep); 1803 } else { 1804 ep->e_flags |= G_MIRROR_EVENT_DONE; 1805 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1806 ep); 1807 mtx_lock(&sc->sc_events_mtx); 1808 wakeup(ep); 1809 mtx_unlock(&sc->sc_events_mtx); 1810 } 1811 if ((sc->sc_flags & 1812 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1813 if (g_mirror_try_destroy(sc)) { 1814 curthread->td_pflags &= ~TDP_GEOM; 1815 G_MIRROR_DEBUG(1, "Thread exiting."); 1816 kproc_exit(0); 1817 } 1818 } 1819 G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); 1820 continue; 1821 } 1822 /* 1823 * Check if we can mark array as CLEAN and if we can't take 1824 * how much seconds should we wait. 1825 */ 1826 timeout = g_mirror_idle(sc, -1); 1827 /* 1828 * Now I/O requests. 1829 */ 1830 /* Get first request from the queue. */ 1831 mtx_lock(&sc->sc_queue_mtx); 1832 bp = bioq_first(&sc->sc_queue); 1833 if (bp == NULL) { 1834 if ((sc->sc_flags & 1835 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1836 mtx_unlock(&sc->sc_queue_mtx); 1837 if (g_mirror_try_destroy(sc)) { 1838 curthread->td_pflags &= ~TDP_GEOM; 1839 G_MIRROR_DEBUG(1, "Thread exiting."); 1840 kproc_exit(0); 1841 } 1842 mtx_lock(&sc->sc_queue_mtx); 1843 } 1844 sx_xunlock(&sc->sc_lock); 1845 /* 1846 * XXX: We can miss an event here, because an event 1847 * can be added without sx-device-lock and without 1848 * mtx-queue-lock. Maybe I should just stop using 1849 * dedicated mutex for events synchronization and 1850 * stick with the queue lock? 1851 * The event will hang here until next I/O request 1852 * or next event is received. 
1853 */ 1854 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 1855 timeout * hz); 1856 sx_xlock(&sc->sc_lock); 1857 G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); 1858 continue; 1859 } 1860 bioq_remove(&sc->sc_queue, bp); 1861 mtx_unlock(&sc->sc_queue_mtx); 1862 1863 if (bp->bio_from->geom == sc->sc_sync.ds_geom && 1864 (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { 1865 g_mirror_sync_request(bp); /* READ */ 1866 } else if (bp->bio_to != sc->sc_provider) { 1867 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) 1868 g_mirror_regular_request(bp); 1869 else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) 1870 g_mirror_sync_request(bp); /* WRITE */ 1871 else { 1872 KASSERT(0, 1873 ("Invalid request cflags=0x%hhx to=%s.", 1874 bp->bio_cflags, bp->bio_to->name)); 1875 } 1876 } else { 1877 g_mirror_register_request(bp); 1878 } 1879 G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__); 1880 } 1881} 1882 1883static void 1884g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 1885{ 1886 1887 sx_assert(&sc->sc_lock, SX_LOCKED); 1888 1889 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) 1890 return; 1891 if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { 1892 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.", 1893 g_mirror_get_diskname(disk), sc->sc_name); 1894 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1895 } else if (sc->sc_idle && 1896 (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 1897 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.", 1898 g_mirror_get_diskname(disk), sc->sc_name); 1899 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 1900 } 1901} 1902 1903static void 1904g_mirror_sync_start(struct g_mirror_disk *disk) 1905{ 1906 struct g_mirror_softc *sc; 1907 struct g_consumer *cp; 1908 struct bio *bp; 1909 int error, i; 1910 1911 g_topology_assert_not(); 1912 sc = disk->d_softc; 1913 sx_assert(&sc->sc_lock, SX_LOCKED); 1914 1915 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));
	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
	    sc->sc_state));

	/* Release sc_lock before taking the topology lock (reacquired below). */
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	cp = g_new_consumer(sc->sc_sync.ds_geom);
	error = g_attach(cp, sc->sc_provider);
	KASSERT(error == 0,
	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
	error = g_access(cp, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);

	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_mirror_get_diskname(disk));
	/* Unless failed-sync handling is disabled, mark the disk dirty. */
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	KASSERT(disk->d_sync.ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_mirror_get_diskname(disk)));

	disk->d_sync.ds_consumer = cp;
	disk->d_sync.ds_consumer->private = disk;
	disk->d_sync.ds_consumer->index = 0;

	/*
	 * Allocate memory for synchronization bios and initialize them.
	 */
	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
	    M_MIRROR, M_WAITOK);
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = g_alloc_bio();
		disk->d_sync.ds_bios[i] = bp;
		bp->bio_parent = NULL;
		bp->bio_cmd = BIO_READ;
		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
		bp->bio_cflags = 0;
		bp->bio_offset = disk->d_sync.ds_offset;
		/*
		 * NOTE(review): assumes ds_offset never exceeds sc_mediasize
		 * at this point, otherwise bio_length would underflow —
		 * confirm with callers that set ds_offset.
		 */
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		disk->d_sync.ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_from = disk->d_sync.ds_consumer;
		bp->bio_to = sc->sc_provider;
		/* Stash the slot index so the request can be identified. */
		bp->bio_caller1 = (void *)(uintptr_t)i;
	}

	/* Increase the number of disks in SYNCHRONIZING state. */
	sc->sc_sync.ds_ndisks++;
	/* Set the number of in-flight synchronization requests. */
	disk->d_sync.ds_inflight = g_mirror_syncreqs;

	/*
	 * Fire off first synchronization requests.
	 */
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = disk->d_sync.ds_bios[i];
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		disk->d_sync.ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, disk->d_sync.ds_consumer);
	}
}

/*
 * Stop synchronization process.
 * type: 0 - synchronization finished
 *       1 - synchronization stopped
 */
static void
g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
	    g_mirror_disk_state2str(disk->d_state)));
	/* Nothing to do if synchronization was never started. */
	if (disk->d_sync.ds_consumer == NULL)
		return;

	if (type == 0) {
		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
		    sc->sc_name, g_mirror_get_diskname(disk));
	} else /* if (type == 1) */ {
		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
		    sc->sc_name, g_mirror_get_diskname(disk));
	}
	free(disk->d_sync.ds_bios, M_MIRROR);
	disk->d_sync.ds_bios = NULL;
	cp = disk->d_sync.ds_consumer;
	disk->d_sync.ds_consumer = NULL;
	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
	sc->sc_sync.ds_ndisks--;
	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
	g_topology_lock();
	g_mirror_kill_consumer(sc, cp);
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
}

/*
 * Create and announce the mirror/<name> provider for an assembled device
 * and start synchronization for every disk that still needs it.
 */
static void
g_mirror_launch_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_provider *pp;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_topology_lock();
	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
	pp->mediasize = sc->sc_mediasize;
	pp->sectorsize = sc->sc_sectorsize;
	pp->stripesize = 0;
	pp->stripeoffset = 0;
	/* Advertise the largest stripe size found among the components. */
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_consumer && disk->d_consumer->provider &&
		    disk->d_consumer->provider->stripesize > pp->stripesize) {
			pp->stripesize = disk->d_consumer->provider->stripesize;
			pp->stripeoffset = disk->d_consumer->provider->stripeoffset;
		}
	}
	sc->sc_provider = pp;
	g_error_provider(pp, 0);
	g_topology_unlock();
	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_start(disk);
	}
}

/*
 * Tear down the device's provider: fail every queued request with ENXIO
 * and wither the provider so GEOM disposes of it.
 */
static void
g_mirror_destroy_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct bio *bp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	g_topology_lock();
	g_error_provider(sc->sc_provider, ENXIO);
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
		bioq_remove(&sc->sc_queue, bp);
		g_io_deliver(bp, ENXIO);
	}
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
	    sc->sc_provider->name);
	sc->sc_provider->flags |= G_PF_WITHER;
	g_orphan_provider(sc->sc_provider, ENXIO);
	g_topology_unlock();

	/* The provider is gone; forget it and stop in-progress rebuilds. */
	sc->sc_provider = NULL;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_stop(disk, 1);
	}
}

/*
 * Callout handler: force the device to start after the configured
 * timeout even though not all components have appeared.
 */
static void
g_mirror_go(void *arg)
{
	struct g_mirror_softc *sc;

	sc = arg;
	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
	g_mirror_event_send(sc, 0,
	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
}

/*
 * Decide what state a disk should enter, based on how its syncid
 * compares with the device's syncid.  May destroy the disk (and return
 * G_MIRROR_DISK_STATE_NONE) when the disk is fresher than the running
 * device.
 */
static u_int
g_mirror_determine_state(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	u_int state;

	sc = disk->d_softc;
	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
		if ((disk->d_flags &
		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
			/* Disk does not need synchronization. */
			state = G_MIRROR_DISK_STATE_ACTIVE;
		} else {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
			    (disk->d_flags &
			    G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
				/*
				 * We can start synchronization from
				 * the stored offset.
				 */
				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
			} else {
				state = G_MIRROR_DISK_STATE_STALE;
			}
		}
	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because if it even was synchronized, it was
		 * synchronized to disks with different syncid.
		 */
		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		disk->d_sync.ds_syncid = sc->sc_syncid;
		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
		} else {
			state = G_MIRROR_DISK_STATE_STALE;
		}
	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that mirror was started on stale disks
		 * and more fresh disk just arrive.
		 * If there were writes, mirror is broken, sorry.
		 * I think the best choice here is don't touch
		 * this disk and inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrives!! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
		g_mirror_destroy_disk(disk);
		state = G_MIRROR_DISK_STATE_NONE;
		/* Return immediately, because disk was destroyed. */
		return (state);
	}
	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
	return (state);
}

/*
 * Update device state.
 */
static void
g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_mirror_disk *disk;
	u_int state;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	switch (sc->sc_state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
	    {
		struct g_mirror_disk *pdisk, *tdisk;
		u_int dirty, ndisks, genid, syncid;

		KASSERT(sc->sc_provider == NULL,
		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
		/*
		 * Are we ready? We are, if all disks are connected or
		 * if we have any disks and 'force' is true.
		 */
		ndisks = g_mirror_ndisks(sc, -1);
		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
			;
		} else if (ndisks == 0) {
			/*
			 * Disks went down in starting phase, so destroy
			 * device.
			 */
			callout_drain(&sc->sc_callout);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
			    sc->sc_rootmount);
			root_mount_rel(sc->sc_rootmount);
			sc->sc_rootmount = NULL;
			return;
		} else {
			/* Still waiting for more components to show up. */
			return;
		}

		/*
		 * Activate all disks with the biggest syncid.
		 */
		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling timeout.
			 */
			ndisks = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
					ndisks++;
				}
			}
			if (ndisks == 0) {
				/* No valid disks found, destroy device. */
				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
				return;
			}
		} else {
			/* Cancel timeout. */
			callout_drain(&sc->sc_callout);
		}

		/*
		 * Find the biggest genid.
		 */
		genid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_genid > genid)
				genid = disk->d_genid;
		}
		sc->sc_genid = genid;
		/*
		 * Remove all disks without the biggest genid.
		 */
		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
			if (disk->d_genid < genid) {
				G_MIRROR_DEBUG(0,
				    "Component %s (device %s) broken, skipping.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				g_mirror_destroy_disk(disk);
			}
		}

		/*
		 * Find the biggest syncid.
		 */
		syncid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid > syncid)
				syncid = disk->d_sync.ds_syncid;
		}

		/*
		 * Here we need to look for dirty disks and if all disks
		 * with the biggest syncid are dirty, we have to choose
		 * one with the biggest priority and rebuild the rest.
		 */
		/*
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
		dirty = ndisks = 0;
		pdisk = NULL;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid != syncid)
				continue;
			if ((disk->d_flags &
			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
				continue;
			}
			ndisks++;
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
				dirty++;
				if (pdisk == NULL ||
				    pdisk->d_priority < disk->d_priority) {
					pdisk = disk;
				}
			}
		}
		if (dirty == 0) {
			/* No dirty disks at all, great. */
		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except one
			 * with the biggest priority.
			 */
			KASSERT(pdisk != NULL, ("pdisk == NULL"));
			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
			    "master disk for synchronization.",
			    g_mirror_get_diskname(pdisk), sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				KASSERT((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
				    ("Disk %s isn't marked as dirty.",
				    g_mirror_get_diskname(disk)));
				/* Skip the disk with the biggest priority. */
				if (disk == pdisk)
					continue;
				disk->d_sync.ds_syncid = 0;
			}
		} else if (dirty < ndisks) {
			/*
			 * Force synchronization for all dirty disks.
			 * We have some non-dirty disks.
			 */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
					continue;
				}
				disk->d_sync.ds_syncid = 0;
			}
		}

		/* Reset hint. */
		sc->sc_hint = NULL;
		sc->sc_syncid = syncid;
		if (force) {
			/* Remember to bump syncid on first write. */
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		state = G_MIRROR_DEVICE_STATE_RUNNING;
		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
		    g_mirror_device_state2str(state));
		sc->sc_state = state;
		/* Post per-disk state events for the freshly running device. */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			state = g_mirror_determine_state(disk);
			g_mirror_event_send(disk, state,
			    G_MIRROR_EVENT_DONTWAIT);
			if (state == G_MIRROR_DISK_STATE_STALE)
				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		break;
	    }
	case G_MIRROR_DEVICE_STATE_RUNNING:
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * No active disks or no disks at all,
			 * so destroy device.
			 */
			if (sc->sc_provider != NULL)
				g_mirror_destroy_provider(sc);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			break;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * We have active disks, launch provider if it doesn't
			 * exist.
			 */
			if (sc->sc_provider == NULL)
				g_mirror_launch_provider(sc);
			if (sc->sc_rootmount != NULL) {
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
			}
		}
		/*
		 * Genid should be bumped immediately, so do it here.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
			g_mirror_bump_genid(sc);
		}
		break;
	default:
		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
		break;
	}
}

/*
 * Update disk state and device state if needed.
2408 */ 2409#define DISK_STATE_CHANGED() G_MIRROR_DEBUG(1, \ 2410 "Disk %s state changed from %s to %s (device %s).", \ 2411 g_mirror_get_diskname(disk), \ 2412 g_mirror_disk_state2str(disk->d_state), \ 2413 g_mirror_disk_state2str(state), sc->sc_name) 2414static int 2415g_mirror_update_disk(struct g_mirror_disk *disk, u_int state) 2416{ 2417 struct g_mirror_softc *sc; 2418 2419 sc = disk->d_softc; 2420 sx_assert(&sc->sc_lock, SX_XLOCKED); 2421 2422again: 2423 G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.", 2424 g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state), 2425 g_mirror_disk_state2str(state)); 2426 switch (state) { 2427 case G_MIRROR_DISK_STATE_NEW: 2428 /* 2429 * Possible scenarios: 2430 * 1. New disk arrive. 2431 */ 2432 /* Previous state should be NONE. */ 2433 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE, 2434 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2435 g_mirror_disk_state2str(disk->d_state))); 2436 DISK_STATE_CHANGED(); 2437 2438 disk->d_state = state; 2439 if (LIST_EMPTY(&sc->sc_disks)) 2440 LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next); 2441 else { 2442 struct g_mirror_disk *dp; 2443 2444 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 2445 if (disk->d_priority >= dp->d_priority) { 2446 LIST_INSERT_BEFORE(dp, disk, d_next); 2447 dp = NULL; 2448 break; 2449 } 2450 if (LIST_NEXT(dp, d_next) == NULL) 2451 break; 2452 } 2453 if (dp != NULL) 2454 LIST_INSERT_AFTER(dp, disk, d_next); 2455 } 2456 G_MIRROR_DEBUG(1, "Device %s: provider %s detected.", 2457 sc->sc_name, g_mirror_get_diskname(disk)); 2458 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2459 break; 2460 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2461 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2462 g_mirror_device_state2str(sc->sc_state), 2463 g_mirror_get_diskname(disk), 2464 g_mirror_disk_state2str(disk->d_state))); 2465 state = g_mirror_determine_state(disk); 2466 if (state != G_MIRROR_DISK_STATE_NONE) 2467 goto again; 
2468 break; 2469 case G_MIRROR_DISK_STATE_ACTIVE: 2470 /* 2471 * Possible scenarios: 2472 * 1. New disk does not need synchronization. 2473 * 2. Synchronization process finished successfully. 2474 */ 2475 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2476 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2477 g_mirror_device_state2str(sc->sc_state), 2478 g_mirror_get_diskname(disk), 2479 g_mirror_disk_state2str(disk->d_state))); 2480 /* Previous state should be NEW or SYNCHRONIZING. */ 2481 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW || 2482 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2483 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2484 g_mirror_disk_state2str(disk->d_state))); 2485 DISK_STATE_CHANGED(); 2486 2487 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 2488 disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING; 2489 disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; 2490 g_mirror_sync_stop(disk, 0); 2491 } 2492 disk->d_state = state; 2493 disk->d_sync.ds_offset = 0; 2494 disk->d_sync.ds_offset_done = 0; 2495 g_mirror_update_idle(sc, disk); 2496 g_mirror_update_metadata(disk); 2497 G_MIRROR_DEBUG(1, "Device %s: provider %s activated.", 2498 sc->sc_name, g_mirror_get_diskname(disk)); 2499 break; 2500 case G_MIRROR_DISK_STATE_STALE: 2501 /* 2502 * Possible scenarios: 2503 * 1. Stale disk was connected. 2504 */ 2505 /* Previous state should be NEW. */ 2506 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2507 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2508 g_mirror_disk_state2str(disk->d_state))); 2509 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2510 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2511 g_mirror_device_state2str(sc->sc_state), 2512 g_mirror_get_diskname(disk), 2513 g_mirror_disk_state2str(disk->d_state))); 2514 /* 2515 * STALE state is only possible if device is marked 2516 * NOAUTOSYNC. 
2517 */ 2518 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0, 2519 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2520 g_mirror_device_state2str(sc->sc_state), 2521 g_mirror_get_diskname(disk), 2522 g_mirror_disk_state2str(disk->d_state))); 2523 DISK_STATE_CHANGED(); 2524 2525 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2526 disk->d_state = state; 2527 g_mirror_update_metadata(disk); 2528 G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.", 2529 sc->sc_name, g_mirror_get_diskname(disk)); 2530 break; 2531 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 2532 /* 2533 * Possible scenarios: 2534 * 1. Disk which needs synchronization was connected. 2535 */ 2536 /* Previous state should be NEW. */ 2537 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2538 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2539 g_mirror_disk_state2str(disk->d_state))); 2540 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2541 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2542 g_mirror_device_state2str(sc->sc_state), 2543 g_mirror_get_diskname(disk), 2544 g_mirror_disk_state2str(disk->d_state))); 2545 DISK_STATE_CHANGED(); 2546 2547 if (disk->d_state == G_MIRROR_DISK_STATE_NEW) 2548 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2549 disk->d_state = state; 2550 if (sc->sc_provider != NULL) { 2551 g_mirror_sync_start(disk); 2552 g_mirror_update_metadata(disk); 2553 } 2554 break; 2555 case G_MIRROR_DISK_STATE_DISCONNECTED: 2556 /* 2557 * Possible scenarios: 2558 * 1. Device wasn't running yet, but disk disappear. 2559 * 2. Disk was active and disapppear. 2560 * 3. Disk disappear during synchronization process. 2561 */ 2562 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) { 2563 /* 2564 * Previous state should be ACTIVE, STALE or 2565 * SYNCHRONIZING. 
2566 */ 2567 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 2568 disk->d_state == G_MIRROR_DISK_STATE_STALE || 2569 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2570 ("Wrong disk state (%s, %s).", 2571 g_mirror_get_diskname(disk), 2572 g_mirror_disk_state2str(disk->d_state))); 2573 } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) { 2574 /* Previous state should be NEW. */ 2575 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2576 ("Wrong disk state (%s, %s).", 2577 g_mirror_get_diskname(disk), 2578 g_mirror_disk_state2str(disk->d_state))); 2579 /* 2580 * Reset bumping syncid if disk disappeared in STARTING 2581 * state. 2582 */ 2583 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) 2584 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; 2585#ifdef INVARIANTS 2586 } else { 2587 KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2588 sc->sc_name, 2589 g_mirror_device_state2str(sc->sc_state), 2590 g_mirror_get_diskname(disk), 2591 g_mirror_disk_state2str(disk->d_state))); 2592#endif 2593 } 2594 DISK_STATE_CHANGED(); 2595 G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.", 2596 sc->sc_name, g_mirror_get_diskname(disk)); 2597 2598 g_mirror_destroy_disk(disk); 2599 break; 2600 case G_MIRROR_DISK_STATE_DESTROY: 2601 { 2602 int error; 2603 2604 error = g_mirror_clear_metadata(disk); 2605 if (error != 0) 2606 return (error); 2607 DISK_STATE_CHANGED(); 2608 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", 2609 sc->sc_name, g_mirror_get_diskname(disk)); 2610 2611 g_mirror_destroy_disk(disk); 2612 sc->sc_ndisks--; 2613 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2614 g_mirror_update_metadata(disk); 2615 } 2616 break; 2617 } 2618 default: 2619 KASSERT(1 == 0, ("Unknown state (%u).", state)); 2620 break; 2621 } 2622 return (0); 2623} 2624#undef DISK_STATE_CHANGED 2625 2626int 2627g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md) 2628{ 2629 struct g_provider *pp; 2630 u_char *buf; 2631 int error; 2632 2633 
	g_topology_assert();

	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);
	pp = cp->provider;
	/* Drop the topology lock around the actual I/O. */
	g_topology_unlock();
	/* Metadata are stored on last sector. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	g_topology_lock();
	g_access(cp, -1, 0, 0);
	if (buf == NULL) {
		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
		    cp->provider->name, error);
		return (error);
	}

	/* Decode metadata. */
	error = mirror_metadata_decode(buf, md);
	g_free(buf);
	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
		return (EINVAL);
	if (md->md_version > G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0,
		    "Kernel module is too old to handle metadata from %s.",
		    cp->provider->name);
		return (EINVAL);
	}
	/* Decode error (MD5 mismatch) is reported after the magic checks. */
	if (error != 0) {
		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
		    cp->provider->name);
		return (error);
	}

	return (0);
}

/*
 * Validate on-disk metadata against the already-configured device.
 * Returns EEXIST for a duplicate disk id and EINVAL for any mismatch.
 */
static int
g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{

	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
		    pp->name, md->md_did);
		return (EEXIST);
	}
	if (md->md_all != sc->sc_ndisks) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_all", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_slice != sc->sc_slice) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_slice", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_balance != sc->sc_balance) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_balance", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_mediasize != sc->sc_mediasize) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_mediasize", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (sc->sc_mediasize > pp->mediasize) {
		G_MIRROR_DEBUG(1,
		    "Invalid size of disk %s (device %s), skipping.", pp->name,
		    sc->sc_name);
		return (EINVAL);
	}
	if (md->md_sectorsize != sc->sc_sectorsize) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_sectorsize", pp->name, sc->sc_name);
		return (EINVAL);
	}
	/* Device sector size must be a multiple of the component's. */
	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid sector size of disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid device flags on disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid disk flags on disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	return (0);
}

/*
 * Add a tasted component to the device: check its metadata, create the
 * disk and post a NEW-state event.  Upgrades older on-disk metadata
 * versions in place.
 */
int
g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{
	struct g_mirror_disk *disk;
	int error;

	g_topology_assert_not();
	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);

	error = g_mirror_check_metadata(sc, pp, md);
	if (error != 0)
		return (error);
	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
	    md->md_genid < sc->sc_genid) {
		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	disk = g_mirror_init_disk(sc, pp, md, &error);
	if (disk == NULL)
		return (error);
	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
	    G_MIRROR_EVENT_WAIT);
	if (error != 0)
		return (error);
	if (md->md_version < G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
		    pp->name, md->md_version, G_MIRROR_VERSION);
		g_mirror_update_metadata(disk);
	}
	return (0);
}

/*
 * GEOM event callback: perform the destruction that was deferred until
 * last close (scheduled from g_mirror_access()).
 */
static void
g_mirror_destroy_delayed(void *arg, int flag)
{
	struct g_mirror_softc *sc;
	int error;

	if (flag == EV_CANCEL) {
		G_MIRROR_DEBUG(1, "Destroying canceled.");
		return;
	}
	sc = arg;
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
	    ("DESTROY flag set on %s.", sc->sc_name));
	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
	    ("DESTROYING flag not set on %s.", sc->sc_name));
	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
	if (error != 0) {
		/* On success g_mirror_destroy() consumed sc and its lock. */
		G_MIRROR_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
		sx_xunlock(&sc->sc_lock);
	}
	g_topology_lock();
}

/*
 * GEOM access method for the mirror provider.  Computes the resulting
 * access counts and, when the DESTROYING flag is set, schedules delayed
 * destruction on last close.
 */
static int
g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
{
	struct g_mirror_softc *sc;
	int dcr, dcw, dce, error = 0;

	g_topology_assert();
	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
	    acw, ace);

	sc = pp->geom->softc;
	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
		return (0);
	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));

	/* Access counts after this request is applied. */
	dcr = pp->acr + acr;
	dcw = pp->acw + acw;
	dce = pp->ace + ace;

	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
	    LIST_EMPTY(&sc->sc_disks)) {
		if (acr > 0 || acw > 0 || ace > 0)
			error = ENXIO;
		goto end;
	}
	/* No writers remain: let the device go idle. */
	if (dcw == 0)
		g_mirror_idle(sc, dcw);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
		if (acr > 0 || acw > 0 || ace > 0) {
			error = ENXIO;
			goto end;
		}
		/* Last close while DESTROYING: destroy from an event. */
		if (dcr == 0 && dcw == 0 && dce == 0) {
			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
			    sc, NULL);
		}
	}
end:
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (error);
}

/*
 * Create the softc, the action and synchronization geoms and the worker
 * thread for a new mirror described by on-disk metadata.  Returns the
 * action geom or NULL on failure.
 */
static struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_geom *gp;
	int error, timeout;

	g_topology_assert();
	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
	    md->md_mid);

	/* One disk is minimum. */
	if (md->md_all < 1)
		return (NULL);
	/*
	 * Action geom.
	 */
	gp = g_new_geomf(mp, "%s", md->md_name);
	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
	gp->start = g_mirror_start;
	gp->orphan = g_mirror_orphan;
	gp->access = g_mirror_access;
	gp->dumpconf = g_mirror_dumpconf;

	sc->sc_id = md->md_mid;
	sc->sc_slice = md->md_slice;
	sc->sc_balance = md->md_balance;
	sc->sc_mediasize = md->md_mediasize;
	sc->sc_sectorsize = md->md_sectorsize;
	sc->sc_ndisks = md->md_all;
	sc->sc_flags = md->md_mflags;
	sc->sc_bump_id = 0;
	sc->sc_idle = 1;
	sc->sc_last_write = time_uptime;
	sc->sc_writes = 0;
	sx_init(&sc->sc_lock, "gmirror:lock");
	bioq_init(&sc->sc_queue);
	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
	bioq_init(&sc->sc_regular_delayed);
	bioq_init(&sc->sc_inflight);
	bioq_init(&sc->sc_sync_delayed);
	LIST_INIT(&sc->sc_disks);
	TAILQ_INIT(&sc->sc_events);
	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
	gp->softc = sc;
	sc->sc_geom = gp;
	sc->sc_provider = NULL;
	/*
	 * Synchronization geom.
	 */
	gp = g_new_geomf(mp, "%s.sync", md->md_name);
	gp->softc = sc;
	gp->orphan = g_mirror_orphan;
	sc->sc_sync.ds_geom = gp;
	sc->sc_sync.ds_ndisks = 0;
	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
	    "g_mirror %s", md->md_name);
	if (error != 0) {
		/* Unwind everything created above. */
		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
		    sc->sc_name);
		g_destroy_geom(sc->sc_sync.ds_geom);
		mtx_destroy(&sc->sc_events_mtx);
		mtx_destroy(&sc->sc_queue_mtx);
		sx_destroy(&sc->sc_lock);
		g_destroy_geom(sc->sc_geom);
		free(sc, M_MIRROR);
		return (NULL);
	}

	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
	    sc->sc_name, sc->sc_ndisks, sc->sc_id);

	sc->sc_rootmount = root_mount_hold("GMIRROR");
	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
	/*
	 * Run timeout.
	 */
	timeout = g_mirror_timeout * hz;
	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
	return (sc->sc_geom);
}

/*
 * Destroy the device according to 'how' (soft/delayed/hard).  Returns
 * EBUSY when the provider is still open and the request cannot proceed;
 * on success the softc is freed (sc_lock is consumed with it).
 */
int
g_mirror_destroy(struct g_mirror_softc *sc, int how)
{
	struct g_mirror_disk *disk;
	struct g_provider *pp;

	g_topology_assert_not();
	if (sc == NULL)
		return (ENXIO);
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	pp = sc->sc_provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		switch (how) {
		case G_MIRROR_DESTROY_SOFT:
			G_MIRROR_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		case G_MIRROR_DESTROY_DELAYED:
			G_MIRROR_DEBUG(1,
			    "Device %s will be destroyed on last close.",
			    pp->name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_state ==
				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
					g_mirror_sync_stop(disk, 1);
				}
			}
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
			return (EBUSY);
		case G_MIRROR_DESTROY_HARD:
			/* Proceed despite the device being open. */
			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
			    "can't be definitely removed.", pp->name);
		}
	}

	g_topology_lock();
	/* Another thread already detached the softc: nothing to do. */
	if (sc->sc_geom->softc == NULL) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	g_topology_unlock();

	/* Ask the worker thread to finish up, then wait for it to exit. */
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	sx_xunlock(&sc->sc_lock);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
	while (sc->sc_worker != NULL)
		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
	sx_xlock(&sc->sc_lock);
	g_mirror_destroy_device(sc);
	free(sc, M_MIRROR);
	return (0);
}

/*
 * Orphan method for the temporary taste geom; must never fire.
 */
static void
g_mirror_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

/*
 * GEOM taste method: read gmirror metadata from the provider and, when
 * valid, attach the component to an existing device or create a new one.
 */
static struct g_geom *
g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_mirror_metadata md;
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);

	/* Use a throw-away geom/consumer just to read the metadata. */
	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should be never called.
	 */
	gp->orphan = g_mirror_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_mirror_read_metadata(cp, &md);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	if (md.md_provider[0] != '\0' &&
	    !g_compare_names(md.md_provider, pp->name))
		return (NULL);
	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
		return (NULL);
	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
		G_MIRROR_DEBUG(0,
		    "Device %s: provider %s marked as inactive, skipping.",
		    md.md_name, pp->name);
		return (NULL);
	}
	if (g_mirror_debug >= 2)
		mirror_metadata_dump(&md);

	/*
	 * Let's check if device already exists.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_sync.ds_geom == gp)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		if (md.md_mid != sc->sc_id) {
			G_MIRROR_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
		break;
	}
	if (gp == NULL) {
		gp = g_mirror_create(mp, &md);
		if (gp == NULL) {
			G_MIRROR_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
	}
	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
	error = g_mirror_add_disk(sc, pp, &md);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
		    pp->name, gp->name, error);
		/* A device left with no disks at all must be torn down. */
		if (LIST_EMPTY(&sc->sc_disks)) {
			g_cancel_event(sc);
			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
			g_topology_lock();
			return (NULL);
		}
		gp = NULL;
	}
	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
		g_topology_lock();
		return (NULL);
	}
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (gp);
}

/*
 * Class destroy_geom method: soft-destroy the device behind 'gp'.
 */
static int
g_mirror_destroy_geom(struct gctl_req *req __unused,
    struct g_class *mp __unused, struct g_geom *gp)
{
	struct g_mirror_softc *sc;
	int error;

	g_topology_unlock();
	sc = gp->softc;
	sx_xlock(&sc->sc_lock);
	g_cancel_event(sc);
	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
	if (error != 0)
		sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (error);
}

/*
 * dumpconf method: emit per-consumer/provider XML for sysctl kern.geom.
 */
static void
g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	/* Skip synchronization geom. */
	if (gp == sc->sc_sync.ds_geom)
		return;
	if (pp != NULL) {
		/* Nothing here.
*/ 3129 } else if (cp != NULL) { 3130 struct g_mirror_disk *disk; 3131 3132 disk = cp->private; 3133 if (disk == NULL) 3134 return; 3135 g_topology_unlock(); 3136 sx_xlock(&sc->sc_lock); 3137 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id); 3138 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 3139 sbuf_printf(sb, "%s<Synchronized>", indent); 3140 if (disk->d_sync.ds_offset == 0) 3141 sbuf_printf(sb, "0%%"); 3142 else { 3143 sbuf_printf(sb, "%u%%", 3144 (u_int)((disk->d_sync.ds_offset * 100) / 3145 sc->sc_provider->mediasize)); 3146 } 3147 sbuf_printf(sb, "</Synchronized>\n"); 3148 if (disk->d_sync.ds_offset > 0) { 3149 sbuf_printf(sb, "%s<BytesSynced>%jd" 3150 "</BytesSynced>\n", indent, 3151 (intmax_t)disk->d_sync.ds_offset); 3152 } 3153 } 3154 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3155 disk->d_sync.ds_syncid); 3156 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, 3157 disk->d_genid); 3158 sbuf_printf(sb, "%s<Flags>", indent); 3159 if (disk->d_flags == 0) 3160 sbuf_printf(sb, "NONE"); 3161 else { 3162 int first = 1; 3163 3164#define ADD_FLAG(flag, name) do { \ 3165 if ((disk->d_flags & (flag)) != 0) { \ 3166 if (!first) \ 3167 sbuf_printf(sb, ", "); \ 3168 else \ 3169 first = 0; \ 3170 sbuf_printf(sb, name); \ 3171 } \ 3172} while (0) 3173 ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); 3174 ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); 3175 ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); 3176 ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, 3177 "SYNCHRONIZING"); 3178 ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3179 ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN"); 3180#undef ADD_FLAG 3181 } 3182 sbuf_printf(sb, "</Flags>\n"); 3183 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent, 3184 disk->d_priority); 3185 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3186 g_mirror_disk_state2str(disk->d_state)); 3187 sx_xunlock(&sc->sc_lock); 3188 g_topology_lock(); 3189 } else { 3190 g_topology_unlock(); 3191 
sx_xlock(&sc->sc_lock); 3192 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3193 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 3194 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3195 sbuf_printf(sb, "%s<Flags>", indent); 3196 if (sc->sc_flags == 0) 3197 sbuf_printf(sb, "NONE"); 3198 else { 3199 int first = 1; 3200 3201#define ADD_FLAG(flag, name) do { \ 3202 if ((sc->sc_flags & (flag)) != 0) { \ 3203 if (!first) \ 3204 sbuf_printf(sb, ", "); \ 3205 else \ 3206 first = 0; \ 3207 sbuf_printf(sb, name); \ 3208 } \ 3209} while (0) 3210 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); 3211 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3212#undef ADD_FLAG 3213 } 3214 sbuf_printf(sb, "</Flags>\n"); 3215 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent, 3216 (u_int)sc->sc_slice); 3217 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent, 3218 balance_name(sc->sc_balance)); 3219 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3220 sc->sc_ndisks); 3221 sbuf_printf(sb, "%s<State>", indent); 3222 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 3223 sbuf_printf(sb, "%s", "STARTING"); 3224 else if (sc->sc_ndisks == 3225 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) 3226 sbuf_printf(sb, "%s", "COMPLETE"); 3227 else 3228 sbuf_printf(sb, "%s", "DEGRADED"); 3229 sbuf_printf(sb, "</State>\n"); 3230 sx_xunlock(&sc->sc_lock); 3231 g_topology_lock(); 3232 } 3233} 3234 3235static void 3236g_mirror_shutdown_post_sync(void *arg, int howto) 3237{ 3238 struct g_class *mp; 3239 struct g_geom *gp, *gp2; 3240 struct g_mirror_softc *sc; 3241 int error; 3242 3243 mp = arg; 3244 DROP_GIANT(); 3245 g_topology_lock(); 3246 g_mirror_shutdown = 1; 3247 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3248 if ((sc = gp->softc) == NULL) 3249 continue; 3250 /* Skip synchronization geom. 
*/ 3251 if (gp == sc->sc_sync.ds_geom) 3252 continue; 3253 g_topology_unlock(); 3254 sx_xlock(&sc->sc_lock); 3255 g_mirror_idle(sc, -1); 3256 g_cancel_event(sc); 3257 error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED); 3258 if (error != 0) 3259 sx_xunlock(&sc->sc_lock); 3260 g_topology_lock(); 3261 } 3262 g_topology_unlock(); 3263 PICKUP_GIANT(); 3264} 3265 3266static void 3267g_mirror_init(struct g_class *mp) 3268{ 3269 3270 g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, 3271 g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); 3272 if (g_mirror_post_sync == NULL) 3273 G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event."); 3274} 3275 3276static void 3277g_mirror_fini(struct g_class *mp) 3278{ 3279 3280 if (g_mirror_post_sync != NULL) 3281 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); 3282} 3283 3284DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); 3285