/* g_mirror.c, revision 324402 */
/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2643166Snsouch
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/geom/mirror/g_mirror.c 324402 2017-10-07 23:06:49Z ngie $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>
49165951Sjhb
FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

/* Tunables/sysctls under kern.geom.mirror. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");

/*
 * msleep() wrapper that traces the sleep and wakeup at debug level 4.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
7843166Snsouch
/* Shutdown event handler tag and flag; set when the system is going down. */
static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

/* GEOM class method implementations (defined later in this file). */
static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

/* Method table registering this file as the GEOM "MIRROR" class. */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


/* Forward declarations for helpers used before their definitions. */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);
110165951Sjhb
111165951Sjhb
112162289Sjhbstatic const char *
113178972Sjhbg_mirror_disk_state2str(int state)
114178972Sjhb{
115165951Sjhb
11643166Snsouch	switch (state) {
117165951Sjhb	case G_MIRROR_DISK_STATE_NONE:
118165951Sjhb		return ("NONE");
119165951Sjhb	case G_MIRROR_DISK_STATE_NEW:
120165951Sjhb		return ("NEW");
121165951Sjhb	case G_MIRROR_DISK_STATE_ACTIVE:
122165951Sjhb		return ("ACTIVE");
123165951Sjhb	case G_MIRROR_DISK_STATE_STALE:
124165951Sjhb		return ("STALE");
12543166Snsouch	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
126165951Sjhb		return ("SYNCHRONIZING");
127165951Sjhb	case G_MIRROR_DISK_STATE_DISCONNECTED:
128165951Sjhb		return ("DISCONNECTED");
129165951Sjhb	case G_MIRROR_DISK_STATE_DESTROY:
130165951Sjhb		return ("DESTROY");
131165951Sjhb	default:
132165951Sjhb		return ("INVALID");
133165951Sjhb	}
134165951Sjhb}
135165951Sjhb
136165951Sjhbstatic const char *
137165951Sjhbg_mirror_device_state2str(int state)
138165951Sjhb{
139165951Sjhb
140165951Sjhb	switch (state) {
141165951Sjhb	case G_MIRROR_DEVICE_STATE_STARTING:
142165951Sjhb		return ("STARTING");
143165951Sjhb	case G_MIRROR_DEVICE_STATE_RUNNING:
144168870Sjhb		return ("RUNNING");
14543166Snsouch	default:
146165951Sjhb		return ("INVALID");
147165951Sjhb	}
148165951Sjhb}
149165951Sjhb
150165951Sjhbstatic const char *
15146651Speterg_mirror_get_diskname(struct g_mirror_disk *disk)
152165951Sjhb{
153165951Sjhb
154165951Sjhb	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
155165951Sjhb		return ("[unknown]");
156165951Sjhb	return (disk->d_name);
157165951Sjhb}
158165951Sjhb
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device status
 * from a single worker thread, which simplifies locking.
 */
/*
 * Release an event structure allocated by g_mirror_event_send().
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}
170165951Sjhb
/*
 * Queue a state-change event for the worker thread and wake it up.
 * 'arg' is the softc when G_MIRROR_EVENT_DEVICE is set in 'flags',
 * otherwise it is a disk.  With G_MIRROR_EVENT_DONTWAIT the call is
 * fire-and-forget; otherwise sc_lock is dropped and the caller sleeps
 * until the worker marks the event done, then its error is returned.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		/* PDROP: msleep() releases sc_events_mtx when it returns. */
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}
214165951Sjhb
215165951Sjhbstatic struct g_mirror_event *
21643166Snsouchg_mirror_event_get(struct g_mirror_softc *sc)
217165951Sjhb{
218162289Sjhb	struct g_mirror_event *ep;
219165951Sjhb
220165951Sjhb	mtx_lock(&sc->sc_events_mtx);
221165951Sjhb	ep = TAILQ_FIRST(&sc->sc_events);
222165951Sjhb	mtx_unlock(&sc->sc_events_mtx);
22343166Snsouch	return (ep);
22443166Snsouch}
225162289Sjhb
/*
 * Unlink an event from the pending queue.  The caller still owns 'ep'
 * and is responsible for freeing or completing it.
 */
static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}
23443166Snsouch
/*
 * Cancel all events still queued for the given disk.  Fire-and-forget
 * events are freed here; events with a sleeping sender are completed
 * with ECANCELED and their waiters are woken up (the waiter frees them).
 */
static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		/* Device-wide events are not tied to any disk; skip them. */
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}
260162289Sjhb
261165951Sjhb/*
262165951Sjhb * Return the number of disks in given state.
263165951Sjhb * If state is equal to -1, count all connected disks.
264162289Sjhb */
26543166Snsouchu_int
26643166Snsouchg_mirror_ndisks(struct g_mirror_softc *sc, int state)
26743166Snsouch{
268165951Sjhb	struct g_mirror_disk *disk;
26943166Snsouch	u_int n = 0;
270165951Sjhb
271162289Sjhb	sx_assert(&sc->sc_lock, SX_LOCKED);
272165951Sjhb
273162289Sjhb	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
274162289Sjhb		if (state == -1 || disk->d_state == state)
275162289Sjhb			n++;
276162289Sjhb	}
277162289Sjhb	return (n);
278162289Sjhb}
279165951Sjhb
280165951Sjhb/*
281162289Sjhb * Find a disk in mirror by its disk ID.
282162289Sjhb */
283162289Sjhbstatic struct g_mirror_disk *
284162289Sjhbg_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
28549064Snsouch{
286162289Sjhb	struct g_mirror_disk *disk;
28743166Snsouch
288162289Sjhb	sx_assert(&sc->sc_lock, SX_XLOCKED);
28943166Snsouch
290162289Sjhb	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
29143166Snsouch		if (disk->d_id == id)
292165951Sjhb			return (disk);
29343166Snsouch	}
294165951Sjhb	return (NULL);
295162289Sjhb}
296165951Sjhb
297162289Sjhbstatic u_int
298165951Sjhbg_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
299165951Sjhb{
300165951Sjhb	struct bio *bp;
301165951Sjhb	u_int nreqs = 0;
302162289Sjhb
30343166Snsouch	mtx_lock(&sc->sc_queue_mtx);
304162289Sjhb	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
305162289Sjhb		if (bp->bio_from == cp)
306165951Sjhb			nreqs++;
307162289Sjhb	}
308162289Sjhb	mtx_unlock(&sc->sc_queue_mtx);
309165951Sjhb	return (nreqs);
31043166Snsouch}
31143166Snsouch
/*
 * Return non-zero when the consumer still has I/O outstanding and thus
 * cannot be destroyed yet: either in-flight requests (tracked by
 * cp->index) or requests still sitting in the softc queue.
 */
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}
33043166Snsouch
/*
 * Deferred consumer destruction, run from the GEOM event thread with
 * the topology lock held (posted by g_mirror_kill_consumer()).
 */
static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}
343165951Sjhb
/*
 * Close and destroy a consumer, unless it is still busy.  When closing
 * the last writer the provider will be retasted; in that case defer the
 * detach/destroy until after the retaste event (see comment below).
 */
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	/* Dropping the last write count triggers a retaste of pp. */
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event was sent (inside g_access()), we
		 * can send an event to detach and destroy the consumer.
		 * A class which has a consumer connected to the given
		 * provider will not receive a retaste event for it.
		 * This is how retaste events are ignored when closing
		 * consumers opened for write: the consumer is detached and
		 * destroyed only after the retaste event is sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}
382189882Savg
/*
 * Create a consumer for the disk, attach it to provider 'pp' and open
 * it r1w1e1.  On failure everything is unwound and an errno is
 * returned; on success disk->d_consumer is set up and 0 is returned.
 */
static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;	/* no in-flight requests yet */

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}
419162289Sjhb
420165951Sjhbstatic void
421162289Sjhbg_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
422165951Sjhb{
423165951Sjhb
424165951Sjhb	g_topology_assert();
42543166Snsouch
426162289Sjhb	if (cp == NULL)
42743166Snsouch		return;
428162289Sjhb	if (cp->provider != NULL)
42943166Snsouch		g_mirror_kill_consumer(sc, cp);
430162289Sjhb	else
431165951Sjhb		g_destroy_consumer(cp);
432162289Sjhb}
433165951Sjhb
/*
 * Initialize disk. This means allocate memory, create consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 * On failure *errorp (when non-NULL) receives the errno and NULL is
 * returned; on success *errorp is set to 0.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	/* Seed the disk's runtime state from the on-disk metadata. */
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}
478162289Sjhb
/*
 * Remove a disk from the mirror: unlink it, cancel its pending events,
 * stop synchronization if needed, drop its consumer and free it.
 */
static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	/* Drop the round-robin hint if it points at this disk. */
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}
510165951Sjhb
/*
 * Final teardown of the softc once its last reference is dropped:
 * destroy its locks and free the structure.
 */
static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}
52143166Snsouch
/*
 * GEOM providergone method: the provider held a reference on the softc;
 * drop it and free the softc when this was the last reference.
 */
static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}
530165951Sjhb
/*
 * Tear down the whole mirror device: destroy the provider, flush each
 * disk's metadata and destroy it, cancel or complete all pending
 * events, drain the timeout callout, wither both geoms and drop the
 * softc reference.  Called with sc_lock held; it is released here.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		/* Mark components clean on an orderly shutdown. */
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			/* A sender is sleeping on this event; wake it. */
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}
578165951Sjhb
/*
 * GEOM orphan method: the underlying provider went away.  Request that
 * the worker disconnect the disk; bump the syncid so remaining disks
 * record that this one fell behind.
 */
static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}
59343166Snsouch
/*
 * Function should return the next active disk on the list.
 * It is possible that it will be the same disk as given.
 * If there are no active disks on list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	/* Walk the list circularly, wrapping to the head past the tail. */
	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	/* If we came all the way around, 'disk' itself must be active. */
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}
615165951Sjhb
61643166Snsouchstatic struct g_mirror_disk *
61743166Snsouchg_mirror_get_disk(struct g_mirror_softc *sc)
61843166Snsouch{
619162289Sjhb	struct g_mirror_disk *disk;
620162289Sjhb
621162289Sjhb	if (sc->sc_hint == NULL) {
622165951Sjhb		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
623165951Sjhb		if (sc->sc_hint == NULL)
624165951Sjhb			return (NULL);
625165951Sjhb	}
626165951Sjhb	disk = sc->sc_hint;
627162289Sjhb	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
628165951Sjhb		disk = g_mirror_find_next(sc, disk);
629165951Sjhb		if (disk == NULL)
630165951Sjhb			return (NULL);
631165951Sjhb	}
632165951Sjhb	sc->sc_hint = g_mirror_find_next(sc, disk);
633165951Sjhb	return (disk);
634165951Sjhb}
635165951Sjhb
/*
 * Write metadata (or an all-zero sector when md is NULL or the device
 * is being wiped) to the last sector of the disk's provider.  On write
 * failure the disk is marked broken and, if enough active disks
 * remain, it is disconnected with a genid bump.
 */
static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	/* Metadata lives in the provider's last sector. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case, when the size of parent provider reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	/* Fail point for testing metadata-write error handling. */
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}
694165951Sjhb
/*
 * Erase the on-disk metadata by writing a zeroed sector.  A no-op for
 * non-automatic (manual/launched) mirror types.
 */
static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}
716165951Sjhb
/*
 * Populate 'md' from the device state plus, when 'disk' is non-NULL,
 * that disk's per-component state.  With disk == NULL a fresh random
 * disk ID is generated and per-disk fields are zeroed (used when
 * labelling a brand new component).
 */
void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		/* Only a disk mid-sync records a resume offset. */
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}
758165951Sjhb
/*
 * Rebuild and write the disk's metadata sector.  A no-op for
 * non-automatic mirror types.  When the WIPE flag is set, 'md' is left
 * unfilled; g_mirror_write_metadata() then writes a zeroed sector
 * without encoding it.
 */
void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}
784
/*
 * Increment the device syncid and record it in the metadata of every
 * active or synchronizing disk, so disks absent at this point can
 * later be recognized as stale.
 */
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}
807
808static void
809g_mirror_bump_genid(struct g_mirror_softc *sc)
810{
811	struct g_mirror_disk *disk;
812
813	g_topology_assert_not();
814	sx_assert(&sc->sc_lock, SX_XLOCKED);
815	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
816	    ("%s called with no active disks (device=%s).", __func__,
817	    sc->sc_name));
818
819	sc->sc_genid++;
820	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
821	    sc->sc_genid);
822	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
823		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
824		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
825			disk->d_genid = sc->sc_genid;
826			g_mirror_update_metadata(disk);
827		}
828	}
829}
830
831static int
832g_mirror_idle(struct g_mirror_softc *sc, int acw)
833{
834	struct g_mirror_disk *disk;
835	int timeout;
836
837	g_topology_assert_not();
838	sx_assert(&sc->sc_lock, SX_XLOCKED);
839
840	if (sc->sc_provider == NULL)
841		return (0);
842	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
843		return (0);
844	if (sc->sc_idle)
845		return (0);
846	if (sc->sc_writes > 0)
847		return (0);
848	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
849		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
850		if (!g_mirror_shutdown && timeout > 0)
851			return (timeout);
852	}
853	sc->sc_idle = 1;
854	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
855		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
856			continue;
857		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
858		    g_mirror_get_diskname(disk), sc->sc_name);
859		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
860		g_mirror_update_metadata(disk);
861	}
862	return (0);
863}
864
865static void
866g_mirror_unidle(struct g_mirror_softc *sc)
867{
868	struct g_mirror_disk *disk;
869
870	g_topology_assert_not();
871	sx_assert(&sc->sc_lock, SX_XLOCKED);
872
873	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
874		return;
875	sc->sc_idle = 0;
876	sc->sc_last_write = time_uptime;
877	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
878		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
879			continue;
880		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
881		    g_mirror_get_diskname(disk), sc->sc_name);
882		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
883		g_mirror_update_metadata(disk);
884	}
885}
886
887static void
888g_mirror_flush_done(struct bio *bp)
889{
890	struct g_mirror_softc *sc;
891	struct bio *pbp;
892
893	pbp = bp->bio_parent;
894	sc = pbp->bio_to->private;
895	mtx_lock(&sc->sc_done_mtx);
896	if (pbp->bio_error == 0)
897		pbp->bio_error = bp->bio_error;
898	pbp->bio_completed += bp->bio_completed;
899	pbp->bio_inbed++;
900	if (pbp->bio_children == pbp->bio_inbed) {
901		mtx_unlock(&sc->sc_done_mtx);
902		g_io_deliver(pbp, pbp->bio_error);
903	} else
904		mtx_unlock(&sc->sc_done_mtx);
905	g_destroy_bio(bp);
906}
907
908static void
909g_mirror_done(struct bio *bp)
910{
911	struct g_mirror_softc *sc;
912
913	sc = bp->bio_from->geom->softc;
914	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
915	mtx_lock(&sc->sc_queue_mtx);
916	bioq_insert_tail(&sc->sc_queue, bp);
917	mtx_unlock(&sc->sc_queue_mtx);
918	wakeup(sc);
919}
920
/*
 * Handle completion of a regular (non-synchronization) request that was
 * cloned onto one component.  Accounts the child bio into its parent,
 * records errors (possibly marking the disk broken or disconnecting it),
 * and delivers or re-queues the parent once all children are in.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned; tear it down under the topology lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	/* Debug fail points may inject an error into the finished bio. */
	if (bp->bio_cmd == BIO_READ)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
	else if (bp->bio_cmd == BIO_WRITE)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		/* Success path: deliver the parent when all children are in. */
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			/* Log loudly only the first failure on this disk. */
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			/*
			 * Optionally drop the failing disk, but only while
			 * at least one other active disk remains.
			 */
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/* Discount the failed child from the parent's totals. */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		/* Retry the read on another disk unless this was the last one. */
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_insert_tail(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
1040
1041static void
1042g_mirror_sync_done(struct bio *bp)
1043{
1044	struct g_mirror_softc *sc;
1045
1046	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
1047	sc = bp->bio_from->geom->softc;
1048	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
1049	mtx_lock(&sc->sc_queue_mtx);
1050	bioq_insert_tail(&sc->sc_queue, bp);
1051	mtx_unlock(&sc->sc_queue_mtx);
1052	wakeup(sc);
1053}
1054
1055static void
1056g_mirror_candelete(struct bio *bp)
1057{
1058	struct g_mirror_softc *sc;
1059	struct g_mirror_disk *disk;
1060	int *val;
1061
1062	sc = bp->bio_to->private;
1063	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1064		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
1065			break;
1066	}
1067	val = (int *)bp->bio_data;
1068	*val = (disk != NULL);
1069	g_io_deliver(bp, 0);
1070}
1071
1072static void
1073g_mirror_kernel_dump(struct bio *bp)
1074{
1075	struct g_mirror_softc *sc;
1076	struct g_mirror_disk *disk;
1077	struct bio *cbp;
1078	struct g_kerneldump *gkd;
1079
1080	/*
1081	 * We configure dumping to the first component, because this component
1082	 * will be used for reading with 'prefer' balance algorithm.
1083	 * If the component with the highest priority is currently disconnected
1084	 * we will not be able to read the dump after the reboot if it will be
1085	 * connected and synchronized later. Can we do something better?
1086	 */
1087	sc = bp->bio_to->private;
1088	disk = LIST_FIRST(&sc->sc_disks);
1089
1090	gkd = (struct g_kerneldump *)bp->bio_data;
1091	if (gkd->length > bp->bio_to->mediasize)
1092		gkd->length = bp->bio_to->mediasize;
1093	cbp = g_clone_bio(bp);
1094	if (cbp == NULL) {
1095		g_io_deliver(bp, ENOMEM);
1096		return;
1097	}
1098	cbp->bio_done = g_std_done;
1099	g_io_request(cbp, disk->d_consumer);
1100	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1101	    g_mirror_get_diskname(disk));
1102}
1103
/*
 * Fan a BIO_FLUSH out to every ACTIVE component.  The parent bio is
 * delivered from g_mirror_flush_done() once all clones complete.
 */
static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	/*
	 * First allocate a clone for every active disk, so that on ENOMEM
	 * we can back out cleanly without having issued any I/O.
	 */
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;	/* Stash the target disk. */
		cbp->bio_to = disk->d_consumer->provider;
	}
	/* All clones allocated; now send them out. */
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}
1141
1142static void
1143g_mirror_start(struct bio *bp)
1144{
1145	struct g_mirror_softc *sc;
1146
1147	sc = bp->bio_to->private;
1148	/*
1149	 * If sc == NULL or there are no valid disks, provider's error
1150	 * should be set and g_mirror_start() should not be called at all.
1151	 */
1152	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1153	    ("Provider's error should be set (error=%d)(mirror=%s).",
1154	    bp->bio_to->error, bp->bio_to->name));
1155	G_MIRROR_LOGREQ(3, bp, "Request received.");
1156
1157	switch (bp->bio_cmd) {
1158	case BIO_READ:
1159	case BIO_WRITE:
1160	case BIO_DELETE:
1161		break;
1162	case BIO_FLUSH:
1163		g_mirror_flush(sc, bp);
1164		return;
1165	case BIO_GETATTR:
1166		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
1167			g_mirror_candelete(bp);
1168			return;
1169		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1170			g_mirror_kernel_dump(bp);
1171			return;
1172		}
1173		/* FALLTHROUGH */
1174	default:
1175		g_io_deliver(bp, EOPNOTSUPP);
1176		return;
1177	}
1178	mtx_lock(&sc->sc_queue_mtx);
1179	bioq_insert_tail(&sc->sc_queue, bp);
1180	mtx_unlock(&sc->sc_queue_mtx);
1181	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1182	wakeup(sc);
1183}
1184
1185/*
1186 * Return TRUE if the given request is colliding with a in-progress
1187 * synchronization request.
1188 */
1189static int
1190g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1191{
1192	struct g_mirror_disk *disk;
1193	struct bio *sbp;
1194	off_t rstart, rend, sstart, send;
1195	u_int i;
1196
1197	if (sc->sc_sync.ds_ndisks == 0)
1198		return (0);
1199	rstart = bp->bio_offset;
1200	rend = bp->bio_offset + bp->bio_length;
1201	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1202		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1203			continue;
1204		for (i = 0; i < g_mirror_syncreqs; i++) {
1205			sbp = disk->d_sync.ds_bios[i];
1206			if (sbp == NULL)
1207				continue;
1208			sstart = sbp->bio_offset;
1209			send = sbp->bio_offset + sbp->bio_length;
1210			if (rend > sstart && rstart < send)
1211				return (1);
1212		}
1213	}
1214	return (0);
1215}
1216
1217/*
1218 * Return TRUE if the given sync request is colliding with a in-progress regular
1219 * request.
1220 */
1221static int
1222g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1223{
1224	off_t rstart, rend, sstart, send;
1225	struct bio *bp;
1226
1227	if (sc->sc_sync.ds_ndisks == 0)
1228		return (0);
1229	sstart = sbp->bio_offset;
1230	send = sbp->bio_offset + sbp->bio_length;
1231	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
1232		rstart = bp->bio_offset;
1233		rend = bp->bio_offset + bp->bio_length;
1234		if (rend > sstart && rstart < send)
1235			return (1);
1236	}
1237	return (0);
1238}
1239
1240/*
1241 * Puts request onto delayed queue.
1242 */
1243static void
1244g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1245{
1246
1247	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1248	bioq_insert_head(&sc->sc_regular_delayed, bp);
1249}
1250
1251/*
1252 * Puts synchronization request onto delayed queue.
1253 */
1254static void
1255g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1256{
1257
1258	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1259	bioq_insert_tail(&sc->sc_sync_delayed, bp);
1260}
1261
1262/*
1263 * Releases delayed regular requests which don't collide anymore with sync
1264 * requests.
1265 */
1266static void
1267g_mirror_regular_release(struct g_mirror_softc *sc)
1268{
1269	struct bio *bp, *bp2;
1270
1271	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
1272		if (g_mirror_sync_collision(sc, bp))
1273			continue;
1274		bioq_remove(&sc->sc_regular_delayed, bp);
1275		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
1276		mtx_lock(&sc->sc_queue_mtx);
1277		bioq_insert_head(&sc->sc_queue, bp);
1278#if 0
1279		/*
1280		 * wakeup() is not needed, because this function is called from
1281		 * the worker thread.
1282		 */
1283		wakeup(&sc->sc_queue);
1284#endif
1285		mtx_unlock(&sc->sc_queue_mtx);
1286	}
1287}
1288
1289/*
1290 * Releases delayed sync requests which don't collide anymore with regular
1291 * requests.
1292 */
1293static void
1294g_mirror_sync_release(struct g_mirror_softc *sc)
1295{
1296	struct bio *bp, *bp2;
1297
1298	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
1299		if (g_mirror_regular_collision(sc, bp))
1300			continue;
1301		bioq_remove(&sc->sc_sync_delayed, bp);
1302		G_MIRROR_LOGREQ(2, bp,
1303		    "Releasing delayed synchronization request.");
1304		g_io_request(bp, bp->bio_from);
1305	}
1306}
1307
1308/*
1309 * Handle synchronization requests.
1310 * Every synchronization request is two-steps process: first, READ request is
1311 * send to active provider and then WRITE request (with read data) to the provider
1312 * being synchronized. When WRITE is finished, new synchronization request is
1313 * send.
1314 */
1315static void
1316g_mirror_sync_request(struct bio *bp)
1317{
1318	struct g_mirror_softc *sc;
1319	struct g_mirror_disk *disk;
1320
1321	bp->bio_from->index--;
1322	sc = bp->bio_from->geom->softc;
1323	disk = bp->bio_from->private;
1324	if (disk == NULL) {
1325		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1326		g_topology_lock();
1327		g_mirror_kill_consumer(sc, bp->bio_from);
1328		g_topology_unlock();
1329		free(bp->bio_data, M_MIRROR);
1330		g_destroy_bio(bp);
1331		sx_xlock(&sc->sc_lock);
1332		return;
1333	}
1334
1335	/*
1336	 * Synchronization request.
1337	 */
1338	switch (bp->bio_cmd) {
1339	case BIO_READ:
1340	    {
1341		struct g_consumer *cp;
1342
1343		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
1344		    bp->bio_error);
1345
1346		if (bp->bio_error != 0) {
1347			G_MIRROR_LOGREQ(0, bp,
1348			    "Synchronization request failed (error=%d).",
1349			    bp->bio_error);
1350			g_destroy_bio(bp);
1351			return;
1352		}
1353		G_MIRROR_LOGREQ(3, bp,
1354		    "Synchronization request half-finished.");
1355		bp->bio_cmd = BIO_WRITE;
1356		bp->bio_cflags = 0;
1357		cp = disk->d_consumer;
1358		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1359		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1360		    cp->acr, cp->acw, cp->ace));
1361		cp->index++;
1362		g_io_request(bp, cp);
1363		return;
1364	    }
1365	case BIO_WRITE:
1366	    {
1367		struct g_mirror_disk_sync *sync;
1368		off_t offset;
1369		void *data;
1370		int i;
1371
1372		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
1373		    bp->bio_error);
1374
1375		if (bp->bio_error != 0) {
1376			G_MIRROR_LOGREQ(0, bp,
1377			    "Synchronization request failed (error=%d).",
1378			    bp->bio_error);
1379			g_destroy_bio(bp);
1380			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1381			g_mirror_event_send(disk,
1382			    G_MIRROR_DISK_STATE_DISCONNECTED,
1383			    G_MIRROR_EVENT_DONTWAIT);
1384			return;
1385		}
1386		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1387		sync = &disk->d_sync;
1388		if (sync->ds_offset >= sc->sc_mediasize ||
1389		    sync->ds_consumer == NULL ||
1390		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1391			/* Don't send more synchronization requests. */
1392			sync->ds_inflight--;
1393			if (sync->ds_bios != NULL) {
1394				i = (int)(uintptr_t)bp->bio_caller1;
1395				sync->ds_bios[i] = NULL;
1396			}
1397			free(bp->bio_data, M_MIRROR);
1398			g_destroy_bio(bp);
1399			if (sync->ds_inflight > 0)
1400				return;
1401			if (sync->ds_consumer == NULL ||
1402			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1403				return;
1404			}
1405			/* Disk up-to-date, activate it. */
1406			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1407			    G_MIRROR_EVENT_DONTWAIT);
1408			return;
1409		}
1410
1411		/* Send next synchronization request. */
1412		data = bp->bio_data;
1413		g_reset_bio(bp);
1414		bp->bio_cmd = BIO_READ;
1415		bp->bio_offset = sync->ds_offset;
1416		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1417		sync->ds_offset += bp->bio_length;
1418		bp->bio_done = g_mirror_sync_done;
1419		bp->bio_data = data;
1420		bp->bio_from = sync->ds_consumer;
1421		bp->bio_to = sc->sc_provider;
1422		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1423		sync->ds_consumer->index++;
1424		/*
1425		 * Delay the request if it is colliding with a regular request.
1426		 */
1427		if (g_mirror_regular_collision(sc, bp))
1428			g_mirror_sync_delay(sc, bp);
1429		else
1430			g_io_request(bp, sync->ds_consumer);
1431
1432		/* Release delayed requests if possible. */
1433		g_mirror_regular_release(sc);
1434
1435		/* Find the smallest offset */
1436		offset = sc->sc_mediasize;
1437		for (i = 0; i < g_mirror_syncreqs; i++) {
1438			bp = sync->ds_bios[i];
1439			if (bp->bio_offset < offset)
1440				offset = bp->bio_offset;
1441		}
1442		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
1443			/* Update offset_done on every 100 blocks. */
1444			sync->ds_offset_done = offset;
1445			g_mirror_update_metadata(disk);
1446		}
1447		return;
1448	    }
1449	default:
1450		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1451		    bp->bio_cmd, sc->sc_name));
1452		break;
1453	}
1454}
1455
1456static void
1457g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1458{
1459	struct g_mirror_disk *disk;
1460	struct g_consumer *cp;
1461	struct bio *cbp;
1462
1463	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1464		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1465			break;
1466	}
1467	if (disk == NULL) {
1468		if (bp->bio_error == 0)
1469			bp->bio_error = ENXIO;
1470		g_io_deliver(bp, bp->bio_error);
1471		return;
1472	}
1473	cbp = g_clone_bio(bp);
1474	if (cbp == NULL) {
1475		if (bp->bio_error == 0)
1476			bp->bio_error = ENOMEM;
1477		g_io_deliver(bp, bp->bio_error);
1478		return;
1479	}
1480	/*
1481	 * Fill in the component buf structure.
1482	 */
1483	cp = disk->d_consumer;
1484	cbp->bio_done = g_mirror_done;
1485	cbp->bio_to = cp->provider;
1486	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1487	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1488	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1489	    cp->acw, cp->ace));
1490	cp->index++;
1491	g_io_request(cbp, cp);
1492}
1493
1494static void
1495g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1496{
1497	struct g_mirror_disk *disk;
1498	struct g_consumer *cp;
1499	struct bio *cbp;
1500
1501	disk = g_mirror_get_disk(sc);
1502	if (disk == NULL) {
1503		if (bp->bio_error == 0)
1504			bp->bio_error = ENXIO;
1505		g_io_deliver(bp, bp->bio_error);
1506		return;
1507	}
1508	cbp = g_clone_bio(bp);
1509	if (cbp == NULL) {
1510		if (bp->bio_error == 0)
1511			bp->bio_error = ENOMEM;
1512		g_io_deliver(bp, bp->bio_error);
1513		return;
1514	}
1515	/*
1516	 * Fill in the component buf structure.
1517	 */
1518	cp = disk->d_consumer;
1519	cbp->bio_done = g_mirror_done;
1520	cbp->bio_to = cp->provider;
1521	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1522	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1523	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1524	    cp->acw, cp->ace));
1525	cp->index++;
1526	g_io_request(cbp, cp);
1527}
1528
/* Head position within 1MB of the request counts as "close" for 'load'. */
#define TRACK_SIZE  (1 * 1024 * 1024)
/* Fixed-point scale used by the smoothed per-disk load values. */
#define LOAD_SCALE	256
/* Absolute value; evaluates its argument twice - pass side-effect-free args. */
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1532
/*
 * Dispatch a read using the 'load' balance algorithm: pick the ACTIVE
 * disk with the smallest smoothed load, strongly preferring a disk whose
 * head is already at (or near) the request offset.
 */
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		/* '<=' biases ties toward the later disk on the list. */
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	/* Exponential moving average: load = (index * SCALE + 7 * load) / 8. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}
1588
/*
 * Dispatch a read using the 'split' balance algorithm: large requests are
 * divided into sector-aligned slices, one per ACTIVE disk; requests at or
 * below sc_slice fall back to round-robin.
 */
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	/* Too small to be worth splitting. */
	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	/* Round the slice up to a whole number of sectors. */
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;	/* Stash the target disk. */
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		/* The last slice absorbs any rounding remainder. */
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	/* All clones allocated; now send them out. */
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}
1654
/*
 * Issue a regular request from the worker thread: reads go to one disk
 * chosen by the configured balance algorithm; writes and deletes are
 * mirrored onto every disk that must see them.
 */
static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	switch (bp->bio_cmd) {
	case BIO_READ:
		/* Pick a single disk according to the balance algorithm. */
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		/* First write after an idle period re-dirties the mirror. */
		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				/*
				 * A syncing disk needs the write only for the
				 * region already synchronized.
				 */
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			/* Skip disks that cannot honor BIO_DELETE. */
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;	/* Stash the target consumer. */
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (bioq_first(&queue) == NULL) {
			/* No disk accepted the request. */
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = bioq_takefirst(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put request onto inflight queue, so we can check if new
		 * synchronization requests don't collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
1771
1772static int
1773g_mirror_can_destroy(struct g_mirror_softc *sc)
1774{
1775	struct g_geom *gp;
1776	struct g_consumer *cp;
1777
1778	g_topology_assert();
1779	gp = sc->sc_geom;
1780	if (gp->softc == NULL)
1781		return (1);
1782	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1783		return (0);
1784	LIST_FOREACH(cp, &gp->consumer, consumer) {
1785		if (g_mirror_is_busy(sc, cp))
1786			return (0);
1787	}
1788	gp = sc->sc_sync.ds_geom;
1789	LIST_FOREACH(cp, &gp->consumer, consumer) {
1790		if (g_mirror_is_busy(sc, cp))
1791			return (0);
1792	}
1793	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1794	    sc->sc_name);
1795	return (1);
1796}
1797
/*
 * Attempt to tear the device down.  Returns non-zero when destruction was
 * initiated (the caller must not touch sc afterwards), zero when I/O is
 * still outstanding and the attempt should be retried later.
 */
static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		/* Stop holding up the root filesystem mount. */
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	/* Detach the softc so new I/O can no longer find it. */
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		/* Another thread waits in g_mirror_destroy(); hand off to it. */
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		/*
		 * NOTE(review): sc_worker is cleared after dropping sc_lock
		 * and waking the waiter - looks racy against teardown;
		 * confirm the waiter cannot free sc before this store.
		 */
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}
1829
1830/*
1831 * Worker thread.
1832 */
1833static void
1834g_mirror_worker(void *arg)
1835{
1836	struct g_mirror_softc *sc;
1837	struct g_mirror_event *ep;
1838	struct bio *bp;
1839	int timeout;
1840
1841	sc = arg;
1842	thread_lock(curthread);
1843	sched_prio(curthread, PRIBIO);
1844	thread_unlock(curthread);
1845
1846	sx_xlock(&sc->sc_lock);
1847	for (;;) {
1848		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1849		/*
1850		 * First take a look at events.
1851		 * This is important to handle events before any I/O requests.
1852		 */
1853		ep = g_mirror_event_get(sc);
1854		if (ep != NULL) {
1855			g_mirror_event_remove(sc, ep);
1856			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1857				/* Update only device status. */
1858				G_MIRROR_DEBUG(3,
1859				    "Running event for device %s.",
1860				    sc->sc_name);
1861				ep->e_error = 0;
1862				g_mirror_update_device(sc, true);
1863			} else {
1864				/* Update disk status. */
1865				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1866				     g_mirror_get_diskname(ep->e_disk));
1867				ep->e_error = g_mirror_update_disk(ep->e_disk,
1868				    ep->e_state);
1869				if (ep->e_error == 0)
1870					g_mirror_update_device(sc, false);
1871			}
1872			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1873				KASSERT(ep->e_error == 0,
1874				    ("Error cannot be handled."));
1875				g_mirror_event_free(ep);
1876			} else {
1877				ep->e_flags |= G_MIRROR_EVENT_DONE;
1878				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1879				    ep);
1880				mtx_lock(&sc->sc_events_mtx);
1881				wakeup(ep);
1882				mtx_unlock(&sc->sc_events_mtx);
1883			}
1884			if ((sc->sc_flags &
1885			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1886				if (g_mirror_try_destroy(sc)) {
1887					curthread->td_pflags &= ~TDP_GEOM;
1888					G_MIRROR_DEBUG(1, "Thread exiting.");
1889					kproc_exit(0);
1890				}
1891			}
1892			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1893			continue;
1894		}
1895		/*
1896		 * Check if we can mark array as CLEAN and if we can't take
1897		 * how much seconds should we wait.
1898		 */
1899		timeout = g_mirror_idle(sc, -1);
1900		/*
1901		 * Now I/O requests.
1902		 */
1903		/* Get first request from the queue. */
1904		mtx_lock(&sc->sc_queue_mtx);
1905		bp = bioq_takefirst(&sc->sc_queue);
1906		if (bp == NULL) {
1907			if ((sc->sc_flags &
1908			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1909				mtx_unlock(&sc->sc_queue_mtx);
1910				if (g_mirror_try_destroy(sc)) {
1911					curthread->td_pflags &= ~TDP_GEOM;
1912					G_MIRROR_DEBUG(1, "Thread exiting.");
1913					kproc_exit(0);
1914				}
1915				mtx_lock(&sc->sc_queue_mtx);
1916				if (bioq_first(&sc->sc_queue) != NULL) {
1917					mtx_unlock(&sc->sc_queue_mtx);
1918					continue;
1919				}
1920			}
1921			sx_xunlock(&sc->sc_lock);
1922			/*
1923			 * XXX: We can miss an event here, because an event
1924			 *      can be added without sx-device-lock and without
1925			 *      mtx-queue-lock. Maybe I should just stop using
1926			 *      dedicated mutex for events synchronization and
1927			 *      stick with the queue lock?
1928			 *      The event will hang here until next I/O request
1929			 *      or next event is received.
1930			 */
1931			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1932			    timeout * hz);
1933			sx_xlock(&sc->sc_lock);
1934			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1935			continue;
1936		}
1937		mtx_unlock(&sc->sc_queue_mtx);
1938
1939		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1940		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1941			g_mirror_sync_request(bp);	/* READ */
1942		} else if (bp->bio_to != sc->sc_provider) {
1943			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1944				g_mirror_regular_request(bp);
1945			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1946				g_mirror_sync_request(bp);	/* WRITE */
1947			else {
1948				KASSERT(0,
1949				    ("Invalid request cflags=0x%hx to=%s.",
1950				    bp->bio_cflags, bp->bio_to->name));
1951			}
1952		} else {
1953			g_mirror_register_request(bp);
1954		}
1955		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1956	}
1957}
1958
1959static void
1960g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1961{
1962
1963	sx_assert(&sc->sc_lock, SX_LOCKED);
1964
1965	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1966		return;
1967	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1968		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1969		    g_mirror_get_diskname(disk), sc->sc_name);
1970		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1971	} else if (sc->sc_idle &&
1972	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1973		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1974		    g_mirror_get_diskname(disk), sc->sc_name);
1975		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1976	}
1977}
1978
/*
 * Start the synchronization process for a disk marked SYNCHRONIZING:
 * attach a dedicated sync consumer to our own provider, allocate
 * g_mirror_syncreqs read bios and fire them off.  Called with sc_lock
 * held; the lock is dropped around the topology operations.
 */
static void
g_mirror_sync_start(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct bio *bp;
	int error, i;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));
	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
	    sc->sc_state));

	/* Drop sc_lock before taking the topology lock (lock order). */
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	cp = g_new_consumer(sc->sc_sync.ds_geom);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, sc->sc_provider);
	KASSERT(error == 0,
	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
	error = g_access(cp, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);

	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_mirror_get_diskname(disk));
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	KASSERT(disk->d_sync.ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_mirror_get_diskname(disk)));

	disk->d_sync.ds_consumer = cp;
	disk->d_sync.ds_consumer->private = disk;
	disk->d_sync.ds_consumer->index = 0;

	/*
	 * Allocate memory for synchronization bios and initialize them.
	 * Each bio reads up to MAXPHYS bytes at consecutive offsets
	 * starting from the stored ds_offset.
	 */
	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
	    M_MIRROR, M_WAITOK);
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = g_alloc_bio();
		disk->d_sync.ds_bios[i] = bp;
		bp->bio_parent = NULL;
		bp->bio_cmd = BIO_READ;
		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
		bp->bio_cflags = 0;
		bp->bio_offset = disk->d_sync.ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		disk->d_sync.ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_from = disk->d_sync.ds_consumer;
		bp->bio_to = sc->sc_provider;
		/* Remember the bio's slot in ds_bios for completion. */
		bp->bio_caller1 = (void *)(uintptr_t)i;
	}

	/* Increase the number of disks in SYNCHRONIZING state. */
	sc->sc_sync.ds_ndisks++;
	/* Set the number of in-flight synchronization requests. */
	disk->d_sync.ds_inflight = g_mirror_syncreqs;

	/*
	 * Fire off first synchronization requests.
	 */
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = disk->d_sync.ds_bios[i];
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		disk->d_sync.ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, disk->d_sync.ds_consumer);
	}
}
2064
2065/*
2066 * Stop synchronization process.
2067 * type: 0 - synchronization finished
2068 *       1 - synchronization stopped
2069 */
2070static void
2071g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2072{
2073	struct g_mirror_softc *sc;
2074	struct g_consumer *cp;
2075
2076	g_topology_assert_not();
2077	sc = disk->d_softc;
2078	sx_assert(&sc->sc_lock, SX_LOCKED);
2079
2080	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2081	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2082	    g_mirror_disk_state2str(disk->d_state)));
2083	if (disk->d_sync.ds_consumer == NULL)
2084		return;
2085
2086	if (type == 0) {
2087		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2088		    sc->sc_name, g_mirror_get_diskname(disk));
2089	} else /* if (type == 1) */ {
2090		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2091		    sc->sc_name, g_mirror_get_diskname(disk));
2092	}
2093	free(disk->d_sync.ds_bios, M_MIRROR);
2094	disk->d_sync.ds_bios = NULL;
2095	cp = disk->d_sync.ds_consumer;
2096	disk->d_sync.ds_consumer = NULL;
2097	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2098	sc->sc_sync.ds_ndisks--;
2099	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2100	g_topology_lock();
2101	g_mirror_kill_consumer(sc, cp);
2102	g_topology_unlock();
2103	sx_xlock(&sc->sc_lock);
2104}
2105
/*
 * Create and announce the mirror's GEOM provider ("mirror/<name>").
 * The provider inherits the largest stripe size seen among components
 * and accepts unmapped bios only if the balance algorithm is not SPLIT
 * and every underlying provider accepts them.  Finally, start
 * synchronization for all disks in the SYNCHRONIZING state.
 */
static void
g_mirror_launch_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_provider *pp, *dp;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_topology_lock();
	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
	pp->flags |= G_PF_DIRECT_RECEIVE;
	pp->mediasize = sc->sc_mediasize;
	pp->sectorsize = sc->sc_sectorsize;
	pp->stripesize = 0;
	pp->stripeoffset = 0;

	/* Splitting of unmapped BIO's could work but isn't implemented now */
	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
		pp->flags |= G_PF_ACCEPT_UNMAPPED;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_consumer && disk->d_consumer->provider) {
			dp = disk->d_consumer->provider;
			/* Advertise the largest component stripe size. */
			if (dp->stripesize > pp->stripesize) {
				pp->stripesize = dp->stripesize;
				pp->stripeoffset = dp->stripeoffset;
			}
			/* A provider underneath us doesn't support unmapped */
			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
				G_MIRROR_DEBUG(0, "Cancelling unmapped "
				    "because of %s.", dp->name);
				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
			}
		}
	}
	pp->private = sc;
	/* The provider holds a reference on the softc. */
	sc->sc_refcnt++;
	sc->sc_provider = pp;
	g_error_provider(pp, 0);
	g_topology_unlock();
	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_start(disk);
	}
}
2153
/*
 * Undo g_mirror_launch_provider(): fail or free everything still on
 * the request queue, wither the provider and stop all running
 * synchronizations.
 */
static void
g_mirror_destroy_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct bio *bp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	g_topology_lock();
	g_error_provider(sc->sc_provider, ENXIO);
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
		/*
		 * Abort any pending I/O that wasn't generated by us.
		 * Synchronization requests and requests destined for individual
		 * mirror components can be destroyed immediately.
		 */
		if (bp->bio_to == sc->sc_provider &&
		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
			g_io_deliver(bp, ENXIO);
		} else {
			/* Sync bios own their data buffer; free it too. */
			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
				free(bp->bio_data, M_MIRROR);
			g_destroy_bio(bp);
		}
	}
	mtx_unlock(&sc->sc_queue_mtx);
	g_wither_provider(sc->sc_provider, ENXIO);
	sc->sc_provider = NULL;
	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
	g_topology_unlock();
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_stop(disk, 1);
	}
}
2192
2193static void
2194g_mirror_go(void *arg)
2195{
2196	struct g_mirror_softc *sc;
2197
2198	sc = arg;
2199	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2200	g_mirror_event_send(sc, 0,
2201	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2202}
2203
2204static u_int
2205g_mirror_determine_state(struct g_mirror_disk *disk)
2206{
2207	struct g_mirror_softc *sc;
2208	u_int state;
2209
2210	sc = disk->d_softc;
2211	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2212		if ((disk->d_flags &
2213		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2214		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2215		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2216			/* Disk does not need synchronization. */
2217			state = G_MIRROR_DISK_STATE_ACTIVE;
2218		} else {
2219			if ((sc->sc_flags &
2220			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2221			    (disk->d_flags &
2222			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2223				/*
2224				 * We can start synchronization from
2225				 * the stored offset.
2226				 */
2227				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2228			} else {
2229				state = G_MIRROR_DISK_STATE_STALE;
2230			}
2231		}
2232	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2233		/*
2234		 * Reset all synchronization data for this disk,
2235		 * because if it even was synchronized, it was
2236		 * synchronized to disks with different syncid.
2237		 */
2238		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2239		disk->d_sync.ds_offset = 0;
2240		disk->d_sync.ds_offset_done = 0;
2241		disk->d_sync.ds_syncid = sc->sc_syncid;
2242		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2243		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2244			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2245		} else {
2246			state = G_MIRROR_DISK_STATE_STALE;
2247		}
2248	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2249		/*
2250		 * Not good, NOT GOOD!
2251		 * It means that mirror was started on stale disks
2252		 * and more fresh disk just arrive.
2253		 * If there were writes, mirror is broken, sorry.
2254		 * I think the best choice here is don't touch
2255		 * this disk and inform the user loudly.
2256		 */
2257		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2258		    "disk (%s) arrives!! It will not be connected to the "
2259		    "running device.", sc->sc_name,
2260		    g_mirror_get_diskname(disk));
2261		g_mirror_destroy_disk(disk);
2262		state = G_MIRROR_DISK_STATE_NONE;
2263		/* Return immediately, because disk was destroyed. */
2264		return (state);
2265	}
2266	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2267	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2268	return (state);
2269}
2270
2271/*
2272 * Update device state.
2273 */
2274static void
2275g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2276{
2277	struct g_mirror_disk *disk;
2278	u_int state;
2279
2280	sx_assert(&sc->sc_lock, SX_XLOCKED);
2281
2282	switch (sc->sc_state) {
2283	case G_MIRROR_DEVICE_STATE_STARTING:
2284	    {
2285		struct g_mirror_disk *pdisk, *tdisk;
2286		u_int dirty, ndisks, genid, syncid;
2287		bool broken;
2288
2289		KASSERT(sc->sc_provider == NULL,
2290		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2291		/*
2292		 * Are we ready? We are, if all disks are connected or
2293		 * if we have any disks and 'force' is true.
2294		 */
2295		ndisks = g_mirror_ndisks(sc, -1);
2296		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2297			;
2298		} else if (ndisks == 0) {
2299			/*
2300			 * Disks went down in starting phase, so destroy
2301			 * device.
2302			 */
2303			callout_drain(&sc->sc_callout);
2304			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2305			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2306			    sc->sc_rootmount);
2307			root_mount_rel(sc->sc_rootmount);
2308			sc->sc_rootmount = NULL;
2309			return;
2310		} else {
2311			return;
2312		}
2313
2314		/*
2315		 * Activate all disks with the biggest syncid.
2316		 */
2317		if (force) {
2318			/*
2319			 * If 'force' is true, we have been called due to
2320			 * timeout, so don't bother canceling timeout.
2321			 */
2322			ndisks = 0;
2323			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2324				if ((disk->d_flags &
2325				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2326					ndisks++;
2327				}
2328			}
2329			if (ndisks == 0) {
2330				/* No valid disks found, destroy device. */
2331				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2332				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2333				    __LINE__, sc->sc_rootmount);
2334				root_mount_rel(sc->sc_rootmount);
2335				sc->sc_rootmount = NULL;
2336				return;
2337			}
2338		} else {
2339			/* Cancel timeout. */
2340			callout_drain(&sc->sc_callout);
2341		}
2342
2343		/*
2344		 * Find the biggest genid.
2345		 */
2346		genid = 0;
2347		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2348			if (disk->d_genid > genid)
2349				genid = disk->d_genid;
2350		}
2351		sc->sc_genid = genid;
2352		/*
2353		 * Remove all disks without the biggest genid.
2354		 */
2355		broken = false;
2356		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2357			if (disk->d_genid < genid) {
2358				G_MIRROR_DEBUG(0,
2359				    "Component %s (device %s) broken, skipping.",
2360				    g_mirror_get_diskname(disk), sc->sc_name);
2361				g_mirror_destroy_disk(disk);
2362				/*
2363				 * Bump the syncid in case we discover a healthy
2364				 * replacement disk after starting the mirror.
2365				 */
2366				broken = true;
2367			}
2368		}
2369
2370		/*
2371		 * Find the biggest syncid.
2372		 */
2373		syncid = 0;
2374		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2375			if (disk->d_sync.ds_syncid > syncid)
2376				syncid = disk->d_sync.ds_syncid;
2377		}
2378
2379		/*
2380		 * Here we need to look for dirty disks and if all disks
2381		 * with the biggest syncid are dirty, we have to choose
2382		 * one with the biggest priority and rebuild the rest.
2383		 */
2384		/*
2385		 * Find the number of dirty disks with the biggest syncid.
2386		 * Find the number of disks with the biggest syncid.
2387		 * While here, find a disk with the biggest priority.
2388		 */
2389		dirty = ndisks = 0;
2390		pdisk = NULL;
2391		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2392			if (disk->d_sync.ds_syncid != syncid)
2393				continue;
2394			if ((disk->d_flags &
2395			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2396				continue;
2397			}
2398			ndisks++;
2399			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2400				dirty++;
2401				if (pdisk == NULL ||
2402				    pdisk->d_priority < disk->d_priority) {
2403					pdisk = disk;
2404				}
2405			}
2406		}
2407		if (dirty == 0) {
2408			/* No dirty disks at all, great. */
2409		} else if (dirty == ndisks) {
2410			/*
2411			 * Force synchronization for all dirty disks except one
2412			 * with the biggest priority.
2413			 */
2414			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2415			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2416			    "master disk for synchronization.",
2417			    g_mirror_get_diskname(pdisk), sc->sc_name);
2418			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2419				if (disk->d_sync.ds_syncid != syncid)
2420					continue;
2421				if ((disk->d_flags &
2422				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2423					continue;
2424				}
2425				KASSERT((disk->d_flags &
2426				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2427				    ("Disk %s isn't marked as dirty.",
2428				    g_mirror_get_diskname(disk)));
2429				/* Skip the disk with the biggest priority. */
2430				if (disk == pdisk)
2431					continue;
2432				disk->d_sync.ds_syncid = 0;
2433			}
2434		} else if (dirty < ndisks) {
2435			/*
2436			 * Force synchronization for all dirty disks.
2437			 * We have some non-dirty disks.
2438			 */
2439			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2440				if (disk->d_sync.ds_syncid != syncid)
2441					continue;
2442				if ((disk->d_flags &
2443				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2444					continue;
2445				}
2446				if ((disk->d_flags &
2447				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2448					continue;
2449				}
2450				disk->d_sync.ds_syncid = 0;
2451			}
2452		}
2453
2454		/* Reset hint. */
2455		sc->sc_hint = NULL;
2456		sc->sc_syncid = syncid;
2457		if (force || broken) {
2458			/* Remember to bump syncid on first write. */
2459			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2460		}
2461		state = G_MIRROR_DEVICE_STATE_RUNNING;
2462		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2463		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2464		    g_mirror_device_state2str(state));
2465		sc->sc_state = state;
2466		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2467			state = g_mirror_determine_state(disk);
2468			g_mirror_event_send(disk, state,
2469			    G_MIRROR_EVENT_DONTWAIT);
2470			if (state == G_MIRROR_DISK_STATE_STALE)
2471				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2472		}
2473		break;
2474	    }
2475	case G_MIRROR_DEVICE_STATE_RUNNING:
2476		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2477		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2478			/*
2479			 * No active disks or no disks at all,
2480			 * so destroy device.
2481			 */
2482			if (sc->sc_provider != NULL)
2483				g_mirror_destroy_provider(sc);
2484			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2485			break;
2486		} else if (g_mirror_ndisks(sc,
2487		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2488		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2489			/*
2490			 * We have active disks, launch provider if it doesn't
2491			 * exist.
2492			 */
2493			if (sc->sc_provider == NULL)
2494				g_mirror_launch_provider(sc);
2495			if (sc->sc_rootmount != NULL) {
2496				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2497				    __LINE__, sc->sc_rootmount);
2498				root_mount_rel(sc->sc_rootmount);
2499				sc->sc_rootmount = NULL;
2500			}
2501		}
2502		/*
2503		 * Genid should be bumped immediately, so do it here.
2504		 */
2505		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2506			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2507			g_mirror_bump_genid(sc);
2508		}
2509		break;
2510	default:
2511		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2512		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2513		break;
2514	}
2515}
2516
2517/*
2518 * Update disk state and device state if needed.
2519 */
2520#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2521	"Disk %s state changed from %s to %s (device %s).",		\
2522	g_mirror_get_diskname(disk),					\
2523	g_mirror_disk_state2str(disk->d_state),				\
2524	g_mirror_disk_state2str(state), sc->sc_name)
2525static int
2526g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2527{
2528	struct g_mirror_softc *sc;
2529
2530	sc = disk->d_softc;
2531	sx_assert(&sc->sc_lock, SX_XLOCKED);
2532
2533again:
2534	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2535	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2536	    g_mirror_disk_state2str(state));
2537	switch (state) {
2538	case G_MIRROR_DISK_STATE_NEW:
2539		/*
2540		 * Possible scenarios:
2541		 * 1. New disk arrive.
2542		 */
2543		/* Previous state should be NONE. */
2544		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2545		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2546		    g_mirror_disk_state2str(disk->d_state)));
2547		DISK_STATE_CHANGED();
2548
2549		disk->d_state = state;
2550		if (LIST_EMPTY(&sc->sc_disks))
2551			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2552		else {
2553			struct g_mirror_disk *dp;
2554
2555			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2556				if (disk->d_priority >= dp->d_priority) {
2557					LIST_INSERT_BEFORE(dp, disk, d_next);
2558					dp = NULL;
2559					break;
2560				}
2561				if (LIST_NEXT(dp, d_next) == NULL)
2562					break;
2563			}
2564			if (dp != NULL)
2565				LIST_INSERT_AFTER(dp, disk, d_next);
2566		}
2567		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2568		    sc->sc_name, g_mirror_get_diskname(disk));
2569		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2570			break;
2571		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2572		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2573		    g_mirror_device_state2str(sc->sc_state),
2574		    g_mirror_get_diskname(disk),
2575		    g_mirror_disk_state2str(disk->d_state)));
2576		state = g_mirror_determine_state(disk);
2577		if (state != G_MIRROR_DISK_STATE_NONE)
2578			goto again;
2579		break;
2580	case G_MIRROR_DISK_STATE_ACTIVE:
2581		/*
2582		 * Possible scenarios:
2583		 * 1. New disk does not need synchronization.
2584		 * 2. Synchronization process finished successfully.
2585		 */
2586		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2587		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2588		    g_mirror_device_state2str(sc->sc_state),
2589		    g_mirror_get_diskname(disk),
2590		    g_mirror_disk_state2str(disk->d_state)));
2591		/* Previous state should be NEW or SYNCHRONIZING. */
2592		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2593		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2594		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2595		    g_mirror_disk_state2str(disk->d_state)));
2596		DISK_STATE_CHANGED();
2597
2598		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2599			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2600			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2601			g_mirror_sync_stop(disk, 0);
2602		}
2603		disk->d_state = state;
2604		disk->d_sync.ds_offset = 0;
2605		disk->d_sync.ds_offset_done = 0;
2606		g_mirror_update_idle(sc, disk);
2607		g_mirror_update_metadata(disk);
2608		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2609		    sc->sc_name, g_mirror_get_diskname(disk));
2610		break;
2611	case G_MIRROR_DISK_STATE_STALE:
2612		/*
2613		 * Possible scenarios:
2614		 * 1. Stale disk was connected.
2615		 */
2616		/* Previous state should be NEW. */
2617		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2618		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2619		    g_mirror_disk_state2str(disk->d_state)));
2620		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2621		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2622		    g_mirror_device_state2str(sc->sc_state),
2623		    g_mirror_get_diskname(disk),
2624		    g_mirror_disk_state2str(disk->d_state)));
2625		/*
2626		 * STALE state is only possible if device is marked
2627		 * NOAUTOSYNC.
2628		 */
2629		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2630		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2631		    g_mirror_device_state2str(sc->sc_state),
2632		    g_mirror_get_diskname(disk),
2633		    g_mirror_disk_state2str(disk->d_state)));
2634		DISK_STATE_CHANGED();
2635
2636		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2637		disk->d_state = state;
2638		g_mirror_update_metadata(disk);
2639		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2640		    sc->sc_name, g_mirror_get_diskname(disk));
2641		break;
2642	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2643		/*
2644		 * Possible scenarios:
2645		 * 1. Disk which needs synchronization was connected.
2646		 */
2647		/* Previous state should be NEW. */
2648		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2649		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2650		    g_mirror_disk_state2str(disk->d_state)));
2651		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2652		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2653		    g_mirror_device_state2str(sc->sc_state),
2654		    g_mirror_get_diskname(disk),
2655		    g_mirror_disk_state2str(disk->d_state)));
2656		DISK_STATE_CHANGED();
2657
2658		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2659			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2660		disk->d_state = state;
2661		if (sc->sc_provider != NULL) {
2662			g_mirror_sync_start(disk);
2663			g_mirror_update_metadata(disk);
2664		}
2665		break;
2666	case G_MIRROR_DISK_STATE_DISCONNECTED:
2667		/*
2668		 * Possible scenarios:
2669		 * 1. Device wasn't running yet, but disk disappear.
2670		 * 2. Disk was active and disapppear.
2671		 * 3. Disk disappear during synchronization process.
2672		 */
2673		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2674			/*
2675			 * Previous state should be ACTIVE, STALE or
2676			 * SYNCHRONIZING.
2677			 */
2678			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2679			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2680			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2681			    ("Wrong disk state (%s, %s).",
2682			    g_mirror_get_diskname(disk),
2683			    g_mirror_disk_state2str(disk->d_state)));
2684		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2685			/* Previous state should be NEW. */
2686			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2687			    ("Wrong disk state (%s, %s).",
2688			    g_mirror_get_diskname(disk),
2689			    g_mirror_disk_state2str(disk->d_state)));
2690			/*
2691			 * Reset bumping syncid if disk disappeared in STARTING
2692			 * state.
2693			 */
2694			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2695				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2696#ifdef	INVARIANTS
2697		} else {
2698			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2699			    sc->sc_name,
2700			    g_mirror_device_state2str(sc->sc_state),
2701			    g_mirror_get_diskname(disk),
2702			    g_mirror_disk_state2str(disk->d_state)));
2703#endif
2704		}
2705		DISK_STATE_CHANGED();
2706		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2707		    sc->sc_name, g_mirror_get_diskname(disk));
2708
2709		g_mirror_destroy_disk(disk);
2710		break;
2711	case G_MIRROR_DISK_STATE_DESTROY:
2712	    {
2713		int error;
2714
2715		error = g_mirror_clear_metadata(disk);
2716		if (error != 0) {
2717			G_MIRROR_DEBUG(0,
2718			    "Device %s: failed to clear metadata on %s: %d.",
2719			    sc->sc_name, g_mirror_get_diskname(disk), error);
2720			break;
2721		}
2722		DISK_STATE_CHANGED();
2723		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2724		    sc->sc_name, g_mirror_get_diskname(disk));
2725
2726		g_mirror_destroy_disk(disk);
2727		sc->sc_ndisks--;
2728		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2729			g_mirror_update_metadata(disk);
2730		}
2731		break;
2732	    }
2733	default:
2734		KASSERT(1 == 0, ("Unknown state (%u).", state));
2735		break;
2736	}
2737	return (0);
2738}
2739#undef	DISK_STATE_CHANGED
2740
/*
 * Read and decode the on-disk mirror metadata from the last sector of
 * the provider attached to 'cp'.  Fills in 'md' and returns 0 on
 * success, or an errno value otherwise.  Called with the topology lock
 * held; the lock is dropped around the actual disk read.
 */
int
g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
{
	struct g_provider *pp;
	u_char *buf;
	int error;

	g_topology_assert();

	/* Open the consumer for reading for the duration of the call. */
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);
	pp = cp->provider;
	g_topology_unlock();
	/* Metadata are stored on last sector. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	g_topology_lock();
	g_access(cp, -1, 0, 0);
	if (buf == NULL) {
		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
		    cp->provider->name, error);
		return (error);
	}

	/* Decode metadata. */
	error = mirror_metadata_decode(buf, md);
	g_free(buf);
	/*
	 * Magic and version are checked before the decode error so that
	 * foreign providers and too-new metadata produce EINVAL instead
	 * of a hash-mismatch diagnostic.
	 */
	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
		return (EINVAL);
	if (md->md_version > G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0,
		    "Kernel module is too old to handle metadata from %s.",
		    cp->provider->name);
		return (EINVAL);
	}
	if (error != 0) {
		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
		    cp->provider->name);
		return (error);
	}

	return (0);
}
2785
/*
 * Validate component metadata 'md' (read from provider 'pp') against
 * the already-configured device 'sc'.  Returns 0 if the component may
 * be connected, EEXIST if a disk with the same id already exists, or
 * EINVAL on any mismatch.  Each rejection logs the offending field.
 */
static int
g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{

	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
		    pp->name, md->md_did);
		return (EEXIST);
	}
	if (md->md_all != sc->sc_ndisks) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_all", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_slice != sc->sc_slice) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_slice", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_balance != sc->sc_balance) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_balance", pp->name, sc->sc_name);
		return (EINVAL);
	}
#if 0
	if (md->md_mediasize != sc->sc_mediasize) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_mediasize", pp->name, sc->sc_name);
		return (EINVAL);
	}
#endif
	/* The component must be large enough to hold the mirror. */
	if (sc->sc_mediasize > pp->mediasize) {
		G_MIRROR_DEBUG(1,
		    "Invalid size of disk %s (device %s), skipping.", pp->name,
		    sc->sc_name);
		return (EINVAL);
	}
	if (md->md_sectorsize != sc->sc_sectorsize) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_sectorsize", pp->name, sc->sc_name);
		return (EINVAL);
	}
	/* The mirror's sector size must be a multiple of the disk's. */
	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid sector size of disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid device flags on disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid disk flags on disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	return (0);
}
2853}
2854
/*
 * Attach the component described by metadata 'md' (read from provider 'pp')
 * to mirror 'sc'.  Called with sc_lock held and without the topology lock;
 * sleeps while waiting for the worker to process the NEW-state event.
 * Returns 0 on success or an errno value.
 */
int
g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{
	struct g_mirror_disk *disk;
	int error;

	g_topology_assert_not();
	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);

	error = g_mirror_check_metadata(sc, pp, md);
	if (error != 0)
		return (error);
	/*
	 * Once the device is running, refuse components with a generation
	 * ID older than the device's - they are stale/broken.
	 */
	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
	    md->md_genid < sc->sc_genid) {
		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	disk = g_mirror_init_disk(sc, pp, md, &error);
	if (disk == NULL)
		return (error);
	/* Hand the disk to the worker thread and wait for the result. */
	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
	    G_MIRROR_EVENT_WAIT);
	if (error != 0)
		return (error);
	/* Rewrite on-disk metadata written by an older gmirror version. */
	if (md->md_version < G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
		    pp->name, md->md_version, G_MIRROR_VERSION);
		g_mirror_update_metadata(disk);
	}
	return (0);
}
2888
/*
 * GEOM event callback that completes destruction of a mirror which was
 * still open when G_MIRROR_DESTROY_DELAYED was requested (see
 * g_mirror_access()).  'arg' is the softc; runs with the topology lock
 * held, as all GEOM events do.
 */
static void
g_mirror_destroy_delayed(void *arg, int flag)
{
	struct g_mirror_softc *sc;
	int error;

	if (flag == EV_CANCEL) {
		G_MIRROR_DEBUG(1, "Destroying canceled.");
		return;
	}
	sc = arg;
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
	    ("DESTROY flag set on %s.", sc->sc_name));
	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
	    ("DESTROYING flag not set on %s.", sc->sc_name));
	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
		    sc->sc_name, error);
		sx_xunlock(&sc->sc_lock);
	}
	/*
	 * NOTE: sc_lock is dropped here only on failure; on success it is
	 * presumably disposed of inside g_mirror_destroy()'s teardown path.
	 */
	g_topology_lock();
}
2915
/*
 * GEOM access method for the mirror provider.  Maintains the cumulative
 * open count in sc_provider_open, lets the device go idle when the last
 * writer closes, and schedules delayed destruction once a DESTROYING
 * device is fully closed.  Returns 0 or ENXIO for new opens of a device
 * that is going away or has no disks.
 */
static int
g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
{
	struct g_mirror_softc *sc;
	int error = 0;

	g_topology_assert();
	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
	    acw, ace);

	sc = pp->private;
	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));

	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	/* Refuse new opens on a dying or diskless device. */
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 ||
	    LIST_EMPTY(&sc->sc_disks)) {
		if (acr > 0 || acw > 0 || ace > 0)
			error = ENXIO;
		goto end;
	}
	sc->sc_provider_open += acr + acw + ace;
	/* Last writer is going away: let the device go idle. */
	if (pp->acw + acw == 0)
		g_mirror_idle(sc, 0);
	/* Last close of a DESTROYING device: finish destruction via event. */
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 &&
	    sc->sc_provider_open == 0)
		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
end:
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (error);
}
2949
/*
 * Create a new mirror device from metadata 'md'.  Two geoms are created:
 * the "action" geom which carries regular I/O and a "<name>.sync" geom
 * used for synchronization consumers.  The worker thread is started and a
 * timeout armed (g_mirror_go) so the device eventually runs even if not
 * all components appear.  Returns the action geom, or NULL on failure.
 * Called with the topology lock held.
 */
struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
    u_int type)
{
	struct g_mirror_softc *sc;
	struct g_geom *gp;
	int error, timeout;

	g_topology_assert();
	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
	    md->md_mid);

	/* One disk is minimum. */
	if (md->md_all < 1)
		return (NULL);
	/*
	 * Action geom.
	 */
	gp = g_new_geomf(mp, "%s", md->md_name);
	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
	gp->start = g_mirror_start;
	gp->orphan = g_mirror_orphan;
	gp->access = g_mirror_access;
	gp->dumpconf = g_mirror_dumpconf;

	/* Copy the device-wide configuration from the metadata. */
	sc->sc_type = type;
	sc->sc_id = md->md_mid;
	sc->sc_slice = md->md_slice;
	sc->sc_balance = md->md_balance;
	sc->sc_mediasize = md->md_mediasize;
	sc->sc_sectorsize = md->md_sectorsize;
	sc->sc_ndisks = md->md_all;
	sc->sc_flags = md->md_mflags;
	sc->sc_bump_id = 0;
	sc->sc_idle = 1;
	sc->sc_last_write = time_uptime;
	sc->sc_writes = 0;
	sc->sc_refcnt = 1;
	/* Initialize locks, queues and lists used by the worker thread. */
	sx_init(&sc->sc_lock, "gmirror:lock");
	bioq_init(&sc->sc_queue);
	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
	bioq_init(&sc->sc_regular_delayed);
	bioq_init(&sc->sc_inflight);
	bioq_init(&sc->sc_sync_delayed);
	LIST_INIT(&sc->sc_disks);
	TAILQ_INIT(&sc->sc_events);
	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
	callout_init(&sc->sc_callout, 1);
	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
	gp->softc = sc;
	sc->sc_geom = gp;
	sc->sc_provider = NULL;
	sc->sc_provider_open = 0;
	/*
	 * Synchronization geom.
	 */
	gp = g_new_geomf(mp, "%s.sync", md->md_name);
	gp->softc = sc;
	gp->orphan = g_mirror_orphan;
	sc->sc_sync.ds_geom = gp;
	sc->sc_sync.ds_ndisks = 0;
	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
	    "g_mirror %s", md->md_name);
	if (error != 0) {
		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
		    sc->sc_name);
		/* Undo both geoms; g_mirror_free_device() disposes the softc. */
		g_destroy_geom(sc->sc_sync.ds_geom);
		g_destroy_geom(sc->sc_geom);
		g_mirror_free_device(sc);
		return (NULL);
	}

	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
	    sc->sc_name, sc->sc_ndisks, sc->sc_id);

	/* Hold root mount until the device is usable (or times out). */
	sc->sc_rootmount = root_mount_hold("GMIRROR");
	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
	/*
	 * Run timeout.
	 */
	timeout = g_mirror_timeout * hz;
	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
	return (sc->sc_geom);
}
3035
/*
 * Destroy mirror 'sc'.  'how' selects the policy when the provider is
 * still open: SOFT fails with EBUSY, DELAYED marks the device for
 * destruction on last close (also returning EBUSY), HARD proceeds
 * regardless.  Called with sc_lock held and without the topology lock.
 * Returns 0 when destruction has been carried out (or was already done).
 */
int
g_mirror_destroy(struct g_mirror_softc *sc, int how)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider_open != 0) {
		switch (how) {
		case G_MIRROR_DESTROY_SOFT:
			G_MIRROR_DEBUG(1,
			    "Device %s is still open (%d).", sc->sc_name,
			    sc->sc_provider_open);
			return (EBUSY);
		case G_MIRROR_DESTROY_DELAYED:
			G_MIRROR_DEBUG(1,
			    "Device %s will be destroyed on last close.",
			    sc->sc_name);
			/* Stop synchronization of any syncing components. */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_state ==
				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
					g_mirror_sync_stop(disk, 1);
				}
			}
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
			return (EBUSY);
		case G_MIRROR_DESTROY_HARD:
			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
			    "can't be definitely removed.", sc->sc_name);
			/* FALLTHROUGH: destroy anyway. */
		}
	}

	g_topology_lock();
	/* A NULL softc means destruction already happened elsewhere. */
	if (sc->sc_geom->softc == NULL) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	g_topology_unlock();

	/*
	 * Ask the worker thread to exit, wake it up, and wait until it
	 * clears sc_worker on its way out.
	 */
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	sx_xunlock(&sc->sc_lock);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
	while (sc->sc_worker != NULL)
		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
	sx_xlock(&sc->sc_lock);
	g_mirror_destroy_device(sc);
	return (0);
}
3093
/*
 * Orphan method for the transient geom used while tasting.  It must never
 * be called: the taste consumer is detached before control returns to
 * GEOM, so this panics (under INVARIANTS) if it ever fires.
 */
static void
g_mirror_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}
3101
/*
 * GEOM taste method: read mirror metadata from provider 'pp' and, when it
 * is valid, attach the provider as a component of an existing mirror or
 * create a new device for it.  Returns the device geom on success, or
 * NULL when the provider is not (or cannot become) a mirror component.
 * Called with the topology lock held.
 */
static struct g_geom *
g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_mirror_metadata md;
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);

	/* Use a throw-away geom/consumer pair just to read the metadata. */
	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should be never called.
	 */
	gp->orphan = g_mirror_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_mirror_read_metadata(cp, &md);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	/* Hardcoded provider name in metadata must match this provider. */
	if (md.md_provider[0] != '\0' &&
	    !g_compare_names(md.md_provider, pp->name))
		return (NULL);
	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
		return (NULL);
	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
		G_MIRROR_DEBUG(0,
		    "Device %s: provider %s marked as inactive, skipping.",
		    md.md_name, pp->name);
		return (NULL);
	}
	if (g_mirror_debug >= 2)
		mirror_metadata_dump(&md);

	/*
	 * Let's check if device already exists.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
			continue;
		if (sc->sc_sync.ds_geom == gp)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		/* Same name but different device ID: a conflicting device. */
		if (md.md_mid != sc->sc_id) {
			G_MIRROR_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
		break;
	}
	if (gp == NULL) {
		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
		if (gp == NULL) {
			G_MIRROR_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
	}
	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
	error = g_mirror_add_disk(sc, pp, &md);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
		    pp->name, gp->name, error);
		/* A freshly created, still diskless device is torn down. */
		if (LIST_EMPTY(&sc->sc_disks)) {
			g_cancel_event(sc);
			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
			g_topology_lock();
			return (NULL);
		}
		gp = NULL;
	}
	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
	/* The worker may have requested destruction while we were tasting. */
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
		g_topology_lock();
		return (NULL);
	}
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (gp);
}
3200
3201static void
3202g_mirror_resize(struct g_consumer *cp)
3203{
3204	struct g_mirror_disk *disk;
3205
3206	g_topology_assert();
3207	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3208
3209	disk = cp->private;
3210	if (disk == NULL)
3211		return;
3212	g_topology_unlock();
3213	g_mirror_update_metadata(disk);
3214	g_topology_lock();
3215}
3216
3217static int
3218g_mirror_destroy_geom(struct gctl_req *req __unused,
3219    struct g_class *mp __unused, struct g_geom *gp)
3220{
3221	struct g_mirror_softc *sc;
3222	int error;
3223
3224	g_topology_unlock();
3225	sc = gp->softc;
3226	sx_xlock(&sc->sc_lock);
3227	g_cancel_event(sc);
3228	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3229	if (error != 0)
3230		sx_xunlock(&sc->sc_lock);
3231	g_topology_lock();
3232	return (error);
3233}
3234
/*
 * GEOM dumpconf method: emit XML fragments describing the device (geom
 * level) or a component (consumer level) for the confxml sysctl output.
 * The provider level intentionally adds nothing.  Called with the
 * topology lock held; it is dropped around sbuf generation while sc_lock
 * is taken to get a consistent snapshot.
 */
static void
g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	/* Skip synchronization geom. */
	if (gp == sc->sc_sync.ds_geom)
		return;
	if (pp != NULL) {
		/* Nothing here. */
	} else if (cp != NULL) {
		/* Per-component (consumer) section. */
		struct g_mirror_disk *disk;

		disk = cp->private;
		if (disk == NULL)
			return;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			/* Report synchronization progress as a percentage. */
			sbuf_printf(sb, "%s<Synchronized>", indent);
			if (disk->d_sync.ds_offset == 0)
				sbuf_printf(sb, "0%%");
			else {
				sbuf_printf(sb, "%u%%",
				    (u_int)((disk->d_sync.ds_offset * 100) /
				    sc->sc_provider->mediasize));
			}
			sbuf_printf(sb, "</Synchronized>\n");
			if (disk->d_sync.ds_offset > 0) {
				sbuf_printf(sb, "%s<BytesSynced>%jd"
				    "</BytesSynced>\n", indent,
				    (intmax_t)disk->d_sync.ds_offset);
			}
		}
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
		    disk->d_sync.ds_syncid);
		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
		    disk->d_genid);
		/* Emit per-disk flags as a comma-separated list. */
		sbuf_printf(sb, "%s<Flags>", indent);
		if (disk->d_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

#define	ADD_FLAG(flag, name)	do {					\
	if ((disk->d_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
			    "SYNCHRONIZING");
			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
		    disk->d_priority);
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_mirror_disk_state2str(disk->d_state));
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	} else {
		/* Device-wide (geom) section. */
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		sbuf_printf(sb, "%s<Type>", indent);
		switch (sc->sc_type) {
		case G_MIRROR_TYPE_AUTOMATIC:
			sbuf_printf(sb, "AUTOMATIC");
			break;
		case G_MIRROR_TYPE_MANUAL:
			sbuf_printf(sb, "MANUAL");
			break;
		default:
			sbuf_printf(sb, "UNKNOWN");
			break;
		}
		sbuf_printf(sb, "</Type>\n");
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
		/* Emit device flags as a comma-separated list. */
		sbuf_printf(sb, "%s<Flags>", indent);
		if (sc->sc_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

#define	ADD_FLAG(flag, name)	do {					\
	if ((sc->sc_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
		    (u_int)sc->sc_slice);
		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
		    balance_name(sc->sc_balance));
		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
		    sc->sc_ndisks);
		sbuf_printf(sb, "%s<State>", indent);
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			sbuf_printf(sb, "%s", "STARTING");
		else if (sc->sc_ndisks ==
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
			sbuf_printf(sb, "%s", "COMPLETE");
		else
			sbuf_printf(sb, "%s", "DEGRADED");
		sbuf_printf(sb, "</State>\n");
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	}
}
3369
/*
 * shutdown_post_sync event handler: flag the class as shutting down and
 * request delayed destruction of every mirror so each device is torn down
 * as it is closed.  Does nothing when the system is panicking.
 */
static void
g_mirror_shutdown_post_sync(void *arg, int howto)
{
	struct g_class *mp;
	struct g_geom *gp, *gp2;
	struct g_mirror_softc *sc;
	int error;

	if (panicstr != NULL)
		return;

	mp = arg;
	g_topology_lock();
	g_mirror_shutdown = 1;
	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
		if ((sc = gp->softc) == NULL)
			continue;
		/* Skip synchronization geom. */
		if (gp == sc->sc_sync.ds_geom)
			continue;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		g_mirror_idle(sc, -1);
		g_cancel_event(sc);
		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
		/* On failure sc_lock is still held and must be dropped. */
		if (error != 0)
			sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	}
	g_topology_unlock();
}
3401
3402static void
3403g_mirror_init(struct g_class *mp)
3404{
3405
3406	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3407	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3408	if (g_mirror_post_sync == NULL)
3409		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3410}
3411
3412static void
3413g_mirror_fini(struct g_class *mp)
3414{
3415
3416	if (g_mirror_post_sync != NULL)
3417		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3418}
3419
/* Register the mirror class with the GEOM framework. */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3421