1132904Spjd/*-
2156878Spjd * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3132904Spjd * All rights reserved.
4132904Spjd *
5132904Spjd * Redistribution and use in source and binary forms, with or without
6132904Spjd * modification, are permitted provided that the following conditions
7132904Spjd * are met:
8132904Spjd * 1. Redistributions of source code must retain the above copyright
9132904Spjd *    notice, this list of conditions and the following disclaimer.
10132904Spjd * 2. Redistributions in binary form must reproduce the above copyright
11132904Spjd *    notice, this list of conditions and the following disclaimer in the
12132904Spjd *    documentation and/or other materials provided with the distribution.
13155174Spjd *
14132904Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15132904Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16132904Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17132904Spjd * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18132904Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19132904Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20132904Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21132904Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22132904Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23132904Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24132904Spjd * SUCH DAMAGE.
25132904Spjd */
26132904Spjd
27132904Spjd#include <sys/cdefs.h>
28132904Spjd__FBSDID("$FreeBSD$");
29132904Spjd
30132904Spjd#include <sys/param.h>
31132904Spjd#include <sys/systm.h>
32132904Spjd#include <sys/kernel.h>
33132904Spjd#include <sys/module.h>
34132904Spjd#include <sys/limits.h>
35132904Spjd#include <sys/lock.h>
36132904Spjd#include <sys/mutex.h>
37132904Spjd#include <sys/bio.h>
38223921Sae#include <sys/sbuf.h>
39132904Spjd#include <sys/sysctl.h>
40132904Spjd#include <sys/malloc.h>
41137254Spjd#include <sys/eventhandler.h>
42132904Spjd#include <vm/uma.h>
43132904Spjd#include <geom/geom.h>
44132904Spjd#include <sys/proc.h>
45132904Spjd#include <sys/kthread.h>
46139451Sjhb#include <sys/sched.h>
47132904Spjd#include <geom/mirror/g_mirror.h>
48132904Spjd
49219029SnetchildFEATURE(geom_mirror, "GEOM mirroring support");
50132904Spjd
51151897Srwatsonstatic MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
52132904Spjd
53132904SpjdSYSCTL_DECL(_kern_geom);
54248085Smariusstatic SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
55248085Smarius    "GEOM_MIRROR stuff");
56132904Spjdu_int g_mirror_debug = 0;
57134528SpjdTUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
58132904SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
59132904Spjd    "Debug level");
60135854Spjdstatic u_int g_mirror_timeout = 4;
61134226SpjdTUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
62132904SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
63132904Spjd    0, "Time to wait on all mirror components");
64137251Spjdstatic u_int g_mirror_idletime = 5;
65137251SpjdTUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
66137251SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
67137251Spjd    &g_mirror_idletime, 0, "Mark components as clean when idling");
68155545Spjdstatic u_int g_mirror_disconnect_on_failure = 1;
69155545SpjdTUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
70155545Spjd    &g_mirror_disconnect_on_failure);
71155545SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
72155545Spjd    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
73156873Spjdstatic u_int g_mirror_syncreqs = 2;
74156610SpjdTUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
75156610SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
76156610Spjd    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
77132904Spjd
/*
 * msleep() wrapper that emits a level-4 debug message before going to
 * sleep and another one after waking up.  All five arguments are passed
 * to msleep() unchanged; the msleep() return value is discarded.
 */
#define	MSLEEP(chan, lock, prio, msg, timo)	do {			\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (chan));	\
	msleep((chan), (lock), (prio), (msg), (timo));			\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (chan));	\
} while (0)
83132904Spjd
84246076Smavstatic eventhandler_tag g_mirror_post_sync = NULL;
85246076Smavstatic int g_mirror_shutdown = 0;
86132904Spjd
87132904Spjdstatic int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
88132904Spjd    struct g_geom *gp);
89132904Spjdstatic g_taste_t g_mirror_taste;
90137254Spjdstatic void g_mirror_init(struct g_class *mp);
91137254Spjdstatic void g_mirror_fini(struct g_class *mp);
92132904Spjd
93132904Spjdstruct g_class g_mirror_class = {
94132904Spjd	.name = G_MIRROR_CLASS_NAME,
95133318Sphk	.version = G_VERSION,
96132904Spjd	.ctlreq = g_mirror_config,
97132904Spjd	.taste = g_mirror_taste,
98137254Spjd	.destroy_geom = g_mirror_destroy_geom,
99137254Spjd	.init = g_mirror_init,
100137254Spjd	.fini = g_mirror_fini
101132904Spjd};
102132904Spjd
103132904Spjd
104132904Spjdstatic void g_mirror_destroy_provider(struct g_mirror_softc *sc);
105139051Spjdstatic int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
106139051Spjdstatic void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
107132904Spjdstatic void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
108132904Spjd    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
109132904Spjdstatic void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
110156610Spjdstatic void g_mirror_register_request(struct bio *bp);
111156610Spjdstatic void g_mirror_sync_release(struct g_mirror_softc *sc);
112132904Spjd
113132904Spjd
114132904Spjdstatic const char *
115132904Spjdg_mirror_disk_state2str(int state)
116132904Spjd{
117132904Spjd
118132904Spjd	switch (state) {
119132904Spjd	case G_MIRROR_DISK_STATE_NONE:
120132904Spjd		return ("NONE");
121132904Spjd	case G_MIRROR_DISK_STATE_NEW:
122132904Spjd		return ("NEW");
123132904Spjd	case G_MIRROR_DISK_STATE_ACTIVE:
124132904Spjd		return ("ACTIVE");
125132904Spjd	case G_MIRROR_DISK_STATE_STALE:
126132904Spjd		return ("STALE");
127132904Spjd	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
128132904Spjd		return ("SYNCHRONIZING");
129132904Spjd	case G_MIRROR_DISK_STATE_DISCONNECTED:
130132904Spjd		return ("DISCONNECTED");
131132904Spjd	case G_MIRROR_DISK_STATE_DESTROY:
132132904Spjd		return ("DESTROY");
133132904Spjd	default:
134132904Spjd		return ("INVALID");
135132904Spjd	}
136132904Spjd}
137132904Spjd
138132904Spjdstatic const char *
139132904Spjdg_mirror_device_state2str(int state)
140132904Spjd{
141132904Spjd
142132904Spjd	switch (state) {
143132904Spjd	case G_MIRROR_DEVICE_STATE_STARTING:
144132904Spjd		return ("STARTING");
145132904Spjd	case G_MIRROR_DEVICE_STATE_RUNNING:
146132904Spjd		return ("RUNNING");
147132904Spjd	default:
148132904Spjd		return ("INVALID");
149132904Spjd	}
150132904Spjd}
151132904Spjd
152132904Spjdstatic const char *
153132904Spjdg_mirror_get_diskname(struct g_mirror_disk *disk)
154132904Spjd{
155132904Spjd
156132904Spjd	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
157132904Spjd		return ("[unknown]");
158132904Spjd	return (disk->d_name);
159132904Spjd}
160132904Spjd
161132904Spjd/*
162132904Spjd * --- Events handling functions ---
163132904Spjd * Events in geom_mirror are used to maintain disks and device status
164132904Spjd * from one thread to simplify locking.
165132904Spjd */
166132904Spjdstatic void
167132904Spjdg_mirror_event_free(struct g_mirror_event *ep)
168132904Spjd{
169132904Spjd
170132904Spjd	free(ep, M_MIRROR);
171132904Spjd}
172132904Spjd
173132904Spjdint
174132904Spjdg_mirror_event_send(void *arg, int state, int flags)
175132904Spjd{
176132904Spjd	struct g_mirror_softc *sc;
177132904Spjd	struct g_mirror_disk *disk;
178132904Spjd	struct g_mirror_event *ep;
179132904Spjd	int error;
180132904Spjd
181132904Spjd	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
182132904Spjd	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
183132904Spjd	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
184132904Spjd		disk = NULL;
185132904Spjd		sc = arg;
186132904Spjd	} else {
187132904Spjd		disk = arg;
188132904Spjd		sc = disk->d_softc;
189132904Spjd	}
190132904Spjd	ep->e_disk = disk;
191132904Spjd	ep->e_state = state;
192132904Spjd	ep->e_flags = flags;
193132904Spjd	ep->e_error = 0;
194132904Spjd	mtx_lock(&sc->sc_events_mtx);
195132904Spjd	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
196132904Spjd	mtx_unlock(&sc->sc_events_mtx);
197132904Spjd	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
198132904Spjd	mtx_lock(&sc->sc_queue_mtx);
199132904Spjd	wakeup(sc);
200132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
201132904Spjd	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
202132904Spjd		return (0);
203156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
204132904Spjd	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
205156610Spjd	sx_xunlock(&sc->sc_lock);
206132904Spjd	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
207132904Spjd		mtx_lock(&sc->sc_events_mtx);
208132904Spjd		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
209132904Spjd		    hz * 5);
210132904Spjd	}
211132904Spjd	error = ep->e_error;
212132904Spjd	g_mirror_event_free(ep);
213156610Spjd	sx_xlock(&sc->sc_lock);
214132904Spjd	return (error);
215132904Spjd}
216132904Spjd
217132904Spjdstatic struct g_mirror_event *
218132904Spjdg_mirror_event_get(struct g_mirror_softc *sc)
219132904Spjd{
220132904Spjd	struct g_mirror_event *ep;
221132904Spjd
222132904Spjd	mtx_lock(&sc->sc_events_mtx);
223132904Spjd	ep = TAILQ_FIRST(&sc->sc_events);
224132904Spjd	mtx_unlock(&sc->sc_events_mtx);
225132904Spjd	return (ep);
226132904Spjd}
227132904Spjd
228132904Spjdstatic void
229139140Spjdg_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
230139140Spjd{
231139140Spjd
232139140Spjd	mtx_lock(&sc->sc_events_mtx);
233139140Spjd	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
234139140Spjd	mtx_unlock(&sc->sc_events_mtx);
235139140Spjd}
236139140Spjd
237139140Spjdstatic void
238132904Spjdg_mirror_event_cancel(struct g_mirror_disk *disk)
239132904Spjd{
240132904Spjd	struct g_mirror_softc *sc;
241132904Spjd	struct g_mirror_event *ep, *tmpep;
242132904Spjd
243156610Spjd	sc = disk->d_softc;
244156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
245132904Spjd
246132904Spjd	mtx_lock(&sc->sc_events_mtx);
247132904Spjd	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
248132904Spjd		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
249132904Spjd			continue;
250132904Spjd		if (ep->e_disk != disk)
251132904Spjd			continue;
252132904Spjd		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
253132904Spjd		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
254132904Spjd			g_mirror_event_free(ep);
255132904Spjd		else {
256132904Spjd			ep->e_error = ECANCELED;
257132904Spjd			wakeup(ep);
258132904Spjd		}
259132904Spjd	}
260132904Spjd	mtx_unlock(&sc->sc_events_mtx);
261132904Spjd}
262132904Spjd
263132904Spjd/*
264132904Spjd * Return the number of disks in given state.
265132904Spjd * If state is equal to -1, count all connected disks.
266132904Spjd */
267132904Spjdu_int
268132904Spjdg_mirror_ndisks(struct g_mirror_softc *sc, int state)
269132904Spjd{
270132904Spjd	struct g_mirror_disk *disk;
271132904Spjd	u_int n = 0;
272132904Spjd
273156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
274156610Spjd
275132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
276132904Spjd		if (state == -1 || disk->d_state == state)
277132904Spjd			n++;
278132904Spjd	}
279132904Spjd	return (n);
280132904Spjd}
281132904Spjd
282132904Spjd/*
283132904Spjd * Find a disk in mirror by its disk ID.
284132904Spjd */
285132904Spjdstatic struct g_mirror_disk *
286132904Spjdg_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
287132904Spjd{
288132904Spjd	struct g_mirror_disk *disk;
289132904Spjd
290156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
291132904Spjd
292132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
293132904Spjd		if (disk->d_id == id)
294132904Spjd			return (disk);
295132904Spjd	}
296132904Spjd	return (NULL);
297132904Spjd}
298132904Spjd
299132904Spjdstatic u_int
300132904Spjdg_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
301132904Spjd{
302132904Spjd	struct bio *bp;
303132904Spjd	u_int nreqs = 0;
304132904Spjd
305132904Spjd	mtx_lock(&sc->sc_queue_mtx);
306132904Spjd	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
307132904Spjd		if (bp->bio_from == cp)
308132904Spjd			nreqs++;
309132904Spjd	}
310132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
311132904Spjd	return (nreqs);
312132904Spjd}
313132904Spjd
314133484Spjdstatic int
315133484Spjdg_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
316132904Spjd{
317132904Spjd
318137248Spjd	if (cp->index > 0) {
319132904Spjd		G_MIRROR_DEBUG(2,
320132904Spjd		    "I/O requests for %s exist, can't destroy it now.",
321132904Spjd		    cp->provider->name);
322133484Spjd		return (1);
323132904Spjd	}
324132904Spjd	if (g_mirror_nrequests(sc, cp) > 0) {
325132904Spjd		G_MIRROR_DEBUG(2,
326132904Spjd		    "I/O requests for %s in queue, can't destroy it now.",
327132904Spjd		    cp->provider->name);
328133484Spjd		return (1);
329133484Spjd	}
330133484Spjd	return (0);
331133484Spjd}
332133484Spjd
333133484Spjdstatic void
334139053Spjdg_mirror_destroy_consumer(void *arg, int flags __unused)
335139051Spjd{
336139051Spjd	struct g_consumer *cp;
337139051Spjd
338156610Spjd	g_topology_assert();
339156610Spjd
340139051Spjd	cp = arg;
341139051Spjd	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
342139051Spjd	g_detach(cp);
343139051Spjd	g_destroy_consumer(cp);
344139051Spjd}
345139051Spjd
346139051Spjdstatic void
347133484Spjdg_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
348133484Spjd{
349139051Spjd	struct g_provider *pp;
350139051Spjd	int retaste_wait;
351133484Spjd
352133484Spjd	g_topology_assert();
353133484Spjd
354133484Spjd	cp->private = NULL;
355133484Spjd	if (g_mirror_is_busy(sc, cp))
356132904Spjd		return;
357139051Spjd	pp = cp->provider;
358139051Spjd	retaste_wait = 0;
359139051Spjd	if (cp->acw == 1) {
360139051Spjd		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
361139051Spjd			retaste_wait = 1;
362139051Spjd	}
363139051Spjd	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
364139051Spjd	    -cp->acw, -cp->ace, 0);
365139053Spjd	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
366139053Spjd		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
367139051Spjd	if (retaste_wait) {
368139051Spjd		/*
369139051Spjd		 * After retaste event was send (inside g_access()), we can send
370139051Spjd		 * event to detach and destroy consumer.
371139051Spjd		 * A class, which has consumer to the given provider connected
372139051Spjd		 * will not receive retaste event for the provider.
373139051Spjd		 * This is the way how I ignore retaste events when I close
374139051Spjd		 * consumers opened for write: I detach and destroy consumer
375139051Spjd		 * after retaste event is sent.
376139051Spjd		 */
377139051Spjd		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
378139051Spjd		return;
379139051Spjd	}
380139051Spjd	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
381132904Spjd	g_detach(cp);
382132904Spjd	g_destroy_consumer(cp);
383132904Spjd}
384132904Spjd
385132904Spjdstatic int
386132904Spjdg_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
387132904Spjd{
388144143Spjd	struct g_consumer *cp;
389132904Spjd	int error;
390132904Spjd
391156610Spjd	g_topology_assert_not();
392132904Spjd	KASSERT(disk->d_consumer == NULL,
393132904Spjd	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
394132904Spjd
395156610Spjd	g_topology_lock();
396144143Spjd	cp = g_new_consumer(disk->d_softc->sc_geom);
397144143Spjd	error = g_attach(cp, pp);
398144143Spjd	if (error != 0) {
399144143Spjd		g_destroy_consumer(cp);
400156610Spjd		g_topology_unlock();
401132904Spjd		return (error);
402144143Spjd	}
403144143Spjd	error = g_access(cp, 1, 1, 1);
404139051Spjd	if (error != 0) {
405144143Spjd		g_detach(cp);
406144143Spjd		g_destroy_consumer(cp);
407156610Spjd		g_topology_unlock();
408139051Spjd		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
409139051Spjd		    pp->name, error);
410139051Spjd		return (error);
411139051Spjd	}
412156610Spjd	g_topology_unlock();
413144143Spjd	disk->d_consumer = cp;
414144143Spjd	disk->d_consumer->private = disk;
415144143Spjd	disk->d_consumer->index = 0;
416139051Spjd
417132904Spjd	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
418132904Spjd	return (0);
419132904Spjd}
420132904Spjd
421132904Spjdstatic void
422133484Spjdg_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
423132904Spjd{
424132904Spjd
425132904Spjd	g_topology_assert();
426132904Spjd
427132904Spjd	if (cp == NULL)
428132904Spjd		return;
429139051Spjd	if (cp->provider != NULL)
430133484Spjd		g_mirror_kill_consumer(sc, cp);
431139051Spjd	else
432132904Spjd		g_destroy_consumer(cp);
433132904Spjd}
434132904Spjd
435132904Spjd/*
436132904Spjd * Initialize disk. This means allocate memory, create consumer, attach it
437132904Spjd * to the provider and open access (r1w1e1) to it.
438132904Spjd */
439132904Spjdstatic struct g_mirror_disk *
440132904Spjdg_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
441132904Spjd    struct g_mirror_metadata *md, int *errorp)
442132904Spjd{
443132904Spjd	struct g_mirror_disk *disk;
444238500Sglebius	int i, error;
445132904Spjd
446132904Spjd	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
447132904Spjd	if (disk == NULL) {
448132904Spjd		error = ENOMEM;
449132904Spjd		goto fail;
450132904Spjd	}
451132904Spjd	disk->d_softc = sc;
452132904Spjd	error = g_mirror_connect_disk(disk, pp);
453132904Spjd	if (error != 0)
454132904Spjd		goto fail;
455132904Spjd	disk->d_id = md->md_did;
456132904Spjd	disk->d_state = G_MIRROR_DISK_STATE_NONE;
457132904Spjd	disk->d_priority = md->md_priority;
458132904Spjd	disk->d_flags = md->md_dflags;
459238500Sglebius	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
460246241Savg	if (error == 0 && i != 0)
461238500Sglebius		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
462133373Spjd	if (md->md_provider[0] != '\0')
463133373Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
464132904Spjd	disk->d_sync.ds_consumer = NULL;
465132904Spjd	disk->d_sync.ds_offset = md->md_sync_offset;
466132904Spjd	disk->d_sync.ds_offset_done = md->md_sync_offset;
467139213Spjd	disk->d_genid = md->md_genid;
468132904Spjd	disk->d_sync.ds_syncid = md->md_syncid;
469132904Spjd	if (errorp != NULL)
470132904Spjd		*errorp = 0;
471132904Spjd	return (disk);
472132904Spjdfail:
473132904Spjd	if (errorp != NULL)
474132904Spjd		*errorp = error;
475144143Spjd	if (disk != NULL)
476132904Spjd		free(disk, M_MIRROR);
477132904Spjd	return (NULL);
478132904Spjd}
479132904Spjd
480132904Spjdstatic void
481132904Spjdg_mirror_destroy_disk(struct g_mirror_disk *disk)
482132904Spjd{
483132904Spjd	struct g_mirror_softc *sc;
484132904Spjd
485156610Spjd	g_topology_assert_not();
486156610Spjd	sc = disk->d_softc;
487156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
488132904Spjd
489132904Spjd	LIST_REMOVE(disk, d_next);
490132904Spjd	g_mirror_event_cancel(disk);
491132904Spjd	if (sc->sc_hint == disk)
492132904Spjd		sc->sc_hint = NULL;
493132904Spjd	switch (disk->d_state) {
494132904Spjd	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
495132904Spjd		g_mirror_sync_stop(disk, 1);
496132904Spjd		/* FALLTHROUGH */
497132904Spjd	case G_MIRROR_DISK_STATE_NEW:
498132904Spjd	case G_MIRROR_DISK_STATE_STALE:
499132904Spjd	case G_MIRROR_DISK_STATE_ACTIVE:
500156610Spjd		g_topology_lock();
501133484Spjd		g_mirror_disconnect_consumer(sc, disk->d_consumer);
502156610Spjd		g_topology_unlock();
503133114Spjd		free(disk, M_MIRROR);
504132904Spjd		break;
505132904Spjd	default:
506132904Spjd		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
507132904Spjd		    g_mirror_get_diskname(disk),
508132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
509132904Spjd	}
510132904Spjd}
511132904Spjd
512132904Spjdstatic void
513132904Spjdg_mirror_destroy_device(struct g_mirror_softc *sc)
514132904Spjd{
515132904Spjd	struct g_mirror_disk *disk;
516132904Spjd	struct g_mirror_event *ep;
517132904Spjd	struct g_geom *gp;
518133484Spjd	struct g_consumer *cp, *tmpcp;
519132904Spjd
520156610Spjd	g_topology_assert_not();
521156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
522132904Spjd
523132904Spjd	gp = sc->sc_geom;
524132904Spjd	if (sc->sc_provider != NULL)
525132904Spjd		g_mirror_destroy_provider(sc);
526132904Spjd	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
527132904Spjd	    disk = LIST_FIRST(&sc->sc_disks)) {
528137254Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
529137254Spjd		g_mirror_update_metadata(disk);
530132904Spjd		g_mirror_destroy_disk(disk);
531132904Spjd	}
532132904Spjd	while ((ep = g_mirror_event_get(sc)) != NULL) {
533139140Spjd		g_mirror_event_remove(sc, ep);
534132904Spjd		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
535132904Spjd			g_mirror_event_free(ep);
536132904Spjd		else {
537132904Spjd			ep->e_error = ECANCELED;
538132904Spjd			ep->e_flags |= G_MIRROR_EVENT_DONE;
539132904Spjd			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
540132904Spjd			mtx_lock(&sc->sc_events_mtx);
541132904Spjd			wakeup(ep);
542132904Spjd			mtx_unlock(&sc->sc_events_mtx);
543132904Spjd		}
544132904Spjd	}
545132904Spjd	callout_drain(&sc->sc_callout);
546133484Spjd
547156610Spjd	g_topology_lock();
548133484Spjd	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
549133484Spjd		g_mirror_disconnect_consumer(sc, cp);
550132922Spjd	}
551133484Spjd	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
552156610Spjd	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
553156610Spjd	g_wither_geom(gp, ENXIO);
554156610Spjd	g_topology_unlock();
555132904Spjd	mtx_destroy(&sc->sc_queue_mtx);
556132904Spjd	mtx_destroy(&sc->sc_events_mtx);
557156610Spjd	sx_xunlock(&sc->sc_lock);
558156610Spjd	sx_destroy(&sc->sc_lock);
559132904Spjd}
560132904Spjd
561132904Spjdstatic void
562132904Spjdg_mirror_orphan(struct g_consumer *cp)
563132904Spjd{
564132904Spjd	struct g_mirror_disk *disk;
565132904Spjd
566132904Spjd	g_topology_assert();
567132904Spjd
568132904Spjd	disk = cp->private;
569132904Spjd	if (disk == NULL)
570132904Spjd		return;
571139670Spjd	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
572132904Spjd	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
573132904Spjd	    G_MIRROR_EVENT_DONTWAIT);
574132904Spjd}
575132904Spjd
576132904Spjd/*
577132904Spjd * Function should return the next active disk on the list.
578132904Spjd * It is possible that it will be the same disk as given.
579132904Spjd * If there are no active disks on list, NULL is returned.
580132904Spjd */
581132904Spjdstatic __inline struct g_mirror_disk *
582132904Spjdg_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
583132904Spjd{
584132904Spjd	struct g_mirror_disk *dp;
585132904Spjd
586132904Spjd	for (dp = LIST_NEXT(disk, d_next); dp != disk;
587132904Spjd	    dp = LIST_NEXT(dp, d_next)) {
588132904Spjd		if (dp == NULL)
589132904Spjd			dp = LIST_FIRST(&sc->sc_disks);
590132904Spjd		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
591132904Spjd			break;
592132904Spjd	}
593132904Spjd	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
594132904Spjd		return (NULL);
595132904Spjd	return (dp);
596132904Spjd}
597132904Spjd
598132904Spjdstatic struct g_mirror_disk *
599132904Spjdg_mirror_get_disk(struct g_mirror_softc *sc)
600132904Spjd{
601132904Spjd	struct g_mirror_disk *disk;
602132904Spjd
603132904Spjd	if (sc->sc_hint == NULL) {
604132904Spjd		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
605132904Spjd		if (sc->sc_hint == NULL)
606132904Spjd			return (NULL);
607132904Spjd	}
608132904Spjd	disk = sc->sc_hint;
609132904Spjd	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
610132904Spjd		disk = g_mirror_find_next(sc, disk);
611132904Spjd		if (disk == NULL)
612132904Spjd			return (NULL);
613132904Spjd	}
614132904Spjd	sc->sc_hint = g_mirror_find_next(sc, disk);
615132904Spjd	return (disk);
616132904Spjd}
617132904Spjd
618132904Spjdstatic int
619133752Spjdg_mirror_write_metadata(struct g_mirror_disk *disk,
620133752Spjd    struct g_mirror_metadata *md)
621132904Spjd{
622132904Spjd	struct g_mirror_softc *sc;
623132904Spjd	struct g_consumer *cp;
624132904Spjd	off_t offset, length;
625132904Spjd	u_char *sector;
626139051Spjd	int error = 0;
627132904Spjd
628156610Spjd	g_topology_assert_not();
629156610Spjd	sc = disk->d_softc;
630156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
631132904Spjd
632132904Spjd	cp = disk->d_consumer;
633132904Spjd	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
634132904Spjd	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
635156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
636139051Spjd	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
637139051Spjd	    cp->acw, cp->ace));
638132904Spjd	length = cp->provider->sectorsize;
639132904Spjd	offset = cp->provider->mediasize - length;
640132904Spjd	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
641260507Sae	if (md != NULL &&
642260507Sae	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
643139051Spjd		mirror_metadata_encode(md, sector);
644139051Spjd	error = g_write_data(cp, offset, sector, length);
645132904Spjd	free(sector, M_MIRROR);
646132904Spjd	if (error != 0) {
647155545Spjd		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
648155545Spjd			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
649155545Spjd			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
650155545Spjd			    "(device=%s, error=%d).",
651155545Spjd			    g_mirror_get_diskname(disk), sc->sc_name, error);
652155545Spjd		} else {
653155545Spjd			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
654155545Spjd			    "(device=%s, error=%d).",
655155545Spjd			    g_mirror_get_diskname(disk), sc->sc_name, error);
656155545Spjd		}
657155545Spjd		if (g_mirror_disconnect_on_failure &&
658155545Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
659155545Spjd			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
660155545Spjd			g_mirror_event_send(disk,
661155545Spjd			    G_MIRROR_DISK_STATE_DISCONNECTED,
662155545Spjd			    G_MIRROR_EVENT_DONTWAIT);
663155545Spjd		}
664132904Spjd	}
665133752Spjd	return (error);
666132904Spjd}
667132904Spjd
668133752Spjdstatic int
669133752Spjdg_mirror_clear_metadata(struct g_mirror_disk *disk)
670133752Spjd{
671133752Spjd	int error;
672133752Spjd
673156610Spjd	g_topology_assert_not();
674156610Spjd	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
675156610Spjd
676133752Spjd	error = g_mirror_write_metadata(disk, NULL);
677133752Spjd	if (error == 0) {
678133752Spjd		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
679133752Spjd		    g_mirror_get_diskname(disk));
680133752Spjd	} else {
681133752Spjd		G_MIRROR_DEBUG(0,
682133752Spjd		    "Cannot clear metadata on disk %s (error=%d).",
683133752Spjd		    g_mirror_get_diskname(disk), error);
684133752Spjd	}
685133752Spjd	return (error);
686133752Spjd}
687133752Spjd
688132904Spjdvoid
689132904Spjdg_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
690132904Spjd    struct g_mirror_metadata *md)
691132904Spjd{
692132904Spjd
693132904Spjd	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
694132904Spjd	md->md_version = G_MIRROR_VERSION;
695132904Spjd	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
696132904Spjd	md->md_mid = sc->sc_id;
697132904Spjd	md->md_all = sc->sc_ndisks;
698132904Spjd	md->md_slice = sc->sc_slice;
699132904Spjd	md->md_balance = sc->sc_balance;
700139213Spjd	md->md_genid = sc->sc_genid;
701132904Spjd	md->md_mediasize = sc->sc_mediasize;
702132904Spjd	md->md_sectorsize = sc->sc_sectorsize;
703132904Spjd	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
704133373Spjd	bzero(md->md_provider, sizeof(md->md_provider));
705132904Spjd	if (disk == NULL) {
706132904Spjd		md->md_did = arc4random();
707132904Spjd		md->md_priority = 0;
708132904Spjd		md->md_syncid = 0;
709132904Spjd		md->md_dflags = 0;
710132904Spjd		md->md_sync_offset = 0;
711142727Spjd		md->md_provsize = 0;
712132904Spjd	} else {
713132904Spjd		md->md_did = disk->d_id;
714132904Spjd		md->md_priority = disk->d_priority;
715132904Spjd		md->md_syncid = disk->d_sync.ds_syncid;
716132904Spjd		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
717132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
718132904Spjd			md->md_sync_offset = disk->d_sync.ds_offset_done;
719132904Spjd		else
720132904Spjd			md->md_sync_offset = 0;
721133373Spjd		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
722133373Spjd			strlcpy(md->md_provider,
723133373Spjd			    disk->d_consumer->provider->name,
724133373Spjd			    sizeof(md->md_provider));
725133373Spjd		}
726142727Spjd		md->md_provsize = disk->d_consumer->provider->mediasize;
727132904Spjd	}
728132904Spjd}
729132904Spjd
730132904Spjdvoid
731132904Spjdg_mirror_update_metadata(struct g_mirror_disk *disk)
732132904Spjd{
733156610Spjd	struct g_mirror_softc *sc;
734132904Spjd	struct g_mirror_metadata md;
735133752Spjd	int error;
736132904Spjd
737156610Spjd	g_topology_assert_not();
738156610Spjd	sc = disk->d_softc;
739156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
740156610Spjd
741260507Sae	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
742260507Sae		g_mirror_fill_metadata(sc, disk, &md);
743133752Spjd	error = g_mirror_write_metadata(disk, &md);
744133752Spjd	if (error == 0) {
745133752Spjd		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
746133752Spjd		    g_mirror_get_diskname(disk));
747132904Spjd	} else {
748132904Spjd		G_MIRROR_DEBUG(0,
749132904Spjd		    "Cannot update metadata on disk %s (error=%d).",
750132904Spjd		    g_mirror_get_diskname(disk), error);
751132904Spjd	}
752132904Spjd}
753132904Spjd
754132904Spjdstatic void
755139051Spjdg_mirror_bump_syncid(struct g_mirror_softc *sc)
756132904Spjd{
757132904Spjd	struct g_mirror_disk *disk;
758132904Spjd
759156610Spjd	g_topology_assert_not();
760156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
761132904Spjd	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
762132904Spjd	    ("%s called with no active disks (device=%s).", __func__,
763132904Spjd	    sc->sc_name));
764132904Spjd
765132904Spjd	sc->sc_syncid++;
766132954Spjd	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
767132954Spjd	    sc->sc_syncid);
768132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
769132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
770132904Spjd		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
771132904Spjd			disk->d_sync.ds_syncid = sc->sc_syncid;
772132904Spjd			g_mirror_update_metadata(disk);
773132904Spjd		}
774132904Spjd	}
775132904Spjd}
776132904Spjd
777137248Spjdstatic void
778139213Spjdg_mirror_bump_genid(struct g_mirror_softc *sc)
779139213Spjd{
780139213Spjd	struct g_mirror_disk *disk;
781139213Spjd
782156610Spjd	g_topology_assert_not();
783156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
784139213Spjd	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
785139213Spjd	    ("%s called with no active disks (device=%s).", __func__,
786139213Spjd	    sc->sc_name));
787139213Spjd
788139213Spjd	sc->sc_genid++;
789139213Spjd	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
790139213Spjd	    sc->sc_genid);
791139213Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
792139213Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
793139213Spjd		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
794139246Spjd			disk->d_genid = sc->sc_genid;
795139213Spjd			g_mirror_update_metadata(disk);
796139213Spjd		}
797139213Spjd	}
798139213Spjd}
799139213Spjd
800155539Spjdstatic int
801156610Spjdg_mirror_idle(struct g_mirror_softc *sc, int acw)
802137248Spjd{
803137248Spjd	struct g_mirror_disk *disk;
804155539Spjd	int timeout;
805137248Spjd
806156610Spjd	g_topology_assert_not();
807156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
808156610Spjd
809155539Spjd	if (sc->sc_provider == NULL)
810155539Spjd		return (0);
811163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
812163888Spjd		return (0);
813155539Spjd	if (sc->sc_idle)
814155539Spjd		return (0);
815155539Spjd	if (sc->sc_writes > 0)
816155539Spjd		return (0);
817156610Spjd	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
818155581Spjd		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
819246076Smav		if (!g_mirror_shutdown && timeout > 0)
820155539Spjd			return (timeout);
821155539Spjd	}
822137248Spjd	sc->sc_idle = 1;
823137248Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
824137248Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
825137248Spjd			continue;
826137248Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
827137248Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
828137248Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
829137248Spjd		g_mirror_update_metadata(disk);
830137248Spjd	}
831155539Spjd	return (0);
832137248Spjd}
833137248Spjd
834137248Spjdstatic void
835137248Spjdg_mirror_unidle(struct g_mirror_softc *sc)
836137248Spjd{
837137248Spjd	struct g_mirror_disk *disk;
838137248Spjd
839156610Spjd	g_topology_assert_not();
840156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
841156610Spjd
842163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
843163888Spjd		return;
844137248Spjd	sc->sc_idle = 0;
845155581Spjd	sc->sc_last_write = time_uptime;
846137248Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
847137248Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
848137248Spjd			continue;
849137248Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
850137248Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
851137248Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
852137248Spjd		g_mirror_update_metadata(disk);
853137248Spjd	}
854137248Spjd}
855137248Spjd
856132904Spjdstatic void
857132904Spjdg_mirror_done(struct bio *bp)
858132904Spjd{
859132904Spjd	struct g_mirror_softc *sc;
860132904Spjd
861132904Spjd	sc = bp->bio_from->geom->softc;
862162282Spjd	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
863132904Spjd	mtx_lock(&sc->sc_queue_mtx);
864132904Spjd	bioq_disksort(&sc->sc_queue, bp);
865201566Smav	mtx_unlock(&sc->sc_queue_mtx);
866132904Spjd	wakeup(sc);
867132904Spjd}
868132904Spjd
869132904Spjdstatic void
870132904Spjdg_mirror_regular_request(struct bio *bp)
871132904Spjd{
872132904Spjd	struct g_mirror_softc *sc;
873132904Spjd	struct g_mirror_disk *disk;
874132904Spjd	struct bio *pbp;
875132904Spjd
876132904Spjd	g_topology_assert_not();
877132904Spjd
878132904Spjd	pbp = bp->bio_parent;
879132904Spjd	sc = pbp->bio_to->geom->softc;
880155539Spjd	bp->bio_from->index--;
881155539Spjd	if (bp->bio_cmd == BIO_WRITE)
882155539Spjd		sc->sc_writes--;
883132904Spjd	disk = bp->bio_from->private;
884132904Spjd	if (disk == NULL) {
885132904Spjd		g_topology_lock();
886132904Spjd		g_mirror_kill_consumer(sc, bp->bio_from);
887132904Spjd		g_topology_unlock();
888132904Spjd	}
889132904Spjd
890132904Spjd	pbp->bio_inbed++;
891132904Spjd	KASSERT(pbp->bio_inbed <= pbp->bio_children,
892132904Spjd	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
893132904Spjd	    pbp->bio_children));
894132904Spjd	if (bp->bio_error == 0 && pbp->bio_error == 0) {
895132904Spjd		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
896132904Spjd		g_destroy_bio(bp);
897132904Spjd		if (pbp->bio_children == pbp->bio_inbed) {
898132904Spjd			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
899132904Spjd			pbp->bio_completed = pbp->bio_length;
900238500Sglebius			if (pbp->bio_cmd == BIO_WRITE ||
901238500Sglebius			    pbp->bio_cmd == BIO_DELETE) {
902156610Spjd				bioq_remove(&sc->sc_inflight, pbp);
903156610Spjd				/* Release delayed sync requests if possible. */
904156610Spjd				g_mirror_sync_release(sc);
905156610Spjd			}
906132904Spjd			g_io_deliver(pbp, pbp->bio_error);
907132904Spjd		}
908132904Spjd		return;
909132904Spjd	} else if (bp->bio_error != 0) {
910132904Spjd		if (pbp->bio_error == 0)
911132904Spjd			pbp->bio_error = bp->bio_error;
912132904Spjd		if (disk != NULL) {
913155545Spjd			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
914155545Spjd				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
915155545Spjd				G_MIRROR_LOGREQ(0, bp,
916155545Spjd				    "Request failed (error=%d).",
917155545Spjd				    bp->bio_error);
918155545Spjd			} else {
919155545Spjd				G_MIRROR_LOGREQ(1, bp,
920155545Spjd				    "Request failed (error=%d).",
921155545Spjd				    bp->bio_error);
922155545Spjd			}
923155545Spjd			if (g_mirror_disconnect_on_failure &&
924155545Spjd			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
925155545Spjd			{
926155545Spjd				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
927155545Spjd				g_mirror_event_send(disk,
928155545Spjd				    G_MIRROR_DISK_STATE_DISCONNECTED,
929155545Spjd				    G_MIRROR_EVENT_DONTWAIT);
930155545Spjd			}
931132904Spjd		}
932132904Spjd		switch (pbp->bio_cmd) {
933132904Spjd		case BIO_DELETE:
934132904Spjd		case BIO_WRITE:
935132904Spjd			pbp->bio_inbed--;
936132904Spjd			pbp->bio_children--;
937132904Spjd			break;
938132904Spjd		}
939132904Spjd	}
940132904Spjd	g_destroy_bio(bp);
941132904Spjd
942132904Spjd	switch (pbp->bio_cmd) {
943132904Spjd	case BIO_READ:
944155545Spjd		if (pbp->bio_inbed < pbp->bio_children)
945155545Spjd			break;
946155545Spjd		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
947155545Spjd			g_io_deliver(pbp, pbp->bio_error);
948155545Spjd		else {
949132904Spjd			pbp->bio_error = 0;
950132904Spjd			mtx_lock(&sc->sc_queue_mtx);
951132904Spjd			bioq_disksort(&sc->sc_queue, pbp);
952201566Smav			mtx_unlock(&sc->sc_queue_mtx);
953132904Spjd			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
954132904Spjd			wakeup(sc);
955132904Spjd		}
956132904Spjd		break;
957132904Spjd	case BIO_DELETE:
958132904Spjd	case BIO_WRITE:
959132904Spjd		if (pbp->bio_children == 0) {
960132904Spjd			/*
961132904Spjd			 * All requests failed.
962132904Spjd			 */
963132904Spjd		} else if (pbp->bio_inbed < pbp->bio_children) {
964132904Spjd			/* Do nothing. */
965132904Spjd			break;
966132904Spjd		} else if (pbp->bio_children == pbp->bio_inbed) {
967132904Spjd			/* Some requests succeeded. */
968132904Spjd			pbp->bio_error = 0;
969132904Spjd			pbp->bio_completed = pbp->bio_length;
970132904Spjd		}
971156610Spjd		bioq_remove(&sc->sc_inflight, pbp);
972156610Spjd		/* Release delayed sync requests if possible. */
973156610Spjd		g_mirror_sync_release(sc);
974132904Spjd		g_io_deliver(pbp, pbp->bio_error);
975132904Spjd		break;
976132904Spjd	default:
977132904Spjd		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
978132904Spjd		break;
979132904Spjd	}
980132904Spjd}
981132904Spjd
982132904Spjdstatic void
983132904Spjdg_mirror_sync_done(struct bio *bp)
984132904Spjd{
985132904Spjd	struct g_mirror_softc *sc;
986132904Spjd
987132904Spjd	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
988132904Spjd	sc = bp->bio_from->geom->softc;
989162282Spjd	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
990132904Spjd	mtx_lock(&sc->sc_queue_mtx);
991132904Spjd	bioq_disksort(&sc->sc_queue, bp);
992201566Smav	mtx_unlock(&sc->sc_queue_mtx);
993132904Spjd	wakeup(sc);
994132904Spjd}
995132904Spjd
996132904Spjdstatic void
997156421Spjdg_mirror_kernel_dump(struct bio *bp)
998156421Spjd{
999156421Spjd	struct g_mirror_softc *sc;
1000156421Spjd	struct g_mirror_disk *disk;
1001156421Spjd	struct bio *cbp;
1002156421Spjd	struct g_kerneldump *gkd;
1003156421Spjd
1004156421Spjd	/*
1005156421Spjd	 * We configure dumping to the first component, because this component
1006156421Spjd	 * will be used for reading with 'prefer' balance algorithm.
1007156421Spjd	 * If the component with the higest priority is currently disconnected
1008156421Spjd	 * we will not be able to read the dump after the reboot if it will be
1009156421Spjd	 * connected and synchronized later. Can we do something better?
1010156421Spjd	 */
1011156421Spjd	sc = bp->bio_to->geom->softc;
1012156421Spjd	disk = LIST_FIRST(&sc->sc_disks);
1013156421Spjd
1014156421Spjd	gkd = (struct g_kerneldump *)bp->bio_data;
1015156421Spjd	if (gkd->length > bp->bio_to->mediasize)
1016156421Spjd		gkd->length = bp->bio_to->mediasize;
1017156421Spjd	cbp = g_clone_bio(bp);
1018156421Spjd	if (cbp == NULL) {
1019156421Spjd		g_io_deliver(bp, ENOMEM);
1020156421Spjd		return;
1021156421Spjd	}
1022156421Spjd	cbp->bio_done = g_std_done;
1023156421Spjd	g_io_request(cbp, disk->d_consumer);
1024156421Spjd	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1025156421Spjd	    g_mirror_get_diskname(disk));
1026156421Spjd}
1027156421Spjd
1028156421Spjdstatic void
1029163836Spjdg_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
1030163836Spjd{
1031163836Spjd	struct bio_queue_head queue;
1032163836Spjd	struct g_mirror_disk *disk;
1033163836Spjd	struct g_consumer *cp;
1034163836Spjd	struct bio *cbp;
1035163836Spjd
1036163836Spjd	bioq_init(&queue);
1037163836Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1038163836Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1039163836Spjd			continue;
1040163836Spjd		cbp = g_clone_bio(bp);
1041163836Spjd		if (cbp == NULL) {
1042163836Spjd			for (cbp = bioq_first(&queue); cbp != NULL;
1043163836Spjd			    cbp = bioq_first(&queue)) {
1044163836Spjd				bioq_remove(&queue, cbp);
1045163836Spjd				g_destroy_bio(cbp);
1046163836Spjd			}
1047163836Spjd			if (bp->bio_error == 0)
1048163836Spjd				bp->bio_error = ENOMEM;
1049163836Spjd			g_io_deliver(bp, bp->bio_error);
1050163836Spjd			return;
1051163836Spjd		}
1052163836Spjd		bioq_insert_tail(&queue, cbp);
1053163836Spjd		cbp->bio_done = g_std_done;
1054163836Spjd		cbp->bio_caller1 = disk;
1055163836Spjd		cbp->bio_to = disk->d_consumer->provider;
1056163836Spjd	}
1057163836Spjd	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1058163836Spjd		bioq_remove(&queue, cbp);
1059163836Spjd		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1060163836Spjd		disk = cbp->bio_caller1;
1061163836Spjd		cbp->bio_caller1 = NULL;
1062163836Spjd		cp = disk->d_consumer;
1063163836Spjd		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1064163836Spjd		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1065163836Spjd		    cp->acr, cp->acw, cp->ace));
1066163836Spjd		g_io_request(cbp, disk->d_consumer);
1067163836Spjd	}
1068163836Spjd}
1069163836Spjd
1070163836Spjdstatic void
1071132904Spjdg_mirror_start(struct bio *bp)
1072132904Spjd{
1073132904Spjd	struct g_mirror_softc *sc;
1074132904Spjd
1075132904Spjd	sc = bp->bio_to->geom->softc;
1076132904Spjd	/*
1077132904Spjd	 * If sc == NULL or there are no valid disks, provider's error
1078132904Spjd	 * should be set and g_mirror_start() should not be called at all.
1079132904Spjd	 */
1080132904Spjd	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1081132904Spjd	    ("Provider's error should be set (error=%d)(mirror=%s).",
1082132904Spjd	    bp->bio_to->error, bp->bio_to->name));
1083132904Spjd	G_MIRROR_LOGREQ(3, bp, "Request received.");
1084132904Spjd
1085132904Spjd	switch (bp->bio_cmd) {
1086132904Spjd	case BIO_READ:
1087132904Spjd	case BIO_WRITE:
1088132904Spjd	case BIO_DELETE:
1089132904Spjd		break;
1090163836Spjd	case BIO_FLUSH:
1091163836Spjd		g_mirror_flush(sc, bp);
1092163836Spjd		return;
1093132904Spjd	case BIO_GETATTR:
1094238500Sglebius		if (g_handleattr_int(bp, "GEOM::candelete", 1))
1095238500Sglebius			return;
1096238500Sglebius		else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1097156421Spjd			g_mirror_kernel_dump(bp);
1098156421Spjd			return;
1099156421Spjd		}
1100156421Spjd		/* FALLTHROUGH */
1101132904Spjd	default:
1102132904Spjd		g_io_deliver(bp, EOPNOTSUPP);
1103132904Spjd		return;
1104132904Spjd	}
1105132904Spjd	mtx_lock(&sc->sc_queue_mtx);
1106132904Spjd	bioq_disksort(&sc->sc_queue, bp);
1107201566Smav	mtx_unlock(&sc->sc_queue_mtx);
1108132904Spjd	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1109132904Spjd	wakeup(sc);
1110132904Spjd}
1111132904Spjd
1112132904Spjd/*
1113156610Spjd * Return TRUE if the given request is colliding with a in-progress
1114156610Spjd * synchronization request.
1115132904Spjd */
1116156610Spjdstatic int
1117156610Spjdg_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1118132904Spjd{
1119156610Spjd	struct g_mirror_disk *disk;
1120156610Spjd	struct bio *sbp;
1121156610Spjd	off_t rstart, rend, sstart, send;
1122156610Spjd	int i;
1123156610Spjd
1124156610Spjd	if (sc->sc_sync.ds_ndisks == 0)
1125156610Spjd		return (0);
1126156610Spjd	rstart = bp->bio_offset;
1127156610Spjd	rend = bp->bio_offset + bp->bio_length;
1128156610Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1129156610Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1130156610Spjd			continue;
1131156610Spjd		for (i = 0; i < g_mirror_syncreqs; i++) {
1132156610Spjd			sbp = disk->d_sync.ds_bios[i];
1133156610Spjd			if (sbp == NULL)
1134156610Spjd				continue;
1135156610Spjd			sstart = sbp->bio_offset;
1136156610Spjd			send = sbp->bio_offset + sbp->bio_length;
1137156610Spjd			if (rend > sstart && rstart < send)
1138156610Spjd				return (1);
1139156610Spjd		}
1140156610Spjd	}
1141156610Spjd	return (0);
1142156610Spjd}
1143156610Spjd
1144156610Spjd/*
1145156610Spjd * Return TRUE if the given sync request is colliding with a in-progress regular
1146156610Spjd * request.
1147156610Spjd */
1148156610Spjdstatic int
1149156610Spjdg_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1150156610Spjd{
1151156610Spjd	off_t rstart, rend, sstart, send;
1152132904Spjd	struct bio *bp;
1153132904Spjd
1154156610Spjd	if (sc->sc_sync.ds_ndisks == 0)
1155156610Spjd		return (0);
1156156610Spjd	sstart = sbp->bio_offset;
1157156610Spjd	send = sbp->bio_offset + sbp->bio_length;
1158156610Spjd	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
1159156610Spjd		rstart = bp->bio_offset;
1160156610Spjd		rend = bp->bio_offset + bp->bio_length;
1161156610Spjd		if (rend > sstart && rstart < send)
1162156610Spjd			return (1);
1163156610Spjd	}
1164156610Spjd	return (0);
1165156610Spjd}
1166132904Spjd
1167156610Spjd/*
1168156610Spjd * Puts request onto delayed queue.
1169156610Spjd */
1170156610Spjdstatic void
1171156610Spjdg_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1172156610Spjd{
1173156610Spjd
1174156610Spjd	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1175156610Spjd	bioq_insert_head(&sc->sc_regular_delayed, bp);
1176156610Spjd}
1177156610Spjd
1178156610Spjd/*
1179156610Spjd * Puts synchronization request onto delayed queue.
1180156610Spjd */
1181156610Spjdstatic void
1182156610Spjdg_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1183156610Spjd{
1184156610Spjd
1185156610Spjd	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1186156610Spjd	bioq_insert_tail(&sc->sc_sync_delayed, bp);
1187156610Spjd}
1188156610Spjd
1189156610Spjd/*
1190156610Spjd * Releases delayed regular requests which don't collide anymore with sync
1191156610Spjd * requests.
1192156610Spjd */
1193156610Spjdstatic void
1194156610Spjdg_mirror_regular_release(struct g_mirror_softc *sc)
1195156610Spjd{
1196156610Spjd	struct bio *bp, *bp2;
1197156610Spjd
1198156610Spjd	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
1199156610Spjd		if (g_mirror_sync_collision(sc, bp))
1200156610Spjd			continue;
1201156610Spjd		bioq_remove(&sc->sc_regular_delayed, bp);
1202156610Spjd		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
1203156610Spjd		mtx_lock(&sc->sc_queue_mtx);
1204156610Spjd		bioq_insert_head(&sc->sc_queue, bp);
1205156610Spjd#if 0
1206156610Spjd		/*
1207156610Spjd		 * wakeup() is not needed, because this function is called from
1208156610Spjd		 * the worker thread.
1209156610Spjd		 */
1210156610Spjd		wakeup(&sc->sc_queue);
1211156610Spjd#endif
1212156610Spjd		mtx_unlock(&sc->sc_queue_mtx);
1213132904Spjd	}
1214132904Spjd}
1215132904Spjd
1216156610Spjd/*
1217156610Spjd * Releases delayed sync requests which don't collide anymore with regular
1218156610Spjd * requests.
1219156610Spjd */
1220132904Spjdstatic void
1221156610Spjdg_mirror_sync_release(struct g_mirror_softc *sc)
1222156610Spjd{
1223156610Spjd	struct bio *bp, *bp2;
1224156610Spjd
1225156610Spjd	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
1226156610Spjd		if (g_mirror_regular_collision(sc, bp))
1227156610Spjd			continue;
1228156610Spjd		bioq_remove(&sc->sc_sync_delayed, bp);
1229156610Spjd		G_MIRROR_LOGREQ(2, bp,
1230156610Spjd		    "Releasing delayed synchronization request.");
1231156610Spjd		g_io_request(bp, bp->bio_from);
1232156610Spjd	}
1233156610Spjd}
1234156610Spjd
1235156610Spjd/*
1236156610Spjd * Handle synchronization requests.
1237156610Spjd * Every synchronization request is two-steps process: first, READ request is
1238156610Spjd * send to active provider and then WRITE request (with read data) to the provider
1239156610Spjd * beeing synchronized. When WRITE is finished, new synchronization request is
1240156610Spjd * send.
1241156610Spjd */
1242156610Spjdstatic void
1243132904Spjdg_mirror_sync_request(struct bio *bp)
1244132904Spjd{
1245132904Spjd	struct g_mirror_softc *sc;
1246132904Spjd	struct g_mirror_disk *disk;
1247132904Spjd
1248137248Spjd	bp->bio_from->index--;
1249132904Spjd	sc = bp->bio_from->geom->softc;
1250132904Spjd	disk = bp->bio_from->private;
1251132904Spjd	if (disk == NULL) {
1252156610Spjd		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1253132904Spjd		g_topology_lock();
1254132904Spjd		g_mirror_kill_consumer(sc, bp->bio_from);
1255132904Spjd		g_topology_unlock();
1256156610Spjd		free(bp->bio_data, M_MIRROR);
1257132904Spjd		g_destroy_bio(bp);
1258156610Spjd		sx_xlock(&sc->sc_lock);
1259132904Spjd		return;
1260132904Spjd	}
1261132904Spjd
1262132904Spjd	/*
1263132904Spjd	 * Synchronization request.
1264132904Spjd	 */
1265132904Spjd	switch (bp->bio_cmd) {
1266132904Spjd	case BIO_READ:
1267132904Spjd	    {
1268132904Spjd		struct g_consumer *cp;
1269132904Spjd
1270132904Spjd		if (bp->bio_error != 0) {
1271132904Spjd			G_MIRROR_LOGREQ(0, bp,
1272132904Spjd			    "Synchronization request failed (error=%d).",
1273132904Spjd			    bp->bio_error);
1274132904Spjd			g_destroy_bio(bp);
1275132904Spjd			return;
1276132904Spjd		}
1277137248Spjd		G_MIRROR_LOGREQ(3, bp,
1278137248Spjd		    "Synchronization request half-finished.");
1279132904Spjd		bp->bio_cmd = BIO_WRITE;
1280133142Spjd		bp->bio_cflags = 0;
1281132904Spjd		cp = disk->d_consumer;
1282156610Spjd		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1283132904Spjd		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1284132904Spjd		    cp->acr, cp->acw, cp->ace));
1285137248Spjd		cp->index++;
1286132904Spjd		g_io_request(bp, cp);
1287132904Spjd		return;
1288132904Spjd	    }
1289132904Spjd	case BIO_WRITE:
1290135833Spjd	    {
1291135833Spjd		struct g_mirror_disk_sync *sync;
1292156610Spjd		off_t offset;
1293156610Spjd		void *data;
1294156610Spjd		int i;
1295135833Spjd
1296132904Spjd		if (bp->bio_error != 0) {
1297132904Spjd			G_MIRROR_LOGREQ(0, bp,
1298132904Spjd			    "Synchronization request failed (error=%d).",
1299132904Spjd			    bp->bio_error);
1300132904Spjd			g_destroy_bio(bp);
1301139670Spjd			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1302132904Spjd			g_mirror_event_send(disk,
1303132904Spjd			    G_MIRROR_DISK_STATE_DISCONNECTED,
1304132904Spjd			    G_MIRROR_EVENT_DONTWAIT);
1305132904Spjd			return;
1306132904Spjd		}
1307132904Spjd		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1308135833Spjd		sync = &disk->d_sync;
1309156610Spjd		if (sync->ds_offset == sc->sc_mediasize ||
1310156610Spjd		    sync->ds_consumer == NULL ||
1311156610Spjd		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1312156610Spjd			/* Don't send more synchronization requests. */
1313156610Spjd			sync->ds_inflight--;
1314156610Spjd			if (sync->ds_bios != NULL) {
1315156684Sru				i = (int)(uintptr_t)bp->bio_caller1;
1316156610Spjd				sync->ds_bios[i] = NULL;
1317156610Spjd			}
1318156610Spjd			free(bp->bio_data, M_MIRROR);
1319156610Spjd			g_destroy_bio(bp);
1320156610Spjd			if (sync->ds_inflight > 0)
1321156610Spjd				return;
1322156610Spjd			if (sync->ds_consumer == NULL ||
1323156610Spjd			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1324156610Spjd				return;
1325156610Spjd			}
1326156610Spjd			/* Disk up-to-date, activate it. */
1327132904Spjd			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1328132904Spjd			    G_MIRROR_EVENT_DONTWAIT);
1329132904Spjd			return;
1330156610Spjd		}
1331156610Spjd
1332156610Spjd		/* Send next synchronization request. */
1333156610Spjd		data = bp->bio_data;
1334156610Spjd		bzero(bp, sizeof(*bp));
1335156610Spjd		bp->bio_cmd = BIO_READ;
1336156610Spjd		bp->bio_offset = sync->ds_offset;
1337156610Spjd		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1338156610Spjd		sync->ds_offset += bp->bio_length;
1339156610Spjd		bp->bio_done = g_mirror_sync_done;
1340156610Spjd		bp->bio_data = data;
1341156610Spjd		bp->bio_from = sync->ds_consumer;
1342156610Spjd		bp->bio_to = sc->sc_provider;
1343156610Spjd		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1344156610Spjd		sync->ds_consumer->index++;
1345156610Spjd		/*
1346156610Spjd		 * Delay the request if it is colliding with a regular request.
1347156610Spjd		 */
1348156610Spjd		if (g_mirror_regular_collision(sc, bp))
1349156610Spjd			g_mirror_sync_delay(sc, bp);
1350156610Spjd		else
1351156610Spjd			g_io_request(bp, sync->ds_consumer);
1352156610Spjd
1353156610Spjd		/* Release delayed requests if possible. */
1354156610Spjd		g_mirror_regular_release(sc);
1355156610Spjd
1356156610Spjd		/* Find the smallest offset */
1357156610Spjd		offset = sc->sc_mediasize;
1358156610Spjd		for (i = 0; i < g_mirror_syncreqs; i++) {
1359156610Spjd			bp = sync->ds_bios[i];
1360156610Spjd			if (bp->bio_offset < offset)
1361156610Spjd				offset = bp->bio_offset;
1362156610Spjd		}
1363156610Spjd		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
1364156610Spjd			/* Update offset_done on every 100 blocks. */
1365156610Spjd			sync->ds_offset_done = offset;
1366132904Spjd			g_mirror_update_metadata(disk);
1367132904Spjd		}
1368132904Spjd		return;
1369135833Spjd	    }
1370132904Spjd	default:
1371132904Spjd		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1372132904Spjd		    bp->bio_cmd, sc->sc_name));
1373132904Spjd		break;
1374132904Spjd	}
1375132904Spjd}
1376132904Spjd
1377132904Spjdstatic void
1378133115Spjdg_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1379133115Spjd{
1380133115Spjd	struct g_mirror_disk *disk;
1381133115Spjd	struct g_consumer *cp;
1382133115Spjd	struct bio *cbp;
1383133115Spjd
1384133115Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1385133115Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1386133115Spjd			break;
1387133115Spjd	}
1388133115Spjd	if (disk == NULL) {
1389133115Spjd		if (bp->bio_error == 0)
1390133115Spjd			bp->bio_error = ENXIO;
1391133115Spjd		g_io_deliver(bp, bp->bio_error);
1392133115Spjd		return;
1393133115Spjd	}
1394133115Spjd	cbp = g_clone_bio(bp);
1395133115Spjd	if (cbp == NULL) {
1396133115Spjd		if (bp->bio_error == 0)
1397133115Spjd			bp->bio_error = ENOMEM;
1398133115Spjd		g_io_deliver(bp, bp->bio_error);
1399133115Spjd		return;
1400133115Spjd	}
1401133115Spjd	/*
1402133115Spjd	 * Fill in the component buf structure.
1403133115Spjd	 */
1404133115Spjd	cp = disk->d_consumer;
1405133115Spjd	cbp->bio_done = g_mirror_done;
1406133115Spjd	cbp->bio_to = cp->provider;
1407133115Spjd	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1408156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1409133115Spjd	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1410133115Spjd	    cp->acw, cp->ace));
1411137248Spjd	cp->index++;
1412133115Spjd	g_io_request(cbp, cp);
1413133115Spjd}
1414133115Spjd
1415133115Spjdstatic void
1416132904Spjdg_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1417132904Spjd{
1418132904Spjd	struct g_mirror_disk *disk;
1419132904Spjd	struct g_consumer *cp;
1420132904Spjd	struct bio *cbp;
1421132904Spjd
1422132904Spjd	disk = g_mirror_get_disk(sc);
1423132904Spjd	if (disk == NULL) {
1424132904Spjd		if (bp->bio_error == 0)
1425132904Spjd			bp->bio_error = ENXIO;
1426132904Spjd		g_io_deliver(bp, bp->bio_error);
1427132904Spjd		return;
1428132904Spjd	}
1429132904Spjd	cbp = g_clone_bio(bp);
1430132904Spjd	if (cbp == NULL) {
1431132904Spjd		if (bp->bio_error == 0)
1432132904Spjd			bp->bio_error = ENOMEM;
1433132904Spjd		g_io_deliver(bp, bp->bio_error);
1434132904Spjd		return;
1435132904Spjd	}
1436132904Spjd	/*
1437132904Spjd	 * Fill in the component buf structure.
1438132904Spjd	 */
1439132904Spjd	cp = disk->d_consumer;
1440132904Spjd	cbp->bio_done = g_mirror_done;
1441132904Spjd	cbp->bio_to = cp->provider;
1442132904Spjd	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1443156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1444132904Spjd	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1445132904Spjd	    cp->acw, cp->ace));
1446137248Spjd	cp->index++;
1447132904Spjd	g_io_request(cbp, cp);
1448132904Spjd}
1449132904Spjd
1450200086Smav#define TRACK_SIZE  (1 * 1024 * 1024)
1451200086Smav#define LOAD_SCALE	256
1452200086Smav#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1453200086Smav
1454132904Spjdstatic void
1455132904Spjdg_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1456132904Spjd{
1457132904Spjd	struct g_mirror_disk *disk, *dp;
1458132904Spjd	struct g_consumer *cp;
1459132904Spjd	struct bio *cbp;
1460200086Smav	int prio, best;
1461132904Spjd
1462200086Smav	/* Find a disk with the smallest load. */
1463132904Spjd	disk = NULL;
1464200086Smav	best = INT_MAX;
1465132904Spjd	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1466132904Spjd		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1467132904Spjd			continue;
1468200086Smav		prio = dp->load;
1469200086Smav		/* If disk head is precisely in position - highly prefer it. */
1470200086Smav		if (dp->d_last_offset == bp->bio_offset)
1471200086Smav			prio -= 2 * LOAD_SCALE;
1472200086Smav		else
1473200086Smav		/* If disk head is close to position - prefer it. */
1474200086Smav		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1475200086Smav			prio -= 1 * LOAD_SCALE;
1476200086Smav		if (prio <= best) {
1477132904Spjd			disk = dp;
1478200086Smav			best = prio;
1479132904Spjd		}
1480132904Spjd	}
1481146110Spjd	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1482132904Spjd	cbp = g_clone_bio(bp);
1483132904Spjd	if (cbp == NULL) {
1484132904Spjd		if (bp->bio_error == 0)
1485132904Spjd			bp->bio_error = ENOMEM;
1486132904Spjd		g_io_deliver(bp, bp->bio_error);
1487132904Spjd		return;
1488132904Spjd	}
1489132904Spjd	/*
1490132904Spjd	 * Fill in the component buf structure.
1491132904Spjd	 */
1492132904Spjd	cp = disk->d_consumer;
1493132904Spjd	cbp->bio_done = g_mirror_done;
1494132904Spjd	cbp->bio_to = cp->provider;
1495132904Spjd	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1496156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1497132904Spjd	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1498132904Spjd	    cp->acw, cp->ace));
1499137248Spjd	cp->index++;
1500200086Smav	/* Remember last head position */
1501200086Smav	disk->d_last_offset = bp->bio_offset + bp->bio_length;
1502200086Smav	/* Update loads. */
1503200086Smav	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1504200086Smav		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1505200086Smav		    dp->load * 7) / 8;
1506200086Smav	}
1507132904Spjd	g_io_request(cbp, cp);
1508132904Spjd}
1509132904Spjd
1510132904Spjdstatic void
1511132904Spjdg_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1512132904Spjd{
1513132904Spjd	struct bio_queue_head queue;
1514132904Spjd	struct g_mirror_disk *disk;
1515132904Spjd	struct g_consumer *cp;
1516132904Spjd	struct bio *cbp;
1517132904Spjd	off_t left, mod, offset, slice;
1518132904Spjd	u_char *data;
1519132904Spjd	u_int ndisks;
1520132904Spjd
1521132904Spjd	if (bp->bio_length <= sc->sc_slice) {
1522132904Spjd		g_mirror_request_round_robin(sc, bp);
1523132904Spjd		return;
1524132904Spjd	}
1525132904Spjd	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1526132904Spjd	slice = bp->bio_length / ndisks;
1527132904Spjd	mod = slice % sc->sc_provider->sectorsize;
1528132904Spjd	if (mod != 0)
1529132904Spjd		slice += sc->sc_provider->sectorsize - mod;
1530132904Spjd	/*
1531132904Spjd	 * Allocate all bios before sending any request, so we can
1532132904Spjd	 * return ENOMEM in nice and clean way.
1533132904Spjd	 */
1534132904Spjd	left = bp->bio_length;
1535132904Spjd	offset = bp->bio_offset;
1536132904Spjd	data = bp->bio_data;
1537132904Spjd	bioq_init(&queue);
1538132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1539132904Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1540132904Spjd			continue;
1541132904Spjd		cbp = g_clone_bio(bp);
1542132904Spjd		if (cbp == NULL) {
1543132904Spjd			for (cbp = bioq_first(&queue); cbp != NULL;
1544132904Spjd			    cbp = bioq_first(&queue)) {
1545132904Spjd				bioq_remove(&queue, cbp);
1546132904Spjd				g_destroy_bio(cbp);
1547132904Spjd			}
1548132904Spjd			if (bp->bio_error == 0)
1549132904Spjd				bp->bio_error = ENOMEM;
1550132904Spjd			g_io_deliver(bp, bp->bio_error);
1551132904Spjd			return;
1552132904Spjd		}
1553132904Spjd		bioq_insert_tail(&queue, cbp);
1554132904Spjd		cbp->bio_done = g_mirror_done;
1555132904Spjd		cbp->bio_caller1 = disk;
1556132904Spjd		cbp->bio_to = disk->d_consumer->provider;
1557132904Spjd		cbp->bio_offset = offset;
1558132904Spjd		cbp->bio_data = data;
1559132904Spjd		cbp->bio_length = MIN(left, slice);
1560132904Spjd		left -= cbp->bio_length;
1561132904Spjd		if (left == 0)
1562132904Spjd			break;
1563132904Spjd		offset += cbp->bio_length;
1564132904Spjd		data += cbp->bio_length;
1565132904Spjd	}
1566132904Spjd	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1567132904Spjd		bioq_remove(&queue, cbp);
1568132904Spjd		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1569132904Spjd		disk = cbp->bio_caller1;
1570132904Spjd		cbp->bio_caller1 = NULL;
1571132904Spjd		cp = disk->d_consumer;
1572156610Spjd		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1573132904Spjd		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1574132904Spjd		    cp->acr, cp->acw, cp->ace));
1575137248Spjd		disk->d_consumer->index++;
1576132904Spjd		g_io_request(cbp, disk->d_consumer);
1577132904Spjd	}
1578132904Spjd}
1579132904Spjd
1580132904Spjdstatic void
1581132904Spjdg_mirror_register_request(struct bio *bp)
1582132904Spjd{
1583132904Spjd	struct g_mirror_softc *sc;
1584132904Spjd
1585132904Spjd	sc = bp->bio_to->geom->softc;
1586132904Spjd	switch (bp->bio_cmd) {
1587132904Spjd	case BIO_READ:
1588132904Spjd		switch (sc->sc_balance) {
1589133115Spjd		case G_MIRROR_BALANCE_LOAD:
1590133115Spjd			g_mirror_request_load(sc, bp);
1591133115Spjd			break;
1592133115Spjd		case G_MIRROR_BALANCE_PREFER:
1593133115Spjd			g_mirror_request_prefer(sc, bp);
1594133115Spjd			break;
1595132904Spjd		case G_MIRROR_BALANCE_ROUND_ROBIN:
1596132904Spjd			g_mirror_request_round_robin(sc, bp);
1597132904Spjd			break;
1598132904Spjd		case G_MIRROR_BALANCE_SPLIT:
1599132904Spjd			g_mirror_request_split(sc, bp);
1600132904Spjd			break;
1601132904Spjd		}
1602132904Spjd		return;
1603132904Spjd	case BIO_WRITE:
1604132904Spjd	case BIO_DELETE:
1605132904Spjd	    {
1606132904Spjd		struct g_mirror_disk *disk;
1607135833Spjd		struct g_mirror_disk_sync *sync;
1608132904Spjd		struct bio_queue_head queue;
1609132904Spjd		struct g_consumer *cp;
1610132904Spjd		struct bio *cbp;
1611132904Spjd
1612156610Spjd		/*
1613156610Spjd		 * Delay the request if it is colliding with a synchronization
1614156610Spjd		 * request.
1615156610Spjd		 */
1616156610Spjd		if (g_mirror_sync_collision(sc, bp)) {
1617156610Spjd			g_mirror_regular_delay(sc, bp);
1618156610Spjd			return;
1619156610Spjd		}
1620156610Spjd
1621137248Spjd		if (sc->sc_idle)
1622137248Spjd			g_mirror_unidle(sc);
1623155539Spjd		else
1624155581Spjd			sc->sc_last_write = time_uptime;
1625155539Spjd
1626132904Spjd		/*
1627132904Spjd		 * Allocate all bios before sending any request, so we can
1628132904Spjd		 * return ENOMEM in nice and clean way.
1629132904Spjd		 */
1630132904Spjd		bioq_init(&queue);
1631132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1632135833Spjd			sync = &disk->d_sync;
1633132904Spjd			switch (disk->d_state) {
1634132904Spjd			case G_MIRROR_DISK_STATE_ACTIVE:
1635132904Spjd				break;
1636132904Spjd			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1637135833Spjd				if (bp->bio_offset >= sync->ds_offset)
1638132904Spjd					continue;
1639132904Spjd				break;
1640132904Spjd			default:
1641132904Spjd				continue;
1642132904Spjd			}
1643238500Sglebius			if (bp->bio_cmd == BIO_DELETE &&
1644238500Sglebius			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1645238500Sglebius				continue;
1646132904Spjd			cbp = g_clone_bio(bp);
1647132904Spjd			if (cbp == NULL) {
1648132904Spjd				for (cbp = bioq_first(&queue); cbp != NULL;
1649132904Spjd				    cbp = bioq_first(&queue)) {
1650132904Spjd					bioq_remove(&queue, cbp);
1651132904Spjd					g_destroy_bio(cbp);
1652132904Spjd				}
1653132904Spjd				if (bp->bio_error == 0)
1654132904Spjd					bp->bio_error = ENOMEM;
1655132904Spjd				g_io_deliver(bp, bp->bio_error);
1656132904Spjd				return;
1657132904Spjd			}
1658132904Spjd			bioq_insert_tail(&queue, cbp);
1659135831Spjd			cbp->bio_done = g_mirror_done;
1660132904Spjd			cp = disk->d_consumer;
1661135831Spjd			cbp->bio_caller1 = cp;
1662132904Spjd			cbp->bio_to = cp->provider;
1663156610Spjd			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1664132904Spjd			    ("Consumer %s not opened (r%dw%de%d).",
1665132904Spjd			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1666135831Spjd		}
1667135831Spjd		for (cbp = bioq_first(&queue); cbp != NULL;
1668135831Spjd		    cbp = bioq_first(&queue)) {
1669135831Spjd			bioq_remove(&queue, cbp);
1670135831Spjd			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1671135831Spjd			cp = cbp->bio_caller1;
1672135831Spjd			cbp->bio_caller1 = NULL;
1673137248Spjd			cp->index++;
1674155539Spjd			sc->sc_writes++;
1675132904Spjd			g_io_request(cbp, cp);
1676132904Spjd		}
1677132904Spjd		/*
1678156610Spjd		 * Put request onto inflight queue, so we can check if new
1679156610Spjd		 * synchronization requests don't collide with it.
1680156610Spjd		 */
1681156610Spjd		bioq_insert_tail(&sc->sc_inflight, bp);
1682156610Spjd		/*
1683132904Spjd		 * Bump syncid on first write.
1684132904Spjd		 */
1685139670Spjd		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1686139213Spjd			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1687139051Spjd			g_mirror_bump_syncid(sc);
1688132904Spjd		}
1689132904Spjd		return;
1690132904Spjd	    }
1691132904Spjd	default:
1692132904Spjd		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1693132904Spjd		    bp->bio_cmd, sc->sc_name));
1694132904Spjd		break;
1695132904Spjd	}
1696132904Spjd}
1697132904Spjd
1698133484Spjdstatic int
1699133484Spjdg_mirror_can_destroy(struct g_mirror_softc *sc)
1700133484Spjd{
1701133484Spjd	struct g_geom *gp;
1702133484Spjd	struct g_consumer *cp;
1703133484Spjd
1704133484Spjd	g_topology_assert();
1705133484Spjd	gp = sc->sc_geom;
1706158112Spjd	if (gp->softc == NULL)
1707158112Spjd		return (1);
1708235968Sae	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1709235968Sae		return (0);
1710133484Spjd	LIST_FOREACH(cp, &gp->consumer, consumer) {
1711133484Spjd		if (g_mirror_is_busy(sc, cp))
1712133484Spjd			return (0);
1713133484Spjd	}
1714133484Spjd	gp = sc->sc_sync.ds_geom;
1715133484Spjd	LIST_FOREACH(cp, &gp->consumer, consumer) {
1716133484Spjd		if (g_mirror_is_busy(sc, cp))
1717133484Spjd			return (0);
1718133484Spjd	}
1719133484Spjd	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1720133484Spjd	    sc->sc_name);
1721133484Spjd	return (1);
1722133484Spjd}
1723133484Spjd
1724133484Spjdstatic int
1725133484Spjdg_mirror_try_destroy(struct g_mirror_softc *sc)
1726133484Spjd{
1727133484Spjd
1728146616Spjd	if (sc->sc_rootmount != NULL) {
1729146616Spjd		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1730146616Spjd		    sc->sc_rootmount);
1731146616Spjd		root_mount_rel(sc->sc_rootmount);
1732146616Spjd		sc->sc_rootmount = NULL;
1733146616Spjd	}
1734139213Spjd	g_topology_lock();
1735139213Spjd	if (!g_mirror_can_destroy(sc)) {
1736139213Spjd		g_topology_unlock();
1737139213Spjd		return (0);
1738139213Spjd	}
1739158112Spjd	sc->sc_geom->softc = NULL;
1740158112Spjd	sc->sc_sync.ds_geom->softc = NULL;
1741133484Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1742133484Spjd		g_topology_unlock();
1743133484Spjd		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1744133484Spjd		    &sc->sc_worker);
1745156610Spjd		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1746156610Spjd		sx_xunlock(&sc->sc_lock);
1747133484Spjd		wakeup(&sc->sc_worker);
1748133484Spjd		sc->sc_worker = NULL;
1749133484Spjd	} else {
1750156610Spjd		g_topology_unlock();
1751133484Spjd		g_mirror_destroy_device(sc);
1752133484Spjd		free(sc, M_MIRROR);
1753133484Spjd	}
1754133484Spjd	return (1);
1755133484Spjd}
1756133484Spjd
1757132904Spjd/*
1758132904Spjd * Worker thread.
1759132904Spjd */
1760132904Spjdstatic void
1761132904Spjdg_mirror_worker(void *arg)
1762132904Spjd{
1763132904Spjd	struct g_mirror_softc *sc;
1764132904Spjd	struct g_mirror_event *ep;
1765132904Spjd	struct bio *bp;
1766155539Spjd	int timeout;
1767132904Spjd
1768132904Spjd	sc = arg;
1769170307Sjeff	thread_lock(curthread);
1770139451Sjhb	sched_prio(curthread, PRIBIO);
1771170307Sjeff	thread_unlock(curthread);
1772132904Spjd
1773156610Spjd	sx_xlock(&sc->sc_lock);
1774132904Spjd	for (;;) {
1775132904Spjd		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1776132904Spjd		/*
1777132904Spjd		 * First take a look at events.
1778132904Spjd		 * This is important to handle events before any I/O requests.
1779132904Spjd		 */
1780132904Spjd		ep = g_mirror_event_get(sc);
1781156610Spjd		if (ep != NULL) {
1782139140Spjd			g_mirror_event_remove(sc, ep);
1783132904Spjd			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1784132904Spjd				/* Update only device status. */
1785132904Spjd				G_MIRROR_DEBUG(3,
1786132904Spjd				    "Running event for device %s.",
1787132904Spjd				    sc->sc_name);
1788132904Spjd				ep->e_error = 0;
1789139051Spjd				g_mirror_update_device(sc, 1);
1790132904Spjd			} else {
1791132904Spjd				/* Update disk status. */
1792132904Spjd				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1793132904Spjd				     g_mirror_get_diskname(ep->e_disk));
1794132904Spjd				ep->e_error = g_mirror_update_disk(ep->e_disk,
1795139051Spjd				    ep->e_state);
1796132904Spjd				if (ep->e_error == 0)
1797139051Spjd					g_mirror_update_device(sc, 0);
1798132904Spjd			}
1799132904Spjd			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1800132904Spjd				KASSERT(ep->e_error == 0,
1801132904Spjd				    ("Error cannot be handled."));
1802132904Spjd				g_mirror_event_free(ep);
1803132904Spjd			} else {
1804132904Spjd				ep->e_flags |= G_MIRROR_EVENT_DONE;
1805132904Spjd				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1806132904Spjd				    ep);
1807132904Spjd				mtx_lock(&sc->sc_events_mtx);
1808132904Spjd				wakeup(ep);
1809132904Spjd				mtx_unlock(&sc->sc_events_mtx);
1810132904Spjd			}
1811132904Spjd			if ((sc->sc_flags &
1812132904Spjd			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1813156610Spjd				if (g_mirror_try_destroy(sc)) {
1814156610Spjd					curthread->td_pflags &= ~TDP_GEOM;
1815156610Spjd					G_MIRROR_DEBUG(1, "Thread exiting.");
1816172836Sjulian					kproc_exit(0);
1817156610Spjd				}
1818132904Spjd			}
1819132904Spjd			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1820132904Spjd			continue;
1821132904Spjd		}
1822132904Spjd		/*
1823155539Spjd		 * Check if we can mark array as CLEAN and if we can't take
1824155539Spjd		 * how much seconds should we wait.
1825155539Spjd		 */
1826156610Spjd		timeout = g_mirror_idle(sc, -1);
1827155539Spjd		/*
1828132904Spjd		 * Now I/O requests.
1829132904Spjd		 */
1830132904Spjd		/* Get first request from the queue. */
1831132904Spjd		mtx_lock(&sc->sc_queue_mtx);
1832132904Spjd		bp = bioq_first(&sc->sc_queue);
1833132904Spjd		if (bp == NULL) {
1834132904Spjd			if ((sc->sc_flags &
1835132904Spjd			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1836132904Spjd				mtx_unlock(&sc->sc_queue_mtx);
1837156610Spjd				if (g_mirror_try_destroy(sc)) {
1838156610Spjd					curthread->td_pflags &= ~TDP_GEOM;
1839156610Spjd					G_MIRROR_DEBUG(1, "Thread exiting.");
1840172836Sjulian					kproc_exit(0);
1841156610Spjd				}
1842133484Spjd				mtx_lock(&sc->sc_queue_mtx);
1843132904Spjd			}
1844156610Spjd			sx_xunlock(&sc->sc_lock);
1845158116Spjd			/*
1846158116Spjd			 * XXX: We can miss an event here, because an event
1847158116Spjd			 *      can be added without sx-device-lock and without
1848158116Spjd			 *      mtx-queue-lock. Maybe I should just stop using
1849158116Spjd			 *      dedicated mutex for events synchronization and
1850158116Spjd			 *      stick with the queue lock?
1851158116Spjd			 *      The event will hang here until next I/O request
1852158116Spjd			 *      or next event is received.
1853158116Spjd			 */
1854155539Spjd			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1855155539Spjd			    timeout * hz);
1856156610Spjd			sx_xlock(&sc->sc_lock);
1857155539Spjd			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1858132904Spjd			continue;
1859132904Spjd		}
1860132904Spjd		bioq_remove(&sc->sc_queue, bp);
1861132904Spjd		mtx_unlock(&sc->sc_queue_mtx);
1862132904Spjd
1863162282Spjd		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1864162282Spjd		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1865162282Spjd			g_mirror_sync_request(bp);	/* READ */
1866162282Spjd		} else if (bp->bio_to != sc->sc_provider) {
1867161116Spjd			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1868161116Spjd				g_mirror_regular_request(bp);
1869161116Spjd			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1870162282Spjd				g_mirror_sync_request(bp);	/* WRITE */
1871161116Spjd			else {
1872161116Spjd				KASSERT(0,
1873161116Spjd				    ("Invalid request cflags=0x%hhx to=%s.",
1874161116Spjd				    bp->bio_cflags, bp->bio_to->name));
1875161116Spjd			}
1876161116Spjd		} else {
1877132904Spjd			g_mirror_register_request(bp);
1878161116Spjd		}
1879139140Spjd		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1880132904Spjd	}
1881132904Spjd}
1882132904Spjd
1883132904Spjdstatic void
1884155539Spjdg_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1885132904Spjd{
1886132904Spjd
1887156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
1888156610Spjd
1889163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1890163888Spjd		return;
1891155539Spjd	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1892155539Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1893156610Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
1894155539Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1895155539Spjd	} else if (sc->sc_idle &&
1896155539Spjd	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1897155539Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1898156610Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
1899155539Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1900132904Spjd	}
1901132904Spjd}
1902132904Spjd
1903132904Spjdstatic void
1904132904Spjdg_mirror_sync_start(struct g_mirror_disk *disk)
1905132904Spjd{
1906132904Spjd	struct g_mirror_softc *sc;
1907156610Spjd	struct g_consumer *cp;
1908156610Spjd	struct bio *bp;
1909156610Spjd	int error, i;
1910132904Spjd
1911156610Spjd	g_topology_assert_not();
1912156610Spjd	sc = disk->d_softc;
1913156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
1914132904Spjd
1915156610Spjd	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1916156610Spjd	    ("Disk %s is not marked for synchronization.",
1917156610Spjd	    g_mirror_get_diskname(disk)));
1918132904Spjd	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1919132904Spjd	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1920132904Spjd	    sc->sc_state));
1921132904Spjd
1922156610Spjd	sx_xunlock(&sc->sc_lock);
1923156610Spjd	g_topology_lock();
1924156610Spjd	cp = g_new_consumer(sc->sc_sync.ds_geom);
1925156610Spjd	error = g_attach(cp, sc->sc_provider);
1926156610Spjd	KASSERT(error == 0,
1927156610Spjd	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1928156610Spjd	error = g_access(cp, 1, 0, 0);
1929156610Spjd	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1930156610Spjd	g_topology_unlock();
1931156610Spjd	sx_xlock(&sc->sc_lock);
1932156610Spjd
1933132904Spjd	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1934132904Spjd	    g_mirror_get_diskname(disk));
1935163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1936163888Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1937132904Spjd	KASSERT(disk->d_sync.ds_consumer == NULL,
1938132904Spjd	    ("Sync consumer already exists (device=%s, disk=%s).",
1939132904Spjd	    sc->sc_name, g_mirror_get_diskname(disk)));
1940156610Spjd
1941156610Spjd	disk->d_sync.ds_consumer = cp;
1942132904Spjd	disk->d_sync.ds_consumer->private = disk;
1943137248Spjd	disk->d_sync.ds_consumer->index = 0;
1944156610Spjd
1945156610Spjd	/*
1946156610Spjd	 * Allocate memory for synchronization bios and initialize them.
1947156610Spjd	 */
1948156610Spjd	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
1949156610Spjd	    M_MIRROR, M_WAITOK);
1950156610Spjd	for (i = 0; i < g_mirror_syncreqs; i++) {
1951156610Spjd		bp = g_alloc_bio();
1952156610Spjd		disk->d_sync.ds_bios[i] = bp;
1953156610Spjd		bp->bio_parent = NULL;
1954156610Spjd		bp->bio_cmd = BIO_READ;
1955156610Spjd		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1956156610Spjd		bp->bio_cflags = 0;
1957156610Spjd		bp->bio_offset = disk->d_sync.ds_offset;
1958156610Spjd		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1959156610Spjd		disk->d_sync.ds_offset += bp->bio_length;
1960156610Spjd		bp->bio_done = g_mirror_sync_done;
1961156610Spjd		bp->bio_from = disk->d_sync.ds_consumer;
1962156610Spjd		bp->bio_to = sc->sc_provider;
1963156684Sru		bp->bio_caller1 = (void *)(uintptr_t)i;
1964156610Spjd	}
1965156610Spjd
1966156610Spjd	/* Increase the number of disks in SYNCHRONIZING state. */
1967132904Spjd	sc->sc_sync.ds_ndisks++;
1968156610Spjd	/* Set the number of in-flight synchronization requests. */
1969156610Spjd	disk->d_sync.ds_inflight = g_mirror_syncreqs;
1970156610Spjd
1971156610Spjd	/*
1972156610Spjd	 * Fire off first synchronization requests.
1973156610Spjd	 */
1974156610Spjd	for (i = 0; i < g_mirror_syncreqs; i++) {
1975156610Spjd		bp = disk->d_sync.ds_bios[i];
1976156610Spjd		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1977156610Spjd		disk->d_sync.ds_consumer->index++;
1978156610Spjd		/*
1979156610Spjd		 * Delay the request if it is colliding with a regular request.
1980156610Spjd		 */
1981156610Spjd		if (g_mirror_regular_collision(sc, bp))
1982156610Spjd			g_mirror_sync_delay(sc, bp);
1983156610Spjd		else
1984156610Spjd			g_io_request(bp, disk->d_sync.ds_consumer);
1985156610Spjd	}
1986132904Spjd}
1987132904Spjd
1988132904Spjd/*
1989132904Spjd * Stop synchronization process.
1990132904Spjd * type: 0 - synchronization finished
1991132904Spjd *       1 - synchronization stopped
1992132904Spjd */
1993132904Spjdstatic void
1994132904Spjdg_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1995132904Spjd{
1996156610Spjd	struct g_mirror_softc *sc;
1997156610Spjd	struct g_consumer *cp;
1998132904Spjd
1999156610Spjd	g_topology_assert_not();
2000156610Spjd	sc = disk->d_softc;
2001156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
2002156610Spjd
2003132904Spjd	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2004132904Spjd	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2005132904Spjd	    g_mirror_disk_state2str(disk->d_state)));
2006132904Spjd	if (disk->d_sync.ds_consumer == NULL)
2007132904Spjd		return;
2008132904Spjd
2009132904Spjd	if (type == 0) {
2010132904Spjd		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2011156610Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2012132904Spjd	} else /* if (type == 1) */ {
2013132904Spjd		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2014156610Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2015132904Spjd	}
2016156610Spjd	free(disk->d_sync.ds_bios, M_MIRROR);
2017156610Spjd	disk->d_sync.ds_bios = NULL;
2018156610Spjd	cp = disk->d_sync.ds_consumer;
2019132904Spjd	disk->d_sync.ds_consumer = NULL;
2020132904Spjd	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2021156610Spjd	sc->sc_sync.ds_ndisks--;
2022156610Spjd	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2023156610Spjd	g_topology_lock();
2024156610Spjd	g_mirror_kill_consumer(sc, cp);
2025156610Spjd	g_topology_unlock();
2026156610Spjd	sx_xlock(&sc->sc_lock);
2027132904Spjd}
2028132904Spjd
2029132904Spjdstatic void
2030132904Spjdg_mirror_launch_provider(struct g_mirror_softc *sc)
2031132904Spjd{
2032132904Spjd	struct g_mirror_disk *disk;
2033252063Sscottl	struct g_provider *pp, *dp;
2034132904Spjd
2035156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
2036132904Spjd
2037156610Spjd	g_topology_lock();
2038132904Spjd	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2039132904Spjd	pp->mediasize = sc->sc_mediasize;
2040132904Spjd	pp->sectorsize = sc->sc_sectorsize;
2041200935Smav	pp->stripesize = 0;
2042200935Smav	pp->stripeoffset = 0;
2043252063Sscottl
2044252063Sscottl	/* Splitting of unmapped BIO's could work but isn't implemented now */
2045252063Sscottl	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2046252063Sscottl		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2047252063Sscottl
2048200935Smav	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2049252063Sscottl		if (disk->d_consumer && disk->d_consumer->provider) {
2050252063Sscottl			dp = disk->d_consumer->provider;
2051252063Sscottl			if (dp->stripesize > pp->stripesize) {
2052252063Sscottl				pp->stripesize = dp->stripesize;
2053252063Sscottl				pp->stripeoffset = dp->stripeoffset;
2054252063Sscottl			}
2055252063Sscottl			/* A provider underneath us doesn't support unmapped */
2056252063Sscottl			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2057265670Smav				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2058265670Smav				    "because of %s.", dp->name);
2059252063Sscottl				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2060252063Sscottl			}
2061200935Smav		}
2062200935Smav	}
2063132904Spjd	sc->sc_provider = pp;
2064132904Spjd	g_error_provider(pp, 0);
2065156610Spjd	g_topology_unlock();
2066162188Sjmg	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2067162188Sjmg	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2068132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2069132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2070132904Spjd			g_mirror_sync_start(disk);
2071132904Spjd	}
2072132904Spjd}
2073132904Spjd
2074132904Spjdstatic void
2075132904Spjdg_mirror_destroy_provider(struct g_mirror_softc *sc)
2076132904Spjd{
2077132904Spjd	struct g_mirror_disk *disk;
2078132904Spjd	struct bio *bp;
2079132904Spjd
2080156610Spjd	g_topology_assert_not();
2081132904Spjd	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2082132904Spjd	    sc->sc_name));
2083132904Spjd
2084156610Spjd	g_topology_lock();
2085132904Spjd	g_error_provider(sc->sc_provider, ENXIO);
2086132904Spjd	mtx_lock(&sc->sc_queue_mtx);
2087132904Spjd	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
2088132904Spjd		bioq_remove(&sc->sc_queue, bp);
2089132904Spjd		g_io_deliver(bp, ENXIO);
2090132904Spjd	}
2091132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
2092132904Spjd	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
2093132904Spjd	    sc->sc_provider->name);
2094132904Spjd	sc->sc_provider->flags |= G_PF_WITHER;
2095132904Spjd	g_orphan_provider(sc->sc_provider, ENXIO);
2096156610Spjd	g_topology_unlock();
2097132904Spjd	sc->sc_provider = NULL;
2098132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2099132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2100132904Spjd			g_mirror_sync_stop(disk, 1);
2101132904Spjd	}
2102132904Spjd}
2103132904Spjd
2104132904Spjdstatic void
2105132904Spjdg_mirror_go(void *arg)
2106132904Spjd{
2107132904Spjd	struct g_mirror_softc *sc;
2108132904Spjd
2109132904Spjd	sc = arg;
2110132904Spjd	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2111132904Spjd	g_mirror_event_send(sc, 0,
2112132904Spjd	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2113132904Spjd}
2114132904Spjd
2115132904Spjdstatic u_int
2116132904Spjdg_mirror_determine_state(struct g_mirror_disk *disk)
2117132904Spjd{
2118132904Spjd	struct g_mirror_softc *sc;
2119132904Spjd	u_int state;
2120132904Spjd
2121132904Spjd	sc = disk->d_softc;
2122132904Spjd	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2123132904Spjd		if ((disk->d_flags &
2124132904Spjd		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2125132904Spjd			/* Disk does not need synchronization. */
2126132904Spjd			state = G_MIRROR_DISK_STATE_ACTIVE;
2127132904Spjd		} else {
2128132904Spjd			if ((sc->sc_flags &
2129156873Spjd			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2130132904Spjd			    (disk->d_flags &
2131132904Spjd			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2132132904Spjd				/*
2133132904Spjd				 * We can start synchronization from
2134132904Spjd				 * the stored offset.
2135132904Spjd				 */
2136132904Spjd				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2137132904Spjd			} else {
2138132904Spjd				state = G_MIRROR_DISK_STATE_STALE;
2139132904Spjd			}
2140132904Spjd		}
2141132904Spjd	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2142132904Spjd		/*
2143132904Spjd		 * Reset all synchronization data for this disk,
2144132904Spjd		 * because if it even was synchronized, it was
2145132904Spjd		 * synchronized to disks with different syncid.
2146132904Spjd		 */
2147132904Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2148132904Spjd		disk->d_sync.ds_offset = 0;
2149132904Spjd		disk->d_sync.ds_offset_done = 0;
2150132904Spjd		disk->d_sync.ds_syncid = sc->sc_syncid;
2151132904Spjd		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2152132904Spjd		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2153132904Spjd			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2154132904Spjd		} else {
2155132904Spjd			state = G_MIRROR_DISK_STATE_STALE;
2156132904Spjd		}
2157132904Spjd	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2158132904Spjd		/*
2159132904Spjd		 * Not good, NOT GOOD!
2160132904Spjd		 * It means that mirror was started on stale disks
2161132904Spjd		 * and more fresh disk just arrive.
2162160895Spjd		 * If there were writes, mirror is broken, sorry.
2163132904Spjd		 * I think the best choice here is don't touch
2164160964Syar		 * this disk and inform the user loudly.
2165132904Spjd		 */
2166132904Spjd		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2167132904Spjd		    "disk (%s) arrives!! It will not be connected to the "
2168132904Spjd		    "running device.", sc->sc_name,
2169132904Spjd		    g_mirror_get_diskname(disk));
2170132904Spjd		g_mirror_destroy_disk(disk);
2171132904Spjd		state = G_MIRROR_DISK_STATE_NONE;
2172132904Spjd		/* Return immediately, because disk was destroyed. */
2173132904Spjd		return (state);
2174132904Spjd	}
2175132904Spjd	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2176132904Spjd	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2177132904Spjd	return (state);
2178132904Spjd}
2179132904Spjd
2180132904Spjd/*
2181132904Spjd * Update device state.
2182132904Spjd */
2183132904Spjdstatic void
2184139051Spjdg_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
2185132904Spjd{
2186132904Spjd	struct g_mirror_disk *disk;
2187132904Spjd	u_int state;
2188132904Spjd
2189156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
2190132904Spjd
2191132904Spjd	switch (sc->sc_state) {
2192132904Spjd	case G_MIRROR_DEVICE_STATE_STARTING:
2193132904Spjd	    {
2194139213Spjd		struct g_mirror_disk *pdisk, *tdisk;
2195139213Spjd		u_int dirty, ndisks, genid, syncid;
2196132904Spjd
2197132904Spjd		KASSERT(sc->sc_provider == NULL,
2198132904Spjd		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2199132904Spjd		/*
2200132904Spjd		 * Are we ready? We are, if all disks are connected or
2201132904Spjd		 * if we have any disks and 'force' is true.
2202132904Spjd		 */
2203156610Spjd		ndisks = g_mirror_ndisks(sc, -1);
2204157290Spjd		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2205132904Spjd			;
2206156610Spjd		} else if (ndisks == 0) {
2207132904Spjd			/*
2208132904Spjd			 * Disks went down in starting phase, so destroy
2209132904Spjd			 * device.
2210132904Spjd			 */
2211146616Spjd			callout_drain(&sc->sc_callout);
2212146616Spjd			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2213146616Spjd			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2214146616Spjd			    sc->sc_rootmount);
2215145305Spjd			root_mount_rel(sc->sc_rootmount);
2216145305Spjd			sc->sc_rootmount = NULL;
2217132904Spjd			return;
2218132904Spjd		} else {
2219132904Spjd			return;
2220132904Spjd		}
2221132904Spjd
2222132904Spjd		/*
2223132904Spjd		 * Activate all disks with the biggest syncid.
2224132904Spjd		 */
2225132904Spjd		if (force) {
2226132904Spjd			/*
2227133079Spjd			 * If 'force' is true, we have been called due to
2228133079Spjd			 * timeout, so don't bother canceling timeout.
2229132904Spjd			 */
2230132904Spjd			ndisks = 0;
2231132904Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2232132904Spjd				if ((disk->d_flags &
2233132904Spjd				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2234132904Spjd					ndisks++;
2235132904Spjd				}
2236132904Spjd			}
2237132904Spjd			if (ndisks == 0) {
2238132941Spjd				/* No valid disks found, destroy device. */
2239132941Spjd				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2240146616Spjd				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2241146616Spjd				    __LINE__, sc->sc_rootmount);
2242146616Spjd				root_mount_rel(sc->sc_rootmount);
2243146616Spjd				sc->sc_rootmount = NULL;
2244132904Spjd				return;
2245132904Spjd			}
2246132904Spjd		} else {
2247132904Spjd			/* Cancel timeout. */
2248132904Spjd			callout_drain(&sc->sc_callout);
2249132904Spjd		}
2250132904Spjd
2251132904Spjd		/*
2252139213Spjd		 * Find the biggest genid.
2253132904Spjd		 */
2254139213Spjd		genid = 0;
2255139213Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2256139213Spjd			if (disk->d_genid > genid)
2257139213Spjd				genid = disk->d_genid;
2258139213Spjd		}
2259139213Spjd		sc->sc_genid = genid;
2260139213Spjd		/*
2261139213Spjd		 * Remove all disks without the biggest genid.
2262139213Spjd		 */
2263139213Spjd		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2264139213Spjd			if (disk->d_genid < genid) {
2265139213Spjd				G_MIRROR_DEBUG(0,
2266139213Spjd				    "Component %s (device %s) broken, skipping.",
2267139213Spjd				    g_mirror_get_diskname(disk), sc->sc_name);
2268139213Spjd				g_mirror_destroy_disk(disk);
2269139213Spjd			}
2270139213Spjd		}
2271139213Spjd
2272139213Spjd		/*
2273139213Spjd		 * Find the biggest syncid.
2274139213Spjd		 */
2275132904Spjd		syncid = 0;
2276132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2277132904Spjd			if (disk->d_sync.ds_syncid > syncid)
2278132904Spjd				syncid = disk->d_sync.ds_syncid;
2279132904Spjd		}
2280132904Spjd
2281132904Spjd		/*
2282132904Spjd		 * Here we need to look for dirty disks and if all disks
2283132904Spjd		 * with the biggest syncid are dirty, we have to choose
2284132904Spjd		 * one with the biggest priority and rebuild the rest.
2285132904Spjd		 */
2286132904Spjd		/*
2287132904Spjd		 * Find the number of dirty disks with the biggest syncid.
2288132904Spjd		 * Find the number of disks with the biggest syncid.
2289132904Spjd		 * While here, find a disk with the biggest priority.
2290132904Spjd		 */
2291132904Spjd		dirty = ndisks = 0;
2292132904Spjd		pdisk = NULL;
2293132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2294132904Spjd			if (disk->d_sync.ds_syncid != syncid)
2295132904Spjd				continue;
2296132904Spjd			if ((disk->d_flags &
2297132904Spjd			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2298132904Spjd				continue;
2299132904Spjd			}
2300132904Spjd			ndisks++;
2301132904Spjd			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2302132904Spjd				dirty++;
2303132904Spjd				if (pdisk == NULL ||
2304132904Spjd				    pdisk->d_priority < disk->d_priority) {
2305132904Spjd					pdisk = disk;
2306132904Spjd				}
2307132904Spjd			}
2308132904Spjd		}
2309132904Spjd		if (dirty == 0) {
2310132904Spjd			/* No dirty disks at all, great. */
2311132904Spjd		} else if (dirty == ndisks) {
2312132904Spjd			/*
2313132904Spjd			 * Force synchronization for all dirty disks except one
2314132904Spjd			 * with the biggest priority.
2315132904Spjd			 */
2316132904Spjd			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2317132904Spjd			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2318132904Spjd			    "master disk for synchronization.",
2319132904Spjd			    g_mirror_get_diskname(pdisk), sc->sc_name);
2320132904Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2321132904Spjd				if (disk->d_sync.ds_syncid != syncid)
2322132904Spjd					continue;
2323132904Spjd				if ((disk->d_flags &
2324132904Spjd				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2325132904Spjd					continue;
2326132904Spjd				}
2327132904Spjd				KASSERT((disk->d_flags &
2328132904Spjd				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2329132904Spjd				    ("Disk %s isn't marked as dirty.",
2330132904Spjd				    g_mirror_get_diskname(disk)));
2331132904Spjd				/* Skip the disk with the biggest priority. */
2332132904Spjd				if (disk == pdisk)
2333132904Spjd					continue;
2334132904Spjd				disk->d_sync.ds_syncid = 0;
2335132904Spjd			}
2336132904Spjd		} else if (dirty < ndisks) {
2337132904Spjd			/*
2338132904Spjd			 * Force synchronization for all dirty disks.
2339132904Spjd			 * We have some non-dirty disks.
2340132904Spjd			 */
2341132904Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2342132904Spjd				if (disk->d_sync.ds_syncid != syncid)
2343132904Spjd					continue;
2344132904Spjd				if ((disk->d_flags &
2345132904Spjd				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2346132904Spjd					continue;
2347132904Spjd				}
2348132904Spjd				if ((disk->d_flags &
2349132904Spjd				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2350132904Spjd					continue;
2351132904Spjd				}
2352132904Spjd				disk->d_sync.ds_syncid = 0;
2353132904Spjd			}
2354132904Spjd		}
2355132904Spjd
2356132904Spjd		/* Reset hint. */
2357132904Spjd		sc->sc_hint = NULL;
2358132904Spjd		sc->sc_syncid = syncid;
2359132904Spjd		if (force) {
2360132904Spjd			/* Remember to bump syncid on first write. */
2361139670Spjd			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2362132904Spjd		}
2363132904Spjd		state = G_MIRROR_DEVICE_STATE_RUNNING;
2364132904Spjd		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2365132904Spjd		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2366132904Spjd		    g_mirror_device_state2str(state));
2367132904Spjd		sc->sc_state = state;
2368132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2369132904Spjd			state = g_mirror_determine_state(disk);
2370132904Spjd			g_mirror_event_send(disk, state,
2371132904Spjd			    G_MIRROR_EVENT_DONTWAIT);
2372139213Spjd			if (state == G_MIRROR_DISK_STATE_STALE)
2373139670Spjd				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2374132904Spjd		}
2375132904Spjd		break;
2376132904Spjd	    }
2377132904Spjd	case G_MIRROR_DEVICE_STATE_RUNNING:
2378137248Spjd		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2379132904Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2380132904Spjd			/*
2381137248Spjd			 * No active disks or no disks at all,
2382137248Spjd			 * so destroy device.
2383132904Spjd			 */
2384132904Spjd			if (sc->sc_provider != NULL)
2385132904Spjd				g_mirror_destroy_provider(sc);
2386137248Spjd			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2387133946Spjd			break;
2388132904Spjd		} else if (g_mirror_ndisks(sc,
2389132954Spjd		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2390132954Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2391132904Spjd			/*
2392132904Spjd			 * We have active disks, launch provider if it doesn't
2393132904Spjd			 * exist.
2394132904Spjd			 */
2395132904Spjd			if (sc->sc_provider == NULL)
2396132904Spjd				g_mirror_launch_provider(sc);
2397146624Spjd			if (sc->sc_rootmount != NULL) {
2398146624Spjd				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2399146624Spjd				    __LINE__, sc->sc_rootmount);
2400146624Spjd				root_mount_rel(sc->sc_rootmount);
2401146624Spjd				sc->sc_rootmount = NULL;
2402146624Spjd			}
2403132904Spjd		}
2404133946Spjd		/*
2405139670Spjd		 * Genid should be bumped immediately, so do it here.
2406133946Spjd		 */
2407139670Spjd		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2408139213Spjd			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2409139213Spjd			g_mirror_bump_genid(sc);
2410139213Spjd		}
2411132904Spjd		break;
2412132904Spjd	default:
2413132904Spjd		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2414132904Spjd		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2415132904Spjd		break;
2416132904Spjd	}
2417132904Spjd}
2418132904Spjd
2419132904Spjd/*
2420132904Spjd * Update disk state and device state if needed.
2421132904Spjd */
2422132904Spjd#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2423132904Spjd	"Disk %s state changed from %s to %s (device %s).",		\
2424132904Spjd	g_mirror_get_diskname(disk),					\
2425132904Spjd	g_mirror_disk_state2str(disk->d_state),				\
2426132904Spjd	g_mirror_disk_state2str(state), sc->sc_name)
2427132904Spjdstatic int
2428139051Spjdg_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2429132904Spjd{
2430132904Spjd	struct g_mirror_softc *sc;
2431132904Spjd
2432156610Spjd	sc = disk->d_softc;
2433156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
2434132904Spjd
2435132904Spjdagain:
2436132904Spjd	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2437132904Spjd	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2438132904Spjd	    g_mirror_disk_state2str(state));
2439132904Spjd	switch (state) {
2440132904Spjd	case G_MIRROR_DISK_STATE_NEW:
2441132904Spjd		/*
2442132904Spjd		 * Possible scenarios:
2443132904Spjd		 * 1. New disk arrive.
2444132904Spjd		 */
2445132904Spjd		/* Previous state should be NONE. */
2446132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2447132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2448132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2449132904Spjd		DISK_STATE_CHANGED();
2450132904Spjd
2451132904Spjd		disk->d_state = state;
2452133115Spjd		if (LIST_EMPTY(&sc->sc_disks))
2453133115Spjd			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2454133115Spjd		else {
2455133115Spjd			struct g_mirror_disk *dp;
2456133115Spjd
2457133115Spjd			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2458133115Spjd				if (disk->d_priority >= dp->d_priority) {
2459133115Spjd					LIST_INSERT_BEFORE(dp, disk, d_next);
2460133115Spjd					dp = NULL;
2461133115Spjd					break;
2462133115Spjd				}
2463133115Spjd				if (LIST_NEXT(dp, d_next) == NULL)
2464133115Spjd					break;
2465133115Spjd			}
2466133115Spjd			if (dp != NULL)
2467133115Spjd				LIST_INSERT_AFTER(dp, disk, d_next);
2468133115Spjd		}
2469162188Sjmg		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2470132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2471132904Spjd		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2472132904Spjd			break;
2473132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2474132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2475132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2476132904Spjd		    g_mirror_get_diskname(disk),
2477132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2478132904Spjd		state = g_mirror_determine_state(disk);
2479132904Spjd		if (state != G_MIRROR_DISK_STATE_NONE)
2480132904Spjd			goto again;
2481132904Spjd		break;
2482132904Spjd	case G_MIRROR_DISK_STATE_ACTIVE:
2483132904Spjd		/*
2484132904Spjd		 * Possible scenarios:
2485132904Spjd		 * 1. New disk does not need synchronization.
2486132904Spjd		 * 2. Synchronization process finished successfully.
2487132904Spjd		 */
2488132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2489132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2490132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2491132904Spjd		    g_mirror_get_diskname(disk),
2492132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2493132904Spjd		/* Previous state should be NEW or SYNCHRONIZING. */
2494132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2495132904Spjd		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2496132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2497132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2498132904Spjd		DISK_STATE_CHANGED();
2499132904Spjd
2500155582Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2501132904Spjd			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2502132904Spjd			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2503132904Spjd			g_mirror_sync_stop(disk, 0);
2504132904Spjd		}
2505132904Spjd		disk->d_state = state;
2506132904Spjd		disk->d_sync.ds_offset = 0;
2507132904Spjd		disk->d_sync.ds_offset_done = 0;
2508155539Spjd		g_mirror_update_idle(sc, disk);
2509155582Spjd		g_mirror_update_metadata(disk);
2510162188Sjmg		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2511132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2512132904Spjd		break;
2513132904Spjd	case G_MIRROR_DISK_STATE_STALE:
2514132904Spjd		/*
2515132904Spjd		 * Possible scenarios:
2516132904Spjd		 * 1. Stale disk was connected.
2517132904Spjd		 */
2518132904Spjd		/* Previous state should be NEW. */
2519132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2520132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2521132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2522132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2523132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2524132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2525132904Spjd		    g_mirror_get_diskname(disk),
2526132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2527132904Spjd		/*
2528132904Spjd		 * STALE state is only possible if device is marked
2529132904Spjd		 * NOAUTOSYNC.
2530132904Spjd		 */
2531132904Spjd		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2532132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2533132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2534132904Spjd		    g_mirror_get_diskname(disk),
2535132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2536132904Spjd		DISK_STATE_CHANGED();
2537132904Spjd
2538132904Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2539132904Spjd		disk->d_state = state;
2540132904Spjd		g_mirror_update_metadata(disk);
2541132904Spjd		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2542132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2543132904Spjd		break;
2544132904Spjd	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2545132904Spjd		/*
2546132904Spjd		 * Possible scenarios:
2547132904Spjd		 * 1. Disk which needs synchronization was connected.
2548132904Spjd		 */
2549132904Spjd		/* Previous state should be NEW. */
2550132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2551132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2552132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2553132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2554132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2555132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2556132904Spjd		    g_mirror_get_diskname(disk),
2557132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2558132904Spjd		DISK_STATE_CHANGED();
2559132904Spjd
2560132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2561132904Spjd			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2562132904Spjd		disk->d_state = state;
2563132904Spjd		if (sc->sc_provider != NULL) {
2564132904Spjd			g_mirror_sync_start(disk);
2565132904Spjd			g_mirror_update_metadata(disk);
2566132904Spjd		}
2567132904Spjd		break;
2568132904Spjd	case G_MIRROR_DISK_STATE_DISCONNECTED:
2569132904Spjd		/*
2570132904Spjd		 * Possible scenarios:
2571132904Spjd		 * 1. Device wasn't running yet, but disk disappear.
2572132904Spjd		 * 2. Disk was active and disapppear.
2573132904Spjd		 * 3. Disk disappear during synchronization process.
2574132904Spjd		 */
2575132904Spjd		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2576132904Spjd			/*
2577132904Spjd			 * Previous state should be ACTIVE, STALE or
2578132904Spjd			 * SYNCHRONIZING.
2579132904Spjd			 */
2580132904Spjd			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2581132904Spjd			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2582132904Spjd			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2583132904Spjd			    ("Wrong disk state (%s, %s).",
2584132904Spjd			    g_mirror_get_diskname(disk),
2585132904Spjd			    g_mirror_disk_state2str(disk->d_state)));
2586132904Spjd		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2587132904Spjd			/* Previous state should be NEW. */
2588132904Spjd			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2589132904Spjd			    ("Wrong disk state (%s, %s).",
2590132904Spjd			    g_mirror_get_diskname(disk),
2591132904Spjd			    g_mirror_disk_state2str(disk->d_state)));
2592132904Spjd			/*
2593132904Spjd			 * Reset bumping syncid if disk disappeared in STARTING
2594132904Spjd			 * state.
2595132904Spjd			 */
2596139670Spjd			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2597139213Spjd				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2598132904Spjd#ifdef	INVARIANTS
2599132904Spjd		} else {
2600132904Spjd			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2601132904Spjd			    sc->sc_name,
2602132904Spjd			    g_mirror_device_state2str(sc->sc_state),
2603132904Spjd			    g_mirror_get_diskname(disk),
2604132904Spjd			    g_mirror_disk_state2str(disk->d_state)));
2605132904Spjd#endif
2606132904Spjd		}
2607132904Spjd		DISK_STATE_CHANGED();
2608132904Spjd		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2609132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2610132904Spjd
2611132904Spjd		g_mirror_destroy_disk(disk);
2612132904Spjd		break;
2613132904Spjd	case G_MIRROR_DISK_STATE_DESTROY:
2614132904Spjd	    {
2615132904Spjd		int error;
2616132904Spjd
2617132904Spjd		error = g_mirror_clear_metadata(disk);
2618132904Spjd		if (error != 0)
2619132904Spjd			return (error);
2620132904Spjd		DISK_STATE_CHANGED();
2621132904Spjd		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2622132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2623132904Spjd
2624132904Spjd		g_mirror_destroy_disk(disk);
2625132904Spjd		sc->sc_ndisks--;
2626132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2627132904Spjd			g_mirror_update_metadata(disk);
2628132904Spjd		}
2629132904Spjd		break;
2630132904Spjd	    }
2631132904Spjd	default:
2632132904Spjd		KASSERT(1 == 0, ("Unknown state (%u).", state));
2633132904Spjd		break;
2634132904Spjd	}
2635132904Spjd	return (0);
2636132904Spjd}
2637132904Spjd#undef	DISK_STATE_CHANGED
2638132904Spjd
2639139650Spjdint
2640132904Spjdg_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2641132904Spjd{
2642132904Spjd	struct g_provider *pp;
2643132904Spjd	u_char *buf;
2644132904Spjd	int error;
2645132904Spjd
2646132904Spjd	g_topology_assert();
2647132904Spjd
2648132904Spjd	error = g_access(cp, 1, 0, 0);
2649132904Spjd	if (error != 0)
2650132904Spjd		return (error);
2651132904Spjd	pp = cp->provider;
2652132904Spjd	g_topology_unlock();
2653132904Spjd	/* Metadata are stored on last sector. */
2654132904Spjd	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2655132904Spjd	    &error);
2656132904Spjd	g_topology_lock();
2657139051Spjd	g_access(cp, -1, 0, 0);
2658152967Ssobomax	if (buf == NULL) {
2659139213Spjd		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2660139213Spjd		    cp->provider->name, error);
2661132904Spjd		return (error);
2662132904Spjd	}
2663132904Spjd
2664132904Spjd	/* Decode metadata. */
2665132904Spjd	error = mirror_metadata_decode(buf, md);
2666132904Spjd	g_free(buf);
2667132904Spjd	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2668132904Spjd		return (EINVAL);
2669139213Spjd	if (md->md_version > G_MIRROR_VERSION) {
2670139213Spjd		G_MIRROR_DEBUG(0,
2671139213Spjd		    "Kernel module is too old to handle metadata from %s.",
2672139213Spjd		    cp->provider->name);
2673139213Spjd		return (EINVAL);
2674139213Spjd	}
2675132904Spjd	if (error != 0) {
2676132904Spjd		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2677132904Spjd		    cp->provider->name);
2678132904Spjd		return (error);
2679132904Spjd	}
2680132904Spjd
2681132904Spjd	return (0);
2682132904Spjd}
2683132904Spjd
2684132904Spjdstatic int
2685132904Spjdg_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2686132904Spjd    struct g_mirror_metadata *md)
2687132904Spjd{
2688132904Spjd
2689132904Spjd	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2690132904Spjd		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2691132904Spjd		    pp->name, md->md_did);
2692132904Spjd		return (EEXIST);
2693132904Spjd	}
2694132904Spjd	if (md->md_all != sc->sc_ndisks) {
2695132904Spjd		G_MIRROR_DEBUG(1,
2696132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2697132904Spjd		    "md_all", pp->name, sc->sc_name);
2698132904Spjd		return (EINVAL);
2699132904Spjd	}
2700132904Spjd	if (md->md_slice != sc->sc_slice) {
2701132904Spjd		G_MIRROR_DEBUG(1,
2702132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2703132904Spjd		    "md_slice", pp->name, sc->sc_name);
2704132904Spjd		return (EINVAL);
2705132904Spjd	}
2706132904Spjd	if (md->md_balance != sc->sc_balance) {
2707132904Spjd		G_MIRROR_DEBUG(1,
2708132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2709132904Spjd		    "md_balance", pp->name, sc->sc_name);
2710132904Spjd		return (EINVAL);
2711132904Spjd	}
2712132904Spjd	if (md->md_mediasize != sc->sc_mediasize) {
2713132904Spjd		G_MIRROR_DEBUG(1,
2714132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2715132904Spjd		    "md_mediasize", pp->name, sc->sc_name);
2716132904Spjd		return (EINVAL);
2717132904Spjd	}
2718132904Spjd	if (sc->sc_mediasize > pp->mediasize) {
2719132904Spjd		G_MIRROR_DEBUG(1,
2720132904Spjd		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2721132904Spjd		    sc->sc_name);
2722132904Spjd		return (EINVAL);
2723132904Spjd	}
2724132904Spjd	if (md->md_sectorsize != sc->sc_sectorsize) {
2725132904Spjd		G_MIRROR_DEBUG(1,
2726132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2727132904Spjd		    "md_sectorsize", pp->name, sc->sc_name);
2728132904Spjd		return (EINVAL);
2729132904Spjd	}
2730132904Spjd	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2731132904Spjd		G_MIRROR_DEBUG(1,
2732132904Spjd		    "Invalid sector size of disk %s (device %s), skipping.",
2733132904Spjd		    pp->name, sc->sc_name);
2734132904Spjd		return (EINVAL);
2735132904Spjd	}
2736132904Spjd	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2737132904Spjd		G_MIRROR_DEBUG(1,
2738132904Spjd		    "Invalid device flags on disk %s (device %s), skipping.",
2739132904Spjd		    pp->name, sc->sc_name);
2740132904Spjd		return (EINVAL);
2741132904Spjd	}
2742132904Spjd	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2743132904Spjd		G_MIRROR_DEBUG(1,
2744132904Spjd		    "Invalid disk flags on disk %s (device %s), skipping.",
2745132904Spjd		    pp->name, sc->sc_name);
2746132904Spjd		return (EINVAL);
2747132904Spjd	}
2748132904Spjd	return (0);
2749132904Spjd}
2750132904Spjd
2751139650Spjdint
2752132904Spjdg_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2753132904Spjd    struct g_mirror_metadata *md)
2754132904Spjd{
2755132904Spjd	struct g_mirror_disk *disk;
2756132904Spjd	int error;
2757132904Spjd
2758156610Spjd	g_topology_assert_not();
2759132904Spjd	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2760132904Spjd
2761132904Spjd	error = g_mirror_check_metadata(sc, pp, md);
2762132904Spjd	if (error != 0)
2763132904Spjd		return (error);
2764139213Spjd	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2765139213Spjd	    md->md_genid < sc->sc_genid) {
2766139213Spjd		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2767139213Spjd		    pp->name, sc->sc_name);
2768139213Spjd		return (EINVAL);
2769139213Spjd	}
2770132904Spjd	disk = g_mirror_init_disk(sc, pp, md, &error);
2771132904Spjd	if (disk == NULL)
2772132904Spjd		return (error);
2773132904Spjd	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2774132904Spjd	    G_MIRROR_EVENT_WAIT);
2775139213Spjd	if (error != 0)
2776139213Spjd		return (error);
2777139213Spjd	if (md->md_version < G_MIRROR_VERSION) {
2778139213Spjd		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2779139213Spjd		    pp->name, md->md_version, G_MIRROR_VERSION);
2780139213Spjd		g_mirror_update_metadata(disk);
2781139213Spjd	}
2782139213Spjd	return (0);
2783132904Spjd}
2784132904Spjd
2785157630Spjdstatic void
2786157630Spjdg_mirror_destroy_delayed(void *arg, int flag)
2787157630Spjd{
2788157630Spjd	struct g_mirror_softc *sc;
2789157630Spjd	int error;
2790157630Spjd
2791157630Spjd	if (flag == EV_CANCEL) {
2792157630Spjd		G_MIRROR_DEBUG(1, "Destroying canceled.");
2793157630Spjd		return;
2794157630Spjd	}
2795157630Spjd	sc = arg;
2796157630Spjd	g_topology_unlock();
2797157630Spjd	sx_xlock(&sc->sc_lock);
2798157630Spjd	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2799157630Spjd	    ("DESTROY flag set on %s.", sc->sc_name));
2800157630Spjd	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2801157630Spjd	    ("DESTROYING flag not set on %s.", sc->sc_name));
2802157630Spjd	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2803157630Spjd	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2804157630Spjd	if (error != 0) {
2805264715Sbdrewery		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2806264715Sbdrewery		    sc->sc_name, error);
2807157630Spjd		sx_xunlock(&sc->sc_lock);
2808157630Spjd	}
2809157630Spjd	g_topology_lock();
2810157630Spjd}
2811157630Spjd
2812132904Spjdstatic int
2813132904Spjdg_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2814132904Spjd{
2815132904Spjd	struct g_mirror_softc *sc;
2816157630Spjd	int dcr, dcw, dce, error = 0;
2817132904Spjd
2818132904Spjd	g_topology_assert();
2819132904Spjd	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2820132904Spjd	    acw, ace);
2821132904Spjd
2822160081Spjd	sc = pp->geom->softc;
2823160081Spjd	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
2824160081Spjd		return (0);
2825160081Spjd	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2826160081Spjd
2827132904Spjd	dcr = pp->acr + acr;
2828132904Spjd	dcw = pp->acw + acw;
2829132904Spjd	dce = pp->ace + ace;
2830132904Spjd
2831157630Spjd	g_topology_unlock();
2832157630Spjd	sx_xlock(&sc->sc_lock);
2833157630Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2834157630Spjd	    LIST_EMPTY(&sc->sc_disks)) {
2835157630Spjd		if (acr > 0 || acw > 0 || ace > 0)
2836157630Spjd			error = ENXIO;
2837157630Spjd		goto end;
2838132904Spjd	}
2839246076Smav	if (dcw == 0)
2840156610Spjd		g_mirror_idle(sc, dcw);
2841157630Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
2842157630Spjd		if (acr > 0 || acw > 0 || ace > 0) {
2843157630Spjd			error = ENXIO;
2844157630Spjd			goto end;
2845157630Spjd		}
2846157630Spjd		if (dcr == 0 && dcw == 0 && dce == 0) {
2847157630Spjd			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
2848157630Spjd			    sc, NULL);
2849157630Spjd		}
2850156610Spjd	}
2851157630Spjdend:
2852157630Spjd	sx_xunlock(&sc->sc_lock);
2853157630Spjd	g_topology_lock();
2854157630Spjd	return (error);
2855132904Spjd}
2856132904Spjd
2857132904Spjdstatic struct g_geom *
2858132904Spjdg_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2859132904Spjd{
2860132904Spjd	struct g_mirror_softc *sc;
2861132904Spjd	struct g_geom *gp;
2862132904Spjd	int error, timeout;
2863132904Spjd
2864132904Spjd	g_topology_assert();
2865132904Spjd	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2866132904Spjd	    md->md_mid);
2867132904Spjd
2868132904Spjd	/* One disk is minimum. */
2869132904Spjd	if (md->md_all < 1)
2870132904Spjd		return (NULL);
2871132904Spjd	/*
2872132904Spjd	 * Action geom.
2873132904Spjd	 */
2874132904Spjd	gp = g_new_geomf(mp, "%s", md->md_name);
2875132904Spjd	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2876132904Spjd	gp->start = g_mirror_start;
2877132904Spjd	gp->orphan = g_mirror_orphan;
2878132904Spjd	gp->access = g_mirror_access;
2879132904Spjd	gp->dumpconf = g_mirror_dumpconf;
2880132904Spjd
2881132904Spjd	sc->sc_id = md->md_mid;
2882132904Spjd	sc->sc_slice = md->md_slice;
2883132904Spjd	sc->sc_balance = md->md_balance;
2884132904Spjd	sc->sc_mediasize = md->md_mediasize;
2885132904Spjd	sc->sc_sectorsize = md->md_sectorsize;
2886132904Spjd	sc->sc_ndisks = md->md_all;
2887132904Spjd	sc->sc_flags = md->md_mflags;
2888139213Spjd	sc->sc_bump_id = 0;
2889155539Spjd	sc->sc_idle = 1;
2890155581Spjd	sc->sc_last_write = time_uptime;
2891155539Spjd	sc->sc_writes = 0;
2892156610Spjd	sx_init(&sc->sc_lock, "gmirror:lock");
2893132904Spjd	bioq_init(&sc->sc_queue);
2894132904Spjd	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2895156610Spjd	bioq_init(&sc->sc_regular_delayed);
2896156610Spjd	bioq_init(&sc->sc_inflight);
2897156610Spjd	bioq_init(&sc->sc_sync_delayed);
2898132904Spjd	LIST_INIT(&sc->sc_disks);
2899132904Spjd	TAILQ_INIT(&sc->sc_events);
2900132904Spjd	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2901132904Spjd	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2902132904Spjd	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2903132904Spjd	gp->softc = sc;
2904132904Spjd	sc->sc_geom = gp;
2905132904Spjd	sc->sc_provider = NULL;
2906132904Spjd	/*
2907132904Spjd	 * Synchronization geom.
2908132904Spjd	 */
2909132904Spjd	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2910132904Spjd	gp->softc = sc;
2911132904Spjd	gp->orphan = g_mirror_orphan;
2912132904Spjd	sc->sc_sync.ds_geom = gp;
2913132904Spjd	sc->sc_sync.ds_ndisks = 0;
2914172836Sjulian	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2915132904Spjd	    "g_mirror %s", md->md_name);
2916132904Spjd	if (error != 0) {
2917132904Spjd		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2918132904Spjd		    sc->sc_name);
2919132904Spjd		g_destroy_geom(sc->sc_sync.ds_geom);
2920132904Spjd		mtx_destroy(&sc->sc_events_mtx);
2921132904Spjd		mtx_destroy(&sc->sc_queue_mtx);
2922156610Spjd		sx_destroy(&sc->sc_lock);
2923132904Spjd		g_destroy_geom(sc->sc_geom);
2924132904Spjd		free(sc, M_MIRROR);
2925132904Spjd		return (NULL);
2926132904Spjd	}
2927132904Spjd
2928162188Sjmg	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
2929162188Sjmg	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
2930132904Spjd
2931190878Sthompsa	sc->sc_rootmount = root_mount_hold("GMIRROR");
2932146538Spjd	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
2933132904Spjd	/*
2934132904Spjd	 * Run timeout.
2935132904Spjd	 */
2936137251Spjd	timeout = g_mirror_timeout * hz;
2937137251Spjd	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2938132904Spjd	return (sc->sc_geom);
2939132904Spjd}
2940132904Spjd
2941132904Spjdint
2942157630Spjdg_mirror_destroy(struct g_mirror_softc *sc, int how)
2943132904Spjd{
2944157630Spjd	struct g_mirror_disk *disk;
2945132904Spjd	struct g_provider *pp;
2946132904Spjd
2947156610Spjd	g_topology_assert_not();
2948132904Spjd	if (sc == NULL)
2949132904Spjd		return (ENXIO);
2950156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
2951156610Spjd
2952132904Spjd	pp = sc->sc_provider;
2953132904Spjd	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2954157630Spjd		switch (how) {
2955157630Spjd		case G_MIRROR_DESTROY_SOFT:
2956132904Spjd			G_MIRROR_DEBUG(1,
2957132904Spjd			    "Device %s is still open (r%dw%de%d).", pp->name,
2958132904Spjd			    pp->acr, pp->acw, pp->ace);
2959132904Spjd			return (EBUSY);
2960157630Spjd		case G_MIRROR_DESTROY_DELAYED:
2961157630Spjd			G_MIRROR_DEBUG(1,
2962157630Spjd			    "Device %s will be destroyed on last close.",
2963157630Spjd			    pp->name);
2964157630Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2965157630Spjd				if (disk->d_state ==
2966157630Spjd				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2967157630Spjd					g_mirror_sync_stop(disk, 1);
2968157630Spjd				}
2969157630Spjd			}
2970157630Spjd			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
2971157630Spjd			return (EBUSY);
2972157630Spjd		case G_MIRROR_DESTROY_HARD:
2973157630Spjd			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2974157630Spjd			    "can't be definitely removed.", pp->name);
2975132904Spjd		}
2976132904Spjd	}
2977132904Spjd
2978158112Spjd	g_topology_lock();
2979158112Spjd	if (sc->sc_geom->softc == NULL) {
2980158112Spjd		g_topology_unlock();
2981158112Spjd		return (0);
2982158112Spjd	}
2983158112Spjd	sc->sc_geom->softc = NULL;
2984158112Spjd	sc->sc_sync.ds_geom->softc = NULL;
2985158112Spjd	g_topology_unlock();
2986158112Spjd
2987132904Spjd	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2988132904Spjd	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2989132904Spjd	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2990156610Spjd	sx_xunlock(&sc->sc_lock);
2991132904Spjd	mtx_lock(&sc->sc_queue_mtx);
2992132904Spjd	wakeup(sc);
2993132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
2994132904Spjd	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2995132904Spjd	while (sc->sc_worker != NULL)
2996132904Spjd		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2997132904Spjd	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2998156610Spjd	sx_xlock(&sc->sc_lock);
2999132904Spjd	g_mirror_destroy_device(sc);
3000132904Spjd	free(sc, M_MIRROR);
3001132904Spjd	return (0);
3002132904Spjd}
3003132904Spjd
3004132904Spjdstatic void
3005132904Spjdg_mirror_taste_orphan(struct g_consumer *cp)
3006132904Spjd{
3007132904Spjd
3008132904Spjd	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3009132904Spjd	    cp->provider->name));
3010132904Spjd}
3011132904Spjd
3012132904Spjdstatic struct g_geom *
3013132904Spjdg_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3014132904Spjd{
3015132904Spjd	struct g_mirror_metadata md;
3016132904Spjd	struct g_mirror_softc *sc;
3017132904Spjd	struct g_consumer *cp;
3018132904Spjd	struct g_geom *gp;
3019132904Spjd	int error;
3020132904Spjd
3021132904Spjd	g_topology_assert();
3022132904Spjd	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3023132904Spjd	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3024132904Spjd
3025132904Spjd	gp = g_new_geomf(mp, "mirror:taste");
3026132904Spjd	/*
3027132904Spjd	 * This orphan function should be never called.
3028132904Spjd	 */
3029132904Spjd	gp->orphan = g_mirror_taste_orphan;
3030132904Spjd	cp = g_new_consumer(gp);
3031132904Spjd	g_attach(cp, pp);
3032132904Spjd	error = g_mirror_read_metadata(cp, &md);
3033132904Spjd	g_detach(cp);
3034132904Spjd	g_destroy_consumer(cp);
3035132904Spjd	g_destroy_geom(gp);
3036132904Spjd	if (error != 0)
3037132904Spjd		return (NULL);
3038132904Spjd	gp = NULL;
3039132904Spjd
3040221101Smav	if (md.md_provider[0] != '\0' &&
3041221101Smav	    !g_compare_names(md.md_provider, pp->name))
3042133373Spjd		return (NULL);
3043142727Spjd	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3044142727Spjd		return (NULL);
3045132904Spjd	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3046132904Spjd		G_MIRROR_DEBUG(0,
3047132904Spjd		    "Device %s: provider %s marked as inactive, skipping.",
3048132904Spjd		    md.md_name, pp->name);
3049132904Spjd		return (NULL);
3050132904Spjd	}
3051132904Spjd	if (g_mirror_debug >= 2)
3052132904Spjd		mirror_metadata_dump(&md);
3053132904Spjd
3054132904Spjd	/*
3055132904Spjd	 * Let's check if device already exists.
3056132904Spjd	 */
3057134486Spjd	sc = NULL;
3058132904Spjd	LIST_FOREACH(gp, &mp->geom, geom) {
3059132904Spjd		sc = gp->softc;
3060132904Spjd		if (sc == NULL)
3061132904Spjd			continue;
3062132904Spjd		if (sc->sc_sync.ds_geom == gp)
3063132904Spjd			continue;
3064132904Spjd		if (strcmp(md.md_name, sc->sc_name) != 0)
3065132904Spjd			continue;
3066132904Spjd		if (md.md_mid != sc->sc_id) {
3067132904Spjd			G_MIRROR_DEBUG(0, "Device %s already configured.",
3068132904Spjd			    sc->sc_name);
3069132904Spjd			return (NULL);
3070132904Spjd		}
3071132904Spjd		break;
3072132904Spjd	}
3073132904Spjd	if (gp == NULL) {
3074132904Spjd		gp = g_mirror_create(mp, &md);
3075132904Spjd		if (gp == NULL) {
3076132976Spjd			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3077132904Spjd			    md.md_name);
3078132904Spjd			return (NULL);
3079132904Spjd		}
3080132904Spjd		sc = gp->softc;
3081132904Spjd	}
3082132904Spjd	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3083156610Spjd	g_topology_unlock();
3084156610Spjd	sx_xlock(&sc->sc_lock);
3085235968Sae	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3086132904Spjd	error = g_mirror_add_disk(sc, pp, &md);
3087132904Spjd	if (error != 0) {
3088132904Spjd		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3089132904Spjd		    pp->name, gp->name, error);
3090156610Spjd		if (LIST_EMPTY(&sc->sc_disks)) {
3091157630Spjd			g_cancel_event(sc);
3092160248Spjd			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3093156610Spjd			g_topology_lock();
3094156610Spjd			return (NULL);
3095156610Spjd		}
3096156610Spjd		gp = NULL;
3097132904Spjd	}
3098235968Sae	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3099235968Sae	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3100235968Sae		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3101235968Sae		g_topology_lock();
3102235968Sae		return (NULL);
3103235968Sae	}
3104156610Spjd	sx_xunlock(&sc->sc_lock);
3105156610Spjd	g_topology_lock();
3106132904Spjd	return (gp);
3107132904Spjd}
3108132904Spjd
3109132904Spjdstatic int
3110132904Spjdg_mirror_destroy_geom(struct gctl_req *req __unused,
3111132904Spjd    struct g_class *mp __unused, struct g_geom *gp)
3112132904Spjd{
3113156610Spjd	struct g_mirror_softc *sc;
3114156610Spjd	int error;
3115132904Spjd
3116156610Spjd	g_topology_unlock();
3117156610Spjd	sc = gp->softc;
3118156610Spjd	sx_xlock(&sc->sc_lock);
3119157630Spjd	g_cancel_event(sc);
3120160248Spjd	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3121156610Spjd	if (error != 0)
3122156610Spjd		sx_xunlock(&sc->sc_lock);
3123156610Spjd	g_topology_lock();
3124156610Spjd	return (error);
3125132904Spjd}
3126132904Spjd
3127132904Spjdstatic void
3128132904Spjdg_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3129132904Spjd    struct g_consumer *cp, struct g_provider *pp)
3130132904Spjd{
3131132904Spjd	struct g_mirror_softc *sc;
3132132904Spjd
3133132904Spjd	g_topology_assert();
3134132904Spjd
3135132904Spjd	sc = gp->softc;
3136132904Spjd	if (sc == NULL)
3137132904Spjd		return;
3138132904Spjd	/* Skip synchronization geom. */
3139132904Spjd	if (gp == sc->sc_sync.ds_geom)
3140132904Spjd		return;
3141132904Spjd	if (pp != NULL) {
3142132904Spjd		/* Nothing here. */
3143132904Spjd	} else if (cp != NULL) {
3144132904Spjd		struct g_mirror_disk *disk;
3145132904Spjd
3146132904Spjd		disk = cp->private;
3147132904Spjd		if (disk == NULL)
3148132904Spjd			return;
3149156610Spjd		g_topology_unlock();
3150156610Spjd		sx_xlock(&sc->sc_lock);
3151132904Spjd		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3152132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3153132904Spjd			sbuf_printf(sb, "%s<Synchronized>", indent);
3154156610Spjd			if (disk->d_sync.ds_offset == 0)
3155132904Spjd				sbuf_printf(sb, "0%%");
3156132904Spjd			else {
3157132904Spjd				sbuf_printf(sb, "%u%%",
3158156610Spjd				    (u_int)((disk->d_sync.ds_offset * 100) /
3159132904Spjd				    sc->sc_provider->mediasize));
3160132904Spjd			}
3161132904Spjd			sbuf_printf(sb, "</Synchronized>\n");
3162240604Sglebius			if (disk->d_sync.ds_offset > 0) {
3163240604Sglebius				sbuf_printf(sb, "%s<BytesSynced>%jd"
3164240604Sglebius				    "</BytesSynced>\n", indent,
3165240604Sglebius				    (intmax_t)disk->d_sync.ds_offset);
3166240604Sglebius			}
3167132904Spjd		}
3168132904Spjd		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3169132904Spjd		    disk->d_sync.ds_syncid);
3170139213Spjd		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3171139213Spjd		    disk->d_genid);
3172132904Spjd		sbuf_printf(sb, "%s<Flags>", indent);
3173132904Spjd		if (disk->d_flags == 0)
3174132904Spjd			sbuf_printf(sb, "NONE");
3175132904Spjd		else {
3176132904Spjd			int first = 1;
3177132904Spjd
3178133448Spjd#define	ADD_FLAG(flag, name)	do {					\
3179133448Spjd	if ((disk->d_flags & (flag)) != 0) {				\
3180133448Spjd		if (!first)						\
3181133448Spjd			sbuf_printf(sb, ", ");				\
3182133448Spjd		else							\
3183133448Spjd			first = 0;					\
3184133448Spjd		sbuf_printf(sb, name);					\
3185133448Spjd	}								\
3186133448Spjd} while (0)
3187133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3188133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3189133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3190133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3191133448Spjd			    "SYNCHRONIZING");
3192133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3193155545Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3194133448Spjd#undef	ADD_FLAG
3195132904Spjd		}
3196132904Spjd		sbuf_printf(sb, "</Flags>\n");
3197132904Spjd		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3198132907Spjd		    disk->d_priority);
3199132904Spjd		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3200132904Spjd		    g_mirror_disk_state2str(disk->d_state));
3201156610Spjd		sx_xunlock(&sc->sc_lock);
3202156610Spjd		g_topology_lock();
3203132904Spjd	} else {
3204156610Spjd		g_topology_unlock();
3205156610Spjd		sx_xlock(&sc->sc_lock);
3206132904Spjd		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3207132904Spjd		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3208139213Spjd		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3209132904Spjd		sbuf_printf(sb, "%s<Flags>", indent);
3210132904Spjd		if (sc->sc_flags == 0)
3211132904Spjd			sbuf_printf(sb, "NONE");
3212132904Spjd		else {
3213132904Spjd			int first = 1;
3214132904Spjd
3215133448Spjd#define	ADD_FLAG(flag, name)	do {					\
3216133448Spjd	if ((sc->sc_flags & (flag)) != 0) {				\
3217133448Spjd		if (!first)						\
3218133448Spjd			sbuf_printf(sb, ", ");				\
3219133448Spjd		else							\
3220133448Spjd			first = 0;					\
3221133448Spjd		sbuf_printf(sb, name);					\
3222133448Spjd	}								\
3223133448Spjd} while (0)
3224163888Spjd			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3225133448Spjd			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3226133448Spjd#undef	ADD_FLAG
3227132904Spjd		}
3228132904Spjd		sbuf_printf(sb, "</Flags>\n");
3229132904Spjd		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3230132904Spjd		    (u_int)sc->sc_slice);
3231132904Spjd		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3232132904Spjd		    balance_name(sc->sc_balance));
3233132904Spjd		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3234132904Spjd		    sc->sc_ndisks);
3235134957Spjd		sbuf_printf(sb, "%s<State>", indent);
3236134957Spjd		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3237134957Spjd			sbuf_printf(sb, "%s", "STARTING");
3238134957Spjd		else if (sc->sc_ndisks ==
3239134957Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3240134957Spjd			sbuf_printf(sb, "%s", "COMPLETE");
3241134957Spjd		else
3242134957Spjd			sbuf_printf(sb, "%s", "DEGRADED");
3243134957Spjd		sbuf_printf(sb, "</State>\n");
3244156610Spjd		sx_xunlock(&sc->sc_lock);
3245156610Spjd		g_topology_lock();
3246132904Spjd	}
3247132904Spjd}
3248132904Spjd
3249137254Spjdstatic void
3250246076Smavg_mirror_shutdown_post_sync(void *arg, int howto)
3251137254Spjd{
3252137254Spjd	struct g_class *mp;
3253137254Spjd	struct g_geom *gp, *gp2;
3254156610Spjd	struct g_mirror_softc *sc;
3255157630Spjd	int error;
3256137254Spjd
3257137254Spjd	mp = arg;
3258137421Spjd	DROP_GIANT();
3259137254Spjd	g_topology_lock();
3260246076Smav	g_mirror_shutdown = 1;
3261137254Spjd	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3262156610Spjd		if ((sc = gp->softc) == NULL)
3263137254Spjd			continue;
3264157630Spjd		/* Skip synchronization geom. */
3265157630Spjd		if (gp == sc->sc_sync.ds_geom)
3266156610Spjd			continue;
3267156610Spjd		g_topology_unlock();
3268156610Spjd		sx_xlock(&sc->sc_lock);
3269246076Smav		g_mirror_idle(sc, -1);
3270157630Spjd		g_cancel_event(sc);
3271157630Spjd		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3272157630Spjd		if (error != 0)
3273157630Spjd			sx_xunlock(&sc->sc_lock);
3274156610Spjd		g_topology_lock();
3275156610Spjd	}
3276156610Spjd	g_topology_unlock();
3277156610Spjd	PICKUP_GIANT();
3278137254Spjd}
3279137254Spjd
3280137254Spjdstatic void
3281137254Spjdg_mirror_init(struct g_class *mp)
3282137254Spjd{
3283137254Spjd
3284246076Smav	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3285246076Smav	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3286246076Smav	if (g_mirror_post_sync == NULL)
3287137254Spjd		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3288137254Spjd}
3289137254Spjd
3290137254Spjdstatic void
3291137254Spjdg_mirror_fini(struct g_class *mp)
3292137254Spjd{
3293137254Spjd
3294246076Smav	if (g_mirror_post_sync != NULL)
3295246076Smav		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3296137254Spjd}
3297137254Spjd
3298132904SpjdDECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3299