1132904Spjd/*-
2156878Spjd * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3132904Spjd * All rights reserved.
4132904Spjd *
5132904Spjd * Redistribution and use in source and binary forms, with or without
6132904Spjd * modification, are permitted provided that the following conditions
7132904Spjd * are met:
8132904Spjd * 1. Redistributions of source code must retain the above copyright
9132904Spjd *    notice, this list of conditions and the following disclaimer.
10132904Spjd * 2. Redistributions in binary form must reproduce the above copyright
11132904Spjd *    notice, this list of conditions and the following disclaimer in the
12132904Spjd *    documentation and/or other materials provided with the distribution.
13155174Spjd *
14132904Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15132904Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16132904Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17132904Spjd * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18132904Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19132904Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20132904Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21132904Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22132904Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23132904Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24132904Spjd * SUCH DAMAGE.
25132904Spjd */
26132904Spjd
27132904Spjd#include <sys/cdefs.h>
28132904Spjd__FBSDID("$FreeBSD$");
29132904Spjd
30132904Spjd#include <sys/param.h>
31132904Spjd#include <sys/systm.h>
32132904Spjd#include <sys/kernel.h>
33132904Spjd#include <sys/module.h>
34132904Spjd#include <sys/limits.h>
35132904Spjd#include <sys/lock.h>
36132904Spjd#include <sys/mutex.h>
37132904Spjd#include <sys/bio.h>
38223921Sae#include <sys/sbuf.h>
39132904Spjd#include <sys/sysctl.h>
40132904Spjd#include <sys/malloc.h>
41137254Spjd#include <sys/eventhandler.h>
42132904Spjd#include <vm/uma.h>
43132904Spjd#include <geom/geom.h>
44132904Spjd#include <sys/proc.h>
45132904Spjd#include <sys/kthread.h>
46139451Sjhb#include <sys/sched.h>
47132904Spjd#include <geom/mirror/g_mirror.h>
48132904Spjd
49219029SnetchildFEATURE(geom_mirror, "GEOM mirroring support");
50132904Spjd
51151897Srwatsonstatic MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
52132904Spjd
53132904SpjdSYSCTL_DECL(_kern_geom);
54227309Sedstatic SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
55227309Sed    "GEOM_MIRROR stuff");
56132904Spjdu_int g_mirror_debug = 0;
57134528SpjdTUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
58132904SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
59132904Spjd    "Debug level");
60135854Spjdstatic u_int g_mirror_timeout = 4;
61134226SpjdTUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
62132904SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
63132904Spjd    0, "Time to wait on all mirror components");
64137251Spjdstatic u_int g_mirror_idletime = 5;
65137251SpjdTUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
66137251SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
67137251Spjd    &g_mirror_idletime, 0, "Mark components as clean when idling");
68155545Spjdstatic u_int g_mirror_disconnect_on_failure = 1;
69155545SpjdTUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
70155545Spjd    &g_mirror_disconnect_on_failure);
71155545SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
72155545Spjd    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
73156873Spjdstatic u_int g_mirror_syncreqs = 2;
74156610SpjdTUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
75156610SpjdSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
76156610Spjd    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
77132904Spjd
/*
 * msleep() wrapper that emits a level-4 debug message naming the wait
 * channel before going to sleep and again after waking up.  The channel
 * argument is expanded three times, so it must be free of side effects.
 */
#define	MSLEEP(chan, lock, prio, msg, timo)	do {			\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (chan));	\
	msleep((chan), (lock), (prio), (msg), (timo));			\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (chan));	\
} while (0)
83132904Spjd
84245443Smavstatic eventhandler_tag g_mirror_post_sync = NULL;
85245443Smavstatic int g_mirror_shutdown = 0;
86132904Spjd
87132904Spjdstatic int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
88132904Spjd    struct g_geom *gp);
89132904Spjdstatic g_taste_t g_mirror_taste;
90137254Spjdstatic void g_mirror_init(struct g_class *mp);
91137254Spjdstatic void g_mirror_fini(struct g_class *mp);
92132904Spjd
93132904Spjdstruct g_class g_mirror_class = {
94132904Spjd	.name = G_MIRROR_CLASS_NAME,
95133318Sphk	.version = G_VERSION,
96132904Spjd	.ctlreq = g_mirror_config,
97132904Spjd	.taste = g_mirror_taste,
98137254Spjd	.destroy_geom = g_mirror_destroy_geom,
99137254Spjd	.init = g_mirror_init,
100137254Spjd	.fini = g_mirror_fini
101132904Spjd};
102132904Spjd
103132904Spjd
104132904Spjdstatic void g_mirror_destroy_provider(struct g_mirror_softc *sc);
105139051Spjdstatic int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
106139051Spjdstatic void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
107132904Spjdstatic void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
108132904Spjd    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
109132904Spjdstatic void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
110156610Spjdstatic void g_mirror_register_request(struct bio *bp);
111156610Spjdstatic void g_mirror_sync_release(struct g_mirror_softc *sc);
112132904Spjd
113132904Spjd
114132904Spjdstatic const char *
115132904Spjdg_mirror_disk_state2str(int state)
116132904Spjd{
117132904Spjd
118132904Spjd	switch (state) {
119132904Spjd	case G_MIRROR_DISK_STATE_NONE:
120132904Spjd		return ("NONE");
121132904Spjd	case G_MIRROR_DISK_STATE_NEW:
122132904Spjd		return ("NEW");
123132904Spjd	case G_MIRROR_DISK_STATE_ACTIVE:
124132904Spjd		return ("ACTIVE");
125132904Spjd	case G_MIRROR_DISK_STATE_STALE:
126132904Spjd		return ("STALE");
127132904Spjd	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
128132904Spjd		return ("SYNCHRONIZING");
129132904Spjd	case G_MIRROR_DISK_STATE_DISCONNECTED:
130132904Spjd		return ("DISCONNECTED");
131132904Spjd	case G_MIRROR_DISK_STATE_DESTROY:
132132904Spjd		return ("DESTROY");
133132904Spjd	default:
134132904Spjd		return ("INVALID");
135132904Spjd	}
136132904Spjd}
137132904Spjd
138132904Spjdstatic const char *
139132904Spjdg_mirror_device_state2str(int state)
140132904Spjd{
141132904Spjd
142132904Spjd	switch (state) {
143132904Spjd	case G_MIRROR_DEVICE_STATE_STARTING:
144132904Spjd		return ("STARTING");
145132904Spjd	case G_MIRROR_DEVICE_STATE_RUNNING:
146132904Spjd		return ("RUNNING");
147132904Spjd	default:
148132904Spjd		return ("INVALID");
149132904Spjd	}
150132904Spjd}
151132904Spjd
152132904Spjdstatic const char *
153132904Spjdg_mirror_get_diskname(struct g_mirror_disk *disk)
154132904Spjd{
155132904Spjd
156132904Spjd	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
157132904Spjd		return ("[unknown]");
158132904Spjd	return (disk->d_name);
159132904Spjd}
160132904Spjd
161132904Spjd/*
162132904Spjd * --- Events handling functions ---
163132904Spjd * Events in geom_mirror are used to maintain disks and device status
164132904Spjd * from one thread to simplify locking.
165132904Spjd */
166132904Spjdstatic void
167132904Spjdg_mirror_event_free(struct g_mirror_event *ep)
168132904Spjd{
169132904Spjd
170132904Spjd	free(ep, M_MIRROR);
171132904Spjd}
172132904Spjd
173132904Spjdint
174132904Spjdg_mirror_event_send(void *arg, int state, int flags)
175132904Spjd{
176132904Spjd	struct g_mirror_softc *sc;
177132904Spjd	struct g_mirror_disk *disk;
178132904Spjd	struct g_mirror_event *ep;
179132904Spjd	int error;
180132904Spjd
181132904Spjd	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
182132904Spjd	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
183132904Spjd	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
184132904Spjd		disk = NULL;
185132904Spjd		sc = arg;
186132904Spjd	} else {
187132904Spjd		disk = arg;
188132904Spjd		sc = disk->d_softc;
189132904Spjd	}
190132904Spjd	ep->e_disk = disk;
191132904Spjd	ep->e_state = state;
192132904Spjd	ep->e_flags = flags;
193132904Spjd	ep->e_error = 0;
194132904Spjd	mtx_lock(&sc->sc_events_mtx);
195132904Spjd	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
196132904Spjd	mtx_unlock(&sc->sc_events_mtx);
197132904Spjd	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
198132904Spjd	mtx_lock(&sc->sc_queue_mtx);
199132904Spjd	wakeup(sc);
200132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
201132904Spjd	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
202132904Spjd		return (0);
203156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
204132904Spjd	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
205156610Spjd	sx_xunlock(&sc->sc_lock);
206132904Spjd	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
207132904Spjd		mtx_lock(&sc->sc_events_mtx);
208132904Spjd		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
209132904Spjd		    hz * 5);
210132904Spjd	}
211132904Spjd	error = ep->e_error;
212132904Spjd	g_mirror_event_free(ep);
213156610Spjd	sx_xlock(&sc->sc_lock);
214132904Spjd	return (error);
215132904Spjd}
216132904Spjd
217132904Spjdstatic struct g_mirror_event *
218132904Spjdg_mirror_event_get(struct g_mirror_softc *sc)
219132904Spjd{
220132904Spjd	struct g_mirror_event *ep;
221132904Spjd
222132904Spjd	mtx_lock(&sc->sc_events_mtx);
223132904Spjd	ep = TAILQ_FIRST(&sc->sc_events);
224132904Spjd	mtx_unlock(&sc->sc_events_mtx);
225132904Spjd	return (ep);
226132904Spjd}
227132904Spjd
228132904Spjdstatic void
229139140Spjdg_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
230139140Spjd{
231139140Spjd
232139140Spjd	mtx_lock(&sc->sc_events_mtx);
233139140Spjd	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
234139140Spjd	mtx_unlock(&sc->sc_events_mtx);
235139140Spjd}
236139140Spjd
237139140Spjdstatic void
238132904Spjdg_mirror_event_cancel(struct g_mirror_disk *disk)
239132904Spjd{
240132904Spjd	struct g_mirror_softc *sc;
241132904Spjd	struct g_mirror_event *ep, *tmpep;
242132904Spjd
243156610Spjd	sc = disk->d_softc;
244156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
245132904Spjd
246132904Spjd	mtx_lock(&sc->sc_events_mtx);
247132904Spjd	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
248132904Spjd		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
249132904Spjd			continue;
250132904Spjd		if (ep->e_disk != disk)
251132904Spjd			continue;
252132904Spjd		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
253132904Spjd		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
254132904Spjd			g_mirror_event_free(ep);
255132904Spjd		else {
256132904Spjd			ep->e_error = ECANCELED;
257132904Spjd			wakeup(ep);
258132904Spjd		}
259132904Spjd	}
260132904Spjd	mtx_unlock(&sc->sc_events_mtx);
261132904Spjd}
262132904Spjd
263132904Spjd/*
264132904Spjd * Return the number of disks in given state.
265132904Spjd * If state is equal to -1, count all connected disks.
266132904Spjd */
267132904Spjdu_int
268132904Spjdg_mirror_ndisks(struct g_mirror_softc *sc, int state)
269132904Spjd{
270132904Spjd	struct g_mirror_disk *disk;
271132904Spjd	u_int n = 0;
272132904Spjd
273156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
274156610Spjd
275132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
276132904Spjd		if (state == -1 || disk->d_state == state)
277132904Spjd			n++;
278132904Spjd	}
279132904Spjd	return (n);
280132904Spjd}
281132904Spjd
282132904Spjd/*
283132904Spjd * Find a disk in mirror by its disk ID.
284132904Spjd */
285132904Spjdstatic struct g_mirror_disk *
286132904Spjdg_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
287132904Spjd{
288132904Spjd	struct g_mirror_disk *disk;
289132904Spjd
290156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
291132904Spjd
292132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
293132904Spjd		if (disk->d_id == id)
294132904Spjd			return (disk);
295132904Spjd	}
296132904Spjd	return (NULL);
297132904Spjd}
298132904Spjd
299132904Spjdstatic u_int
300132904Spjdg_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
301132904Spjd{
302132904Spjd	struct bio *bp;
303132904Spjd	u_int nreqs = 0;
304132904Spjd
305132904Spjd	mtx_lock(&sc->sc_queue_mtx);
306132904Spjd	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
307132904Spjd		if (bp->bio_from == cp)
308132904Spjd			nreqs++;
309132904Spjd	}
310132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
311132904Spjd	return (nreqs);
312132904Spjd}
313132904Spjd
314133484Spjdstatic int
315133484Spjdg_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
316132904Spjd{
317132904Spjd
318137248Spjd	if (cp->index > 0) {
319132904Spjd		G_MIRROR_DEBUG(2,
320132904Spjd		    "I/O requests for %s exist, can't destroy it now.",
321132904Spjd		    cp->provider->name);
322133484Spjd		return (1);
323132904Spjd	}
324132904Spjd	if (g_mirror_nrequests(sc, cp) > 0) {
325132904Spjd		G_MIRROR_DEBUG(2,
326132904Spjd		    "I/O requests for %s in queue, can't destroy it now.",
327132904Spjd		    cp->provider->name);
328133484Spjd		return (1);
329133484Spjd	}
330133484Spjd	return (0);
331133484Spjd}
332133484Spjd
333133484Spjdstatic void
334139053Spjdg_mirror_destroy_consumer(void *arg, int flags __unused)
335139051Spjd{
336139051Spjd	struct g_consumer *cp;
337139051Spjd
338156610Spjd	g_topology_assert();
339156610Spjd
340139051Spjd	cp = arg;
341139051Spjd	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
342139051Spjd	g_detach(cp);
343139051Spjd	g_destroy_consumer(cp);
344139051Spjd}
345139051Spjd
346139051Spjdstatic void
347133484Spjdg_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
348133484Spjd{
349139051Spjd	struct g_provider *pp;
350139051Spjd	int retaste_wait;
351133484Spjd
352133484Spjd	g_topology_assert();
353133484Spjd
354133484Spjd	cp->private = NULL;
355133484Spjd	if (g_mirror_is_busy(sc, cp))
356132904Spjd		return;
357139051Spjd	pp = cp->provider;
358139051Spjd	retaste_wait = 0;
359139051Spjd	if (cp->acw == 1) {
360139051Spjd		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
361139051Spjd			retaste_wait = 1;
362139051Spjd	}
363139051Spjd	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
364139051Spjd	    -cp->acw, -cp->ace, 0);
365139053Spjd	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
366139053Spjd		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
367139051Spjd	if (retaste_wait) {
368139051Spjd		/*
369139051Spjd		 * After retaste event was send (inside g_access()), we can send
370139051Spjd		 * event to detach and destroy consumer.
371139051Spjd		 * A class, which has consumer to the given provider connected
372139051Spjd		 * will not receive retaste event for the provider.
373139051Spjd		 * This is the way how I ignore retaste events when I close
374139051Spjd		 * consumers opened for write: I detach and destroy consumer
375139051Spjd		 * after retaste event is sent.
376139051Spjd		 */
377139051Spjd		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
378139051Spjd		return;
379139051Spjd	}
380139051Spjd	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
381132904Spjd	g_detach(cp);
382132904Spjd	g_destroy_consumer(cp);
383132904Spjd}
384132904Spjd
385132904Spjdstatic int
386132904Spjdg_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
387132904Spjd{
388144143Spjd	struct g_consumer *cp;
389132904Spjd	int error;
390132904Spjd
391156610Spjd	g_topology_assert_not();
392132904Spjd	KASSERT(disk->d_consumer == NULL,
393132904Spjd	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
394132904Spjd
395156610Spjd	g_topology_lock();
396144143Spjd	cp = g_new_consumer(disk->d_softc->sc_geom);
397144143Spjd	error = g_attach(cp, pp);
398144143Spjd	if (error != 0) {
399144143Spjd		g_destroy_consumer(cp);
400156610Spjd		g_topology_unlock();
401132904Spjd		return (error);
402144143Spjd	}
403144143Spjd	error = g_access(cp, 1, 1, 1);
404139051Spjd	if (error != 0) {
405144143Spjd		g_detach(cp);
406144143Spjd		g_destroy_consumer(cp);
407156610Spjd		g_topology_unlock();
408139051Spjd		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
409139051Spjd		    pp->name, error);
410139051Spjd		return (error);
411139051Spjd	}
412156610Spjd	g_topology_unlock();
413144143Spjd	disk->d_consumer = cp;
414144143Spjd	disk->d_consumer->private = disk;
415144143Spjd	disk->d_consumer->index = 0;
416139051Spjd
417132904Spjd	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
418132904Spjd	return (0);
419132904Spjd}
420132904Spjd
421132904Spjdstatic void
422133484Spjdg_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
423132904Spjd{
424132904Spjd
425132904Spjd	g_topology_assert();
426132904Spjd
427132904Spjd	if (cp == NULL)
428132904Spjd		return;
429139051Spjd	if (cp->provider != NULL)
430133484Spjd		g_mirror_kill_consumer(sc, cp);
431139051Spjd	else
432132904Spjd		g_destroy_consumer(cp);
433132904Spjd}
434132904Spjd
435132904Spjd/*
436132904Spjd * Initialize disk. This means allocate memory, create consumer, attach it
437132904Spjd * to the provider and open access (r1w1e1) to it.
438132904Spjd */
439132904Spjdstatic struct g_mirror_disk *
440132904Spjdg_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
441132904Spjd    struct g_mirror_metadata *md, int *errorp)
442132904Spjd{
443132904Spjd	struct g_mirror_disk *disk;
444237930Sglebius	int i, error;
445132904Spjd
446132904Spjd	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
447132904Spjd	if (disk == NULL) {
448132904Spjd		error = ENOMEM;
449132904Spjd		goto fail;
450132904Spjd	}
451132904Spjd	disk->d_softc = sc;
452132904Spjd	error = g_mirror_connect_disk(disk, pp);
453132904Spjd	if (error != 0)
454132904Spjd		goto fail;
455132904Spjd	disk->d_id = md->md_did;
456132904Spjd	disk->d_state = G_MIRROR_DISK_STATE_NONE;
457132904Spjd	disk->d_priority = md->md_priority;
458132904Spjd	disk->d_flags = md->md_dflags;
459237930Sglebius	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
460245946Savg	if (error == 0 && i != 0)
461237930Sglebius		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
462133373Spjd	if (md->md_provider[0] != '\0')
463133373Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
464132904Spjd	disk->d_sync.ds_consumer = NULL;
465132904Spjd	disk->d_sync.ds_offset = md->md_sync_offset;
466132904Spjd	disk->d_sync.ds_offset_done = md->md_sync_offset;
467139213Spjd	disk->d_genid = md->md_genid;
468132904Spjd	disk->d_sync.ds_syncid = md->md_syncid;
469132904Spjd	if (errorp != NULL)
470132904Spjd		*errorp = 0;
471132904Spjd	return (disk);
472132904Spjdfail:
473132904Spjd	if (errorp != NULL)
474132904Spjd		*errorp = error;
475144143Spjd	if (disk != NULL)
476132904Spjd		free(disk, M_MIRROR);
477132904Spjd	return (NULL);
478132904Spjd}
479132904Spjd
480132904Spjdstatic void
481132904Spjdg_mirror_destroy_disk(struct g_mirror_disk *disk)
482132904Spjd{
483132904Spjd	struct g_mirror_softc *sc;
484132904Spjd
485156610Spjd	g_topology_assert_not();
486156610Spjd	sc = disk->d_softc;
487156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
488132904Spjd
489132904Spjd	LIST_REMOVE(disk, d_next);
490132904Spjd	g_mirror_event_cancel(disk);
491132904Spjd	if (sc->sc_hint == disk)
492132904Spjd		sc->sc_hint = NULL;
493132904Spjd	switch (disk->d_state) {
494132904Spjd	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
495132904Spjd		g_mirror_sync_stop(disk, 1);
496132904Spjd		/* FALLTHROUGH */
497132904Spjd	case G_MIRROR_DISK_STATE_NEW:
498132904Spjd	case G_MIRROR_DISK_STATE_STALE:
499132904Spjd	case G_MIRROR_DISK_STATE_ACTIVE:
500156610Spjd		g_topology_lock();
501133484Spjd		g_mirror_disconnect_consumer(sc, disk->d_consumer);
502156610Spjd		g_topology_unlock();
503133114Spjd		free(disk, M_MIRROR);
504132904Spjd		break;
505132904Spjd	default:
506132904Spjd		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
507132904Spjd		    g_mirror_get_diskname(disk),
508132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
509132904Spjd	}
510132904Spjd}
511132904Spjd
512132904Spjdstatic void
513132904Spjdg_mirror_destroy_device(struct g_mirror_softc *sc)
514132904Spjd{
515132904Spjd	struct g_mirror_disk *disk;
516132904Spjd	struct g_mirror_event *ep;
517132904Spjd	struct g_geom *gp;
518133484Spjd	struct g_consumer *cp, *tmpcp;
519132904Spjd
520156610Spjd	g_topology_assert_not();
521156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
522132904Spjd
523132904Spjd	gp = sc->sc_geom;
524132904Spjd	if (sc->sc_provider != NULL)
525132904Spjd		g_mirror_destroy_provider(sc);
526132904Spjd	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
527132904Spjd	    disk = LIST_FIRST(&sc->sc_disks)) {
528137254Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
529137254Spjd		g_mirror_update_metadata(disk);
530132904Spjd		g_mirror_destroy_disk(disk);
531132904Spjd	}
532132904Spjd	while ((ep = g_mirror_event_get(sc)) != NULL) {
533139140Spjd		g_mirror_event_remove(sc, ep);
534132904Spjd		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
535132904Spjd			g_mirror_event_free(ep);
536132904Spjd		else {
537132904Spjd			ep->e_error = ECANCELED;
538132904Spjd			ep->e_flags |= G_MIRROR_EVENT_DONE;
539132904Spjd			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
540132904Spjd			mtx_lock(&sc->sc_events_mtx);
541132904Spjd			wakeup(ep);
542132904Spjd			mtx_unlock(&sc->sc_events_mtx);
543132904Spjd		}
544132904Spjd	}
545132904Spjd	callout_drain(&sc->sc_callout);
546133484Spjd
547156610Spjd	g_topology_lock();
548133484Spjd	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
549133484Spjd		g_mirror_disconnect_consumer(sc, cp);
550132922Spjd	}
551133484Spjd	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
552156610Spjd	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
553156610Spjd	g_wither_geom(gp, ENXIO);
554156610Spjd	g_topology_unlock();
555132904Spjd	mtx_destroy(&sc->sc_queue_mtx);
556132904Spjd	mtx_destroy(&sc->sc_events_mtx);
557156610Spjd	sx_xunlock(&sc->sc_lock);
558156610Spjd	sx_destroy(&sc->sc_lock);
559132904Spjd}
560132904Spjd
561132904Spjdstatic void
562132904Spjdg_mirror_orphan(struct g_consumer *cp)
563132904Spjd{
564132904Spjd	struct g_mirror_disk *disk;
565132904Spjd
566132904Spjd	g_topology_assert();
567132904Spjd
568132904Spjd	disk = cp->private;
569132904Spjd	if (disk == NULL)
570132904Spjd		return;
571139670Spjd	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
572132904Spjd	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
573132904Spjd	    G_MIRROR_EVENT_DONTWAIT);
574132904Spjd}
575132904Spjd
576132904Spjd/*
577132904Spjd * Function should return the next active disk on the list.
578132904Spjd * It is possible that it will be the same disk as given.
579132904Spjd * If there are no active disks on list, NULL is returned.
580132904Spjd */
581132904Spjdstatic __inline struct g_mirror_disk *
582132904Spjdg_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
583132904Spjd{
584132904Spjd	struct g_mirror_disk *dp;
585132904Spjd
586132904Spjd	for (dp = LIST_NEXT(disk, d_next); dp != disk;
587132904Spjd	    dp = LIST_NEXT(dp, d_next)) {
588132904Spjd		if (dp == NULL)
589132904Spjd			dp = LIST_FIRST(&sc->sc_disks);
590132904Spjd		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
591132904Spjd			break;
592132904Spjd	}
593132904Spjd	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
594132904Spjd		return (NULL);
595132904Spjd	return (dp);
596132904Spjd}
597132904Spjd
598132904Spjdstatic struct g_mirror_disk *
599132904Spjdg_mirror_get_disk(struct g_mirror_softc *sc)
600132904Spjd{
601132904Spjd	struct g_mirror_disk *disk;
602132904Spjd
603132904Spjd	if (sc->sc_hint == NULL) {
604132904Spjd		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
605132904Spjd		if (sc->sc_hint == NULL)
606132904Spjd			return (NULL);
607132904Spjd	}
608132904Spjd	disk = sc->sc_hint;
609132904Spjd	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
610132904Spjd		disk = g_mirror_find_next(sc, disk);
611132904Spjd		if (disk == NULL)
612132904Spjd			return (NULL);
613132904Spjd	}
614132904Spjd	sc->sc_hint = g_mirror_find_next(sc, disk);
615132904Spjd	return (disk);
616132904Spjd}
617132904Spjd
618132904Spjdstatic int
619133752Spjdg_mirror_write_metadata(struct g_mirror_disk *disk,
620133752Spjd    struct g_mirror_metadata *md)
621132904Spjd{
622132904Spjd	struct g_mirror_softc *sc;
623132904Spjd	struct g_consumer *cp;
624132904Spjd	off_t offset, length;
625132904Spjd	u_char *sector;
626139051Spjd	int error = 0;
627132904Spjd
628156610Spjd	g_topology_assert_not();
629156610Spjd	sc = disk->d_softc;
630156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
631132904Spjd
632132904Spjd	cp = disk->d_consumer;
633132904Spjd	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
634132904Spjd	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
635156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
636139051Spjd	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
637139051Spjd	    cp->acw, cp->ace));
638132904Spjd	length = cp->provider->sectorsize;
639132904Spjd	offset = cp->provider->mediasize - length;
640132904Spjd	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
641139051Spjd	if (md != NULL)
642139051Spjd		mirror_metadata_encode(md, sector);
643139051Spjd	error = g_write_data(cp, offset, sector, length);
644132904Spjd	free(sector, M_MIRROR);
645132904Spjd	if (error != 0) {
646155545Spjd		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
647155545Spjd			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
648155545Spjd			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
649155545Spjd			    "(device=%s, error=%d).",
650155545Spjd			    g_mirror_get_diskname(disk), sc->sc_name, error);
651155545Spjd		} else {
652155545Spjd			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
653155545Spjd			    "(device=%s, error=%d).",
654155545Spjd			    g_mirror_get_diskname(disk), sc->sc_name, error);
655155545Spjd		}
656155545Spjd		if (g_mirror_disconnect_on_failure &&
657155545Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
658155545Spjd			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
659155545Spjd			g_mirror_event_send(disk,
660155545Spjd			    G_MIRROR_DISK_STATE_DISCONNECTED,
661155545Spjd			    G_MIRROR_EVENT_DONTWAIT);
662155545Spjd		}
663132904Spjd	}
664133752Spjd	return (error);
665132904Spjd}
666132904Spjd
667133752Spjdstatic int
668133752Spjdg_mirror_clear_metadata(struct g_mirror_disk *disk)
669133752Spjd{
670133752Spjd	int error;
671133752Spjd
672156610Spjd	g_topology_assert_not();
673156610Spjd	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
674156610Spjd
675133752Spjd	error = g_mirror_write_metadata(disk, NULL);
676133752Spjd	if (error == 0) {
677133752Spjd		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
678133752Spjd		    g_mirror_get_diskname(disk));
679133752Spjd	} else {
680133752Spjd		G_MIRROR_DEBUG(0,
681133752Spjd		    "Cannot clear metadata on disk %s (error=%d).",
682133752Spjd		    g_mirror_get_diskname(disk), error);
683133752Spjd	}
684133752Spjd	return (error);
685133752Spjd}
686133752Spjd
687132904Spjdvoid
688132904Spjdg_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
689132904Spjd    struct g_mirror_metadata *md)
690132904Spjd{
691132904Spjd
692132904Spjd	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
693132904Spjd	md->md_version = G_MIRROR_VERSION;
694132904Spjd	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
695132904Spjd	md->md_mid = sc->sc_id;
696132904Spjd	md->md_all = sc->sc_ndisks;
697132904Spjd	md->md_slice = sc->sc_slice;
698132904Spjd	md->md_balance = sc->sc_balance;
699139213Spjd	md->md_genid = sc->sc_genid;
700132904Spjd	md->md_mediasize = sc->sc_mediasize;
701132904Spjd	md->md_sectorsize = sc->sc_sectorsize;
702132904Spjd	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
703133373Spjd	bzero(md->md_provider, sizeof(md->md_provider));
704132904Spjd	if (disk == NULL) {
705132904Spjd		md->md_did = arc4random();
706132904Spjd		md->md_priority = 0;
707132904Spjd		md->md_syncid = 0;
708132904Spjd		md->md_dflags = 0;
709132904Spjd		md->md_sync_offset = 0;
710142727Spjd		md->md_provsize = 0;
711132904Spjd	} else {
712132904Spjd		md->md_did = disk->d_id;
713132904Spjd		md->md_priority = disk->d_priority;
714132904Spjd		md->md_syncid = disk->d_sync.ds_syncid;
715132904Spjd		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
716132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
717132904Spjd			md->md_sync_offset = disk->d_sync.ds_offset_done;
718132904Spjd		else
719132904Spjd			md->md_sync_offset = 0;
720133373Spjd		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
721133373Spjd			strlcpy(md->md_provider,
722133373Spjd			    disk->d_consumer->provider->name,
723133373Spjd			    sizeof(md->md_provider));
724133373Spjd		}
725142727Spjd		md->md_provsize = disk->d_consumer->provider->mediasize;
726132904Spjd	}
727132904Spjd}
728132904Spjd
729132904Spjdvoid
730132904Spjdg_mirror_update_metadata(struct g_mirror_disk *disk)
731132904Spjd{
732156610Spjd	struct g_mirror_softc *sc;
733132904Spjd	struct g_mirror_metadata md;
734133752Spjd	int error;
735132904Spjd
736156610Spjd	g_topology_assert_not();
737156610Spjd	sc = disk->d_softc;
738156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
739156610Spjd
740156610Spjd	g_mirror_fill_metadata(sc, disk, &md);
741133752Spjd	error = g_mirror_write_metadata(disk, &md);
742133752Spjd	if (error == 0) {
743133752Spjd		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
744133752Spjd		    g_mirror_get_diskname(disk));
745132904Spjd	} else {
746132904Spjd		G_MIRROR_DEBUG(0,
747132904Spjd		    "Cannot update metadata on disk %s (error=%d).",
748132904Spjd		    g_mirror_get_diskname(disk), error);
749132904Spjd	}
750132904Spjd}
751132904Spjd
752132904Spjdstatic void
753139051Spjdg_mirror_bump_syncid(struct g_mirror_softc *sc)
754132904Spjd{
755132904Spjd	struct g_mirror_disk *disk;
756132904Spjd
757156610Spjd	g_topology_assert_not();
758156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
759132904Spjd	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
760132904Spjd	    ("%s called with no active disks (device=%s).", __func__,
761132904Spjd	    sc->sc_name));
762132904Spjd
763132904Spjd	sc->sc_syncid++;
764132954Spjd	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
765132954Spjd	    sc->sc_syncid);
766132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
767132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
768132904Spjd		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
769132904Spjd			disk->d_sync.ds_syncid = sc->sc_syncid;
770132904Spjd			g_mirror_update_metadata(disk);
771132904Spjd		}
772132904Spjd	}
773132904Spjd}
774132904Spjd
775137248Spjdstatic void
776139213Spjdg_mirror_bump_genid(struct g_mirror_softc *sc)
777139213Spjd{
778139213Spjd	struct g_mirror_disk *disk;
779139213Spjd
780156610Spjd	g_topology_assert_not();
781156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
782139213Spjd	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
783139213Spjd	    ("%s called with no active disks (device=%s).", __func__,
784139213Spjd	    sc->sc_name));
785139213Spjd
786139213Spjd	sc->sc_genid++;
787139213Spjd	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
788139213Spjd	    sc->sc_genid);
789139213Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
790139213Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
791139213Spjd		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
792139246Spjd			disk->d_genid = sc->sc_genid;
793139213Spjd			g_mirror_update_metadata(disk);
794139213Spjd		}
795139213Spjd	}
796139213Spjd}
797139213Spjd
798155539Spjdstatic int
799156610Spjdg_mirror_idle(struct g_mirror_softc *sc, int acw)
800137248Spjd{
801137248Spjd	struct g_mirror_disk *disk;
802155539Spjd	int timeout;
803137248Spjd
804156610Spjd	g_topology_assert_not();
805156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
806156610Spjd
807155539Spjd	if (sc->sc_provider == NULL)
808155539Spjd		return (0);
809163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
810163888Spjd		return (0);
811155539Spjd	if (sc->sc_idle)
812155539Spjd		return (0);
813155539Spjd	if (sc->sc_writes > 0)
814155539Spjd		return (0);
815156610Spjd	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
816155581Spjd		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
817245443Smav		if (!g_mirror_shutdown && timeout > 0)
818155539Spjd			return (timeout);
819155539Spjd	}
820137248Spjd	sc->sc_idle = 1;
821137248Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
822137248Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
823137248Spjd			continue;
824137248Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
825137248Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
826137248Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
827137248Spjd		g_mirror_update_metadata(disk);
828137248Spjd	}
829155539Spjd	return (0);
830137248Spjd}
831137248Spjd
832137248Spjdstatic void
833137248Spjdg_mirror_unidle(struct g_mirror_softc *sc)
834137248Spjd{
835137248Spjd	struct g_mirror_disk *disk;
836137248Spjd
837156610Spjd	g_topology_assert_not();
838156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
839156610Spjd
840163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
841163888Spjd		return;
842137248Spjd	sc->sc_idle = 0;
843155581Spjd	sc->sc_last_write = time_uptime;
844137248Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
845137248Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
846137248Spjd			continue;
847137248Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
848137248Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
849137248Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
850137248Spjd		g_mirror_update_metadata(disk);
851137248Spjd	}
852137248Spjd}
853137248Spjd
854132904Spjdstatic void
855132904Spjdg_mirror_done(struct bio *bp)
856132904Spjd{
857132904Spjd	struct g_mirror_softc *sc;
858132904Spjd
859132904Spjd	sc = bp->bio_from->geom->softc;
860162282Spjd	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
861132904Spjd	mtx_lock(&sc->sc_queue_mtx);
862132904Spjd	bioq_disksort(&sc->sc_queue, bp);
863201566Smav	mtx_unlock(&sc->sc_queue_mtx);
864132904Spjd	wakeup(sc);
865132904Spjd}
866132904Spjd
867132904Spjdstatic void
868132904Spjdg_mirror_regular_request(struct bio *bp)
869132904Spjd{
870132904Spjd	struct g_mirror_softc *sc;
871132904Spjd	struct g_mirror_disk *disk;
872132904Spjd	struct bio *pbp;
873132904Spjd
874132904Spjd	g_topology_assert_not();
875132904Spjd
876132904Spjd	pbp = bp->bio_parent;
877132904Spjd	sc = pbp->bio_to->geom->softc;
878155539Spjd	bp->bio_from->index--;
879155539Spjd	if (bp->bio_cmd == BIO_WRITE)
880155539Spjd		sc->sc_writes--;
881132904Spjd	disk = bp->bio_from->private;
882132904Spjd	if (disk == NULL) {
883132904Spjd		g_topology_lock();
884132904Spjd		g_mirror_kill_consumer(sc, bp->bio_from);
885132904Spjd		g_topology_unlock();
886132904Spjd	}
887132904Spjd
888132904Spjd	pbp->bio_inbed++;
889132904Spjd	KASSERT(pbp->bio_inbed <= pbp->bio_children,
890132904Spjd	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
891132904Spjd	    pbp->bio_children));
892132904Spjd	if (bp->bio_error == 0 && pbp->bio_error == 0) {
893132904Spjd		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
894132904Spjd		g_destroy_bio(bp);
895132904Spjd		if (pbp->bio_children == pbp->bio_inbed) {
896132904Spjd			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
897132904Spjd			pbp->bio_completed = pbp->bio_length;
898237929Sglebius			if (pbp->bio_cmd == BIO_WRITE ||
899237929Sglebius			    pbp->bio_cmd == BIO_DELETE) {
900156610Spjd				bioq_remove(&sc->sc_inflight, pbp);
901156610Spjd				/* Release delayed sync requests if possible. */
902156610Spjd				g_mirror_sync_release(sc);
903156610Spjd			}
904132904Spjd			g_io_deliver(pbp, pbp->bio_error);
905132904Spjd		}
906132904Spjd		return;
907132904Spjd	} else if (bp->bio_error != 0) {
908132904Spjd		if (pbp->bio_error == 0)
909132904Spjd			pbp->bio_error = bp->bio_error;
910132904Spjd		if (disk != NULL) {
911155545Spjd			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
912155545Spjd				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
913155545Spjd				G_MIRROR_LOGREQ(0, bp,
914155545Spjd				    "Request failed (error=%d).",
915155545Spjd				    bp->bio_error);
916155545Spjd			} else {
917155545Spjd				G_MIRROR_LOGREQ(1, bp,
918155545Spjd				    "Request failed (error=%d).",
919155545Spjd				    bp->bio_error);
920155545Spjd			}
921155545Spjd			if (g_mirror_disconnect_on_failure &&
922155545Spjd			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
923155545Spjd			{
924155545Spjd				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
925155545Spjd				g_mirror_event_send(disk,
926155545Spjd				    G_MIRROR_DISK_STATE_DISCONNECTED,
927155545Spjd				    G_MIRROR_EVENT_DONTWAIT);
928155545Spjd			}
929132904Spjd		}
930132904Spjd		switch (pbp->bio_cmd) {
931132904Spjd		case BIO_DELETE:
932132904Spjd		case BIO_WRITE:
933132904Spjd			pbp->bio_inbed--;
934132904Spjd			pbp->bio_children--;
935132904Spjd			break;
936132904Spjd		}
937132904Spjd	}
938132904Spjd	g_destroy_bio(bp);
939132904Spjd
940132904Spjd	switch (pbp->bio_cmd) {
941132904Spjd	case BIO_READ:
942155545Spjd		if (pbp->bio_inbed < pbp->bio_children)
943155545Spjd			break;
944155545Spjd		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
945155545Spjd			g_io_deliver(pbp, pbp->bio_error);
946155545Spjd		else {
947132904Spjd			pbp->bio_error = 0;
948132904Spjd			mtx_lock(&sc->sc_queue_mtx);
949132904Spjd			bioq_disksort(&sc->sc_queue, pbp);
950201566Smav			mtx_unlock(&sc->sc_queue_mtx);
951132904Spjd			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
952132904Spjd			wakeup(sc);
953132904Spjd		}
954132904Spjd		break;
955132904Spjd	case BIO_DELETE:
956132904Spjd	case BIO_WRITE:
957132904Spjd		if (pbp->bio_children == 0) {
958132904Spjd			/*
959132904Spjd			 * All requests failed.
960132904Spjd			 */
961132904Spjd		} else if (pbp->bio_inbed < pbp->bio_children) {
962132904Spjd			/* Do nothing. */
963132904Spjd			break;
964132904Spjd		} else if (pbp->bio_children == pbp->bio_inbed) {
965132904Spjd			/* Some requests succeeded. */
966132904Spjd			pbp->bio_error = 0;
967132904Spjd			pbp->bio_completed = pbp->bio_length;
968132904Spjd		}
969156610Spjd		bioq_remove(&sc->sc_inflight, pbp);
970156610Spjd		/* Release delayed sync requests if possible. */
971156610Spjd		g_mirror_sync_release(sc);
972132904Spjd		g_io_deliver(pbp, pbp->bio_error);
973132904Spjd		break;
974132904Spjd	default:
975132904Spjd		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
976132904Spjd		break;
977132904Spjd	}
978132904Spjd}
979132904Spjd
980132904Spjdstatic void
981132904Spjdg_mirror_sync_done(struct bio *bp)
982132904Spjd{
983132904Spjd	struct g_mirror_softc *sc;
984132904Spjd
985132904Spjd	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
986132904Spjd	sc = bp->bio_from->geom->softc;
987162282Spjd	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
988132904Spjd	mtx_lock(&sc->sc_queue_mtx);
989132904Spjd	bioq_disksort(&sc->sc_queue, bp);
990201566Smav	mtx_unlock(&sc->sc_queue_mtx);
991132904Spjd	wakeup(sc);
992132904Spjd}
993132904Spjd
994132904Spjdstatic void
995156421Spjdg_mirror_kernel_dump(struct bio *bp)
996156421Spjd{
997156421Spjd	struct g_mirror_softc *sc;
998156421Spjd	struct g_mirror_disk *disk;
999156421Spjd	struct bio *cbp;
1000156421Spjd	struct g_kerneldump *gkd;
1001156421Spjd
1002156421Spjd	/*
1003156421Spjd	 * We configure dumping to the first component, because this component
1004156421Spjd	 * will be used for reading with 'prefer' balance algorithm.
1005156421Spjd	 * If the component with the higest priority is currently disconnected
1006156421Spjd	 * we will not be able to read the dump after the reboot if it will be
1007156421Spjd	 * connected and synchronized later. Can we do something better?
1008156421Spjd	 */
1009156421Spjd	sc = bp->bio_to->geom->softc;
1010156421Spjd	disk = LIST_FIRST(&sc->sc_disks);
1011156421Spjd
1012156421Spjd	gkd = (struct g_kerneldump *)bp->bio_data;
1013156421Spjd	if (gkd->length > bp->bio_to->mediasize)
1014156421Spjd		gkd->length = bp->bio_to->mediasize;
1015156421Spjd	cbp = g_clone_bio(bp);
1016156421Spjd	if (cbp == NULL) {
1017156421Spjd		g_io_deliver(bp, ENOMEM);
1018156421Spjd		return;
1019156421Spjd	}
1020156421Spjd	cbp->bio_done = g_std_done;
1021156421Spjd	g_io_request(cbp, disk->d_consumer);
1022156421Spjd	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1023156421Spjd	    g_mirror_get_diskname(disk));
1024156421Spjd}
1025156421Spjd
1026156421Spjdstatic void
1027163836Spjdg_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
1028163836Spjd{
1029163836Spjd	struct bio_queue_head queue;
1030163836Spjd	struct g_mirror_disk *disk;
1031163836Spjd	struct g_consumer *cp;
1032163836Spjd	struct bio *cbp;
1033163836Spjd
1034163836Spjd	bioq_init(&queue);
1035163836Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1036163836Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1037163836Spjd			continue;
1038163836Spjd		cbp = g_clone_bio(bp);
1039163836Spjd		if (cbp == NULL) {
1040163836Spjd			for (cbp = bioq_first(&queue); cbp != NULL;
1041163836Spjd			    cbp = bioq_first(&queue)) {
1042163836Spjd				bioq_remove(&queue, cbp);
1043163836Spjd				g_destroy_bio(cbp);
1044163836Spjd			}
1045163836Spjd			if (bp->bio_error == 0)
1046163836Spjd				bp->bio_error = ENOMEM;
1047163836Spjd			g_io_deliver(bp, bp->bio_error);
1048163836Spjd			return;
1049163836Spjd		}
1050163836Spjd		bioq_insert_tail(&queue, cbp);
1051163836Spjd		cbp->bio_done = g_std_done;
1052163836Spjd		cbp->bio_caller1 = disk;
1053163836Spjd		cbp->bio_to = disk->d_consumer->provider;
1054163836Spjd	}
1055163836Spjd	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1056163836Spjd		bioq_remove(&queue, cbp);
1057163836Spjd		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1058163836Spjd		disk = cbp->bio_caller1;
1059163836Spjd		cbp->bio_caller1 = NULL;
1060163836Spjd		cp = disk->d_consumer;
1061163836Spjd		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1062163836Spjd		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1063163836Spjd		    cp->acr, cp->acw, cp->ace));
1064163836Spjd		g_io_request(cbp, disk->d_consumer);
1065163836Spjd	}
1066163836Spjd}
1067163836Spjd
1068163836Spjdstatic void
1069132904Spjdg_mirror_start(struct bio *bp)
1070132904Spjd{
1071132904Spjd	struct g_mirror_softc *sc;
1072132904Spjd
1073132904Spjd	sc = bp->bio_to->geom->softc;
1074132904Spjd	/*
1075132904Spjd	 * If sc == NULL or there are no valid disks, provider's error
1076132904Spjd	 * should be set and g_mirror_start() should not be called at all.
1077132904Spjd	 */
1078132904Spjd	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1079132904Spjd	    ("Provider's error should be set (error=%d)(mirror=%s).",
1080132904Spjd	    bp->bio_to->error, bp->bio_to->name));
1081132904Spjd	G_MIRROR_LOGREQ(3, bp, "Request received.");
1082132904Spjd
1083132904Spjd	switch (bp->bio_cmd) {
1084132904Spjd	case BIO_READ:
1085132904Spjd	case BIO_WRITE:
1086132904Spjd	case BIO_DELETE:
1087132904Spjd		break;
1088163836Spjd	case BIO_FLUSH:
1089163836Spjd		g_mirror_flush(sc, bp);
1090163836Spjd		return;
1091132904Spjd	case BIO_GETATTR:
1092237930Sglebius		if (g_handleattr_int(bp, "GEOM::candelete", 1))
1093237930Sglebius			return;
1094237930Sglebius		else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1095156421Spjd			g_mirror_kernel_dump(bp);
1096156421Spjd			return;
1097156421Spjd		}
1098156421Spjd		/* FALLTHROUGH */
1099132904Spjd	default:
1100132904Spjd		g_io_deliver(bp, EOPNOTSUPP);
1101132904Spjd		return;
1102132904Spjd	}
1103132904Spjd	mtx_lock(&sc->sc_queue_mtx);
1104132904Spjd	bioq_disksort(&sc->sc_queue, bp);
1105201566Smav	mtx_unlock(&sc->sc_queue_mtx);
1106132904Spjd	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1107132904Spjd	wakeup(sc);
1108132904Spjd}
1109132904Spjd
1110132904Spjd/*
1111156610Spjd * Return TRUE if the given request is colliding with a in-progress
1112156610Spjd * synchronization request.
1113132904Spjd */
1114156610Spjdstatic int
1115156610Spjdg_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1116132904Spjd{
1117156610Spjd	struct g_mirror_disk *disk;
1118156610Spjd	struct bio *sbp;
1119156610Spjd	off_t rstart, rend, sstart, send;
1120156610Spjd	int i;
1121156610Spjd
1122156610Spjd	if (sc->sc_sync.ds_ndisks == 0)
1123156610Spjd		return (0);
1124156610Spjd	rstart = bp->bio_offset;
1125156610Spjd	rend = bp->bio_offset + bp->bio_length;
1126156610Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1127156610Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1128156610Spjd			continue;
1129156610Spjd		for (i = 0; i < g_mirror_syncreqs; i++) {
1130156610Spjd			sbp = disk->d_sync.ds_bios[i];
1131156610Spjd			if (sbp == NULL)
1132156610Spjd				continue;
1133156610Spjd			sstart = sbp->bio_offset;
1134156610Spjd			send = sbp->bio_offset + sbp->bio_length;
1135156610Spjd			if (rend > sstart && rstart < send)
1136156610Spjd				return (1);
1137156610Spjd		}
1138156610Spjd	}
1139156610Spjd	return (0);
1140156610Spjd}
1141156610Spjd
1142156610Spjd/*
1143156610Spjd * Return TRUE if the given sync request is colliding with a in-progress regular
1144156610Spjd * request.
1145156610Spjd */
1146156610Spjdstatic int
1147156610Spjdg_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1148156610Spjd{
1149156610Spjd	off_t rstart, rend, sstart, send;
1150132904Spjd	struct bio *bp;
1151132904Spjd
1152156610Spjd	if (sc->sc_sync.ds_ndisks == 0)
1153156610Spjd		return (0);
1154156610Spjd	sstart = sbp->bio_offset;
1155156610Spjd	send = sbp->bio_offset + sbp->bio_length;
1156156610Spjd	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
1157156610Spjd		rstart = bp->bio_offset;
1158156610Spjd		rend = bp->bio_offset + bp->bio_length;
1159156610Spjd		if (rend > sstart && rstart < send)
1160156610Spjd			return (1);
1161156610Spjd	}
1162156610Spjd	return (0);
1163156610Spjd}
1164132904Spjd
1165156610Spjd/*
1166156610Spjd * Puts request onto delayed queue.
1167156610Spjd */
1168156610Spjdstatic void
1169156610Spjdg_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1170156610Spjd{
1171156610Spjd
1172156610Spjd	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1173156610Spjd	bioq_insert_head(&sc->sc_regular_delayed, bp);
1174156610Spjd}
1175156610Spjd
1176156610Spjd/*
1177156610Spjd * Puts synchronization request onto delayed queue.
1178156610Spjd */
1179156610Spjdstatic void
1180156610Spjdg_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1181156610Spjd{
1182156610Spjd
1183156610Spjd	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1184156610Spjd	bioq_insert_tail(&sc->sc_sync_delayed, bp);
1185156610Spjd}
1186156610Spjd
1187156610Spjd/*
1188156610Spjd * Releases delayed regular requests which don't collide anymore with sync
1189156610Spjd * requests.
1190156610Spjd */
1191156610Spjdstatic void
1192156610Spjdg_mirror_regular_release(struct g_mirror_softc *sc)
1193156610Spjd{
1194156610Spjd	struct bio *bp, *bp2;
1195156610Spjd
1196156610Spjd	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
1197156610Spjd		if (g_mirror_sync_collision(sc, bp))
1198156610Spjd			continue;
1199156610Spjd		bioq_remove(&sc->sc_regular_delayed, bp);
1200156610Spjd		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
1201156610Spjd		mtx_lock(&sc->sc_queue_mtx);
1202156610Spjd		bioq_insert_head(&sc->sc_queue, bp);
1203156610Spjd#if 0
1204156610Spjd		/*
1205156610Spjd		 * wakeup() is not needed, because this function is called from
1206156610Spjd		 * the worker thread.
1207156610Spjd		 */
1208156610Spjd		wakeup(&sc->sc_queue);
1209156610Spjd#endif
1210156610Spjd		mtx_unlock(&sc->sc_queue_mtx);
1211132904Spjd	}
1212132904Spjd}
1213132904Spjd
1214156610Spjd/*
1215156610Spjd * Releases delayed sync requests which don't collide anymore with regular
1216156610Spjd * requests.
1217156610Spjd */
1218132904Spjdstatic void
1219156610Spjdg_mirror_sync_release(struct g_mirror_softc *sc)
1220156610Spjd{
1221156610Spjd	struct bio *bp, *bp2;
1222156610Spjd
1223156610Spjd	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
1224156610Spjd		if (g_mirror_regular_collision(sc, bp))
1225156610Spjd			continue;
1226156610Spjd		bioq_remove(&sc->sc_sync_delayed, bp);
1227156610Spjd		G_MIRROR_LOGREQ(2, bp,
1228156610Spjd		    "Releasing delayed synchronization request.");
1229156610Spjd		g_io_request(bp, bp->bio_from);
1230156610Spjd	}
1231156610Spjd}
1232156610Spjd
1233156610Spjd/*
1234156610Spjd * Handle synchronization requests.
1235156610Spjd * Every synchronization request is two-steps process: first, READ request is
1236156610Spjd * send to active provider and then WRITE request (with read data) to the provider
1237156610Spjd * beeing synchronized. When WRITE is finished, new synchronization request is
1238156610Spjd * send.
1239156610Spjd */
1240156610Spjdstatic void
1241132904Spjdg_mirror_sync_request(struct bio *bp)
1242132904Spjd{
1243132904Spjd	struct g_mirror_softc *sc;
1244132904Spjd	struct g_mirror_disk *disk;
1245132904Spjd
1246137248Spjd	bp->bio_from->index--;
1247132904Spjd	sc = bp->bio_from->geom->softc;
1248132904Spjd	disk = bp->bio_from->private;
1249132904Spjd	if (disk == NULL) {
1250156610Spjd		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1251132904Spjd		g_topology_lock();
1252132904Spjd		g_mirror_kill_consumer(sc, bp->bio_from);
1253132904Spjd		g_topology_unlock();
1254156610Spjd		free(bp->bio_data, M_MIRROR);
1255132904Spjd		g_destroy_bio(bp);
1256156610Spjd		sx_xlock(&sc->sc_lock);
1257132904Spjd		return;
1258132904Spjd	}
1259132904Spjd
1260132904Spjd	/*
1261132904Spjd	 * Synchronization request.
1262132904Spjd	 */
1263132904Spjd	switch (bp->bio_cmd) {
1264132904Spjd	case BIO_READ:
1265132904Spjd	    {
1266132904Spjd		struct g_consumer *cp;
1267132904Spjd
1268132904Spjd		if (bp->bio_error != 0) {
1269132904Spjd			G_MIRROR_LOGREQ(0, bp,
1270132904Spjd			    "Synchronization request failed (error=%d).",
1271132904Spjd			    bp->bio_error);
1272132904Spjd			g_destroy_bio(bp);
1273132904Spjd			return;
1274132904Spjd		}
1275137248Spjd		G_MIRROR_LOGREQ(3, bp,
1276137248Spjd		    "Synchronization request half-finished.");
1277132904Spjd		bp->bio_cmd = BIO_WRITE;
1278133142Spjd		bp->bio_cflags = 0;
1279132904Spjd		cp = disk->d_consumer;
1280156610Spjd		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1281132904Spjd		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1282132904Spjd		    cp->acr, cp->acw, cp->ace));
1283137248Spjd		cp->index++;
1284132904Spjd		g_io_request(bp, cp);
1285132904Spjd		return;
1286132904Spjd	    }
1287132904Spjd	case BIO_WRITE:
1288135833Spjd	    {
1289135833Spjd		struct g_mirror_disk_sync *sync;
1290156610Spjd		off_t offset;
1291156610Spjd		void *data;
1292156610Spjd		int i;
1293135833Spjd
1294132904Spjd		if (bp->bio_error != 0) {
1295132904Spjd			G_MIRROR_LOGREQ(0, bp,
1296132904Spjd			    "Synchronization request failed (error=%d).",
1297132904Spjd			    bp->bio_error);
1298132904Spjd			g_destroy_bio(bp);
1299139670Spjd			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1300132904Spjd			g_mirror_event_send(disk,
1301132904Spjd			    G_MIRROR_DISK_STATE_DISCONNECTED,
1302132904Spjd			    G_MIRROR_EVENT_DONTWAIT);
1303132904Spjd			return;
1304132904Spjd		}
1305132904Spjd		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1306135833Spjd		sync = &disk->d_sync;
1307156610Spjd		if (sync->ds_offset == sc->sc_mediasize ||
1308156610Spjd		    sync->ds_consumer == NULL ||
1309156610Spjd		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1310156610Spjd			/* Don't send more synchronization requests. */
1311156610Spjd			sync->ds_inflight--;
1312156610Spjd			if (sync->ds_bios != NULL) {
1313156684Sru				i = (int)(uintptr_t)bp->bio_caller1;
1314156610Spjd				sync->ds_bios[i] = NULL;
1315156610Spjd			}
1316156610Spjd			free(bp->bio_data, M_MIRROR);
1317156610Spjd			g_destroy_bio(bp);
1318156610Spjd			if (sync->ds_inflight > 0)
1319156610Spjd				return;
1320156610Spjd			if (sync->ds_consumer == NULL ||
1321156610Spjd			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1322156610Spjd				return;
1323156610Spjd			}
1324156610Spjd			/* Disk up-to-date, activate it. */
1325132904Spjd			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1326132904Spjd			    G_MIRROR_EVENT_DONTWAIT);
1327132904Spjd			return;
1328156610Spjd		}
1329156610Spjd
1330156610Spjd		/* Send next synchronization request. */
1331156610Spjd		data = bp->bio_data;
1332156610Spjd		bzero(bp, sizeof(*bp));
1333156610Spjd		bp->bio_cmd = BIO_READ;
1334156610Spjd		bp->bio_offset = sync->ds_offset;
1335156610Spjd		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1336156610Spjd		sync->ds_offset += bp->bio_length;
1337156610Spjd		bp->bio_done = g_mirror_sync_done;
1338156610Spjd		bp->bio_data = data;
1339156610Spjd		bp->bio_from = sync->ds_consumer;
1340156610Spjd		bp->bio_to = sc->sc_provider;
1341156610Spjd		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1342156610Spjd		sync->ds_consumer->index++;
1343156610Spjd		/*
1344156610Spjd		 * Delay the request if it is colliding with a regular request.
1345156610Spjd		 */
1346156610Spjd		if (g_mirror_regular_collision(sc, bp))
1347156610Spjd			g_mirror_sync_delay(sc, bp);
1348156610Spjd		else
1349156610Spjd			g_io_request(bp, sync->ds_consumer);
1350156610Spjd
1351156610Spjd		/* Release delayed requests if possible. */
1352156610Spjd		g_mirror_regular_release(sc);
1353156610Spjd
1354156610Spjd		/* Find the smallest offset */
1355156610Spjd		offset = sc->sc_mediasize;
1356156610Spjd		for (i = 0; i < g_mirror_syncreqs; i++) {
1357156610Spjd			bp = sync->ds_bios[i];
1358156610Spjd			if (bp->bio_offset < offset)
1359156610Spjd				offset = bp->bio_offset;
1360156610Spjd		}
1361156610Spjd		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
1362156610Spjd			/* Update offset_done on every 100 blocks. */
1363156610Spjd			sync->ds_offset_done = offset;
1364132904Spjd			g_mirror_update_metadata(disk);
1365132904Spjd		}
1366132904Spjd		return;
1367135833Spjd	    }
1368132904Spjd	default:
1369132904Spjd		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1370132904Spjd		    bp->bio_cmd, sc->sc_name));
1371132904Spjd		break;
1372132904Spjd	}
1373132904Spjd}
1374132904Spjd
1375132904Spjdstatic void
1376133115Spjdg_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1377133115Spjd{
1378133115Spjd	struct g_mirror_disk *disk;
1379133115Spjd	struct g_consumer *cp;
1380133115Spjd	struct bio *cbp;
1381133115Spjd
1382133115Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1383133115Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1384133115Spjd			break;
1385133115Spjd	}
1386133115Spjd	if (disk == NULL) {
1387133115Spjd		if (bp->bio_error == 0)
1388133115Spjd			bp->bio_error = ENXIO;
1389133115Spjd		g_io_deliver(bp, bp->bio_error);
1390133115Spjd		return;
1391133115Spjd	}
1392133115Spjd	cbp = g_clone_bio(bp);
1393133115Spjd	if (cbp == NULL) {
1394133115Spjd		if (bp->bio_error == 0)
1395133115Spjd			bp->bio_error = ENOMEM;
1396133115Spjd		g_io_deliver(bp, bp->bio_error);
1397133115Spjd		return;
1398133115Spjd	}
1399133115Spjd	/*
1400133115Spjd	 * Fill in the component buf structure.
1401133115Spjd	 */
1402133115Spjd	cp = disk->d_consumer;
1403133115Spjd	cbp->bio_done = g_mirror_done;
1404133115Spjd	cbp->bio_to = cp->provider;
1405133115Spjd	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1406156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1407133115Spjd	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1408133115Spjd	    cp->acw, cp->ace));
1409137248Spjd	cp->index++;
1410133115Spjd	g_io_request(cbp, cp);
1411133115Spjd}
1412133115Spjd
1413133115Spjdstatic void
1414132904Spjdg_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1415132904Spjd{
1416132904Spjd	struct g_mirror_disk *disk;
1417132904Spjd	struct g_consumer *cp;
1418132904Spjd	struct bio *cbp;
1419132904Spjd
1420132904Spjd	disk = g_mirror_get_disk(sc);
1421132904Spjd	if (disk == NULL) {
1422132904Spjd		if (bp->bio_error == 0)
1423132904Spjd			bp->bio_error = ENXIO;
1424132904Spjd		g_io_deliver(bp, bp->bio_error);
1425132904Spjd		return;
1426132904Spjd	}
1427132904Spjd	cbp = g_clone_bio(bp);
1428132904Spjd	if (cbp == NULL) {
1429132904Spjd		if (bp->bio_error == 0)
1430132904Spjd			bp->bio_error = ENOMEM;
1431132904Spjd		g_io_deliver(bp, bp->bio_error);
1432132904Spjd		return;
1433132904Spjd	}
1434132904Spjd	/*
1435132904Spjd	 * Fill in the component buf structure.
1436132904Spjd	 */
1437132904Spjd	cp = disk->d_consumer;
1438132904Spjd	cbp->bio_done = g_mirror_done;
1439132904Spjd	cbp->bio_to = cp->provider;
1440132904Spjd	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1441156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1442132904Spjd	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1443132904Spjd	    cp->acw, cp->ace));
1444137248Spjd	cp->index++;
1445132904Spjd	g_io_request(cbp, cp);
1446132904Spjd}
1447132904Spjd
1448200086Smav#define TRACK_SIZE  (1 * 1024 * 1024)
1449200086Smav#define LOAD_SCALE	256
1450200086Smav#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1451200086Smav
1452132904Spjdstatic void
1453132904Spjdg_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1454132904Spjd{
1455132904Spjd	struct g_mirror_disk *disk, *dp;
1456132904Spjd	struct g_consumer *cp;
1457132904Spjd	struct bio *cbp;
1458200086Smav	int prio, best;
1459132904Spjd
1460200086Smav	/* Find a disk with the smallest load. */
1461132904Spjd	disk = NULL;
1462200086Smav	best = INT_MAX;
1463132904Spjd	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1464132904Spjd		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1465132904Spjd			continue;
1466200086Smav		prio = dp->load;
1467200086Smav		/* If disk head is precisely in position - highly prefer it. */
1468200086Smav		if (dp->d_last_offset == bp->bio_offset)
1469200086Smav			prio -= 2 * LOAD_SCALE;
1470200086Smav		else
1471200086Smav		/* If disk head is close to position - prefer it. */
1472200086Smav		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1473200086Smav			prio -= 1 * LOAD_SCALE;
1474200086Smav		if (prio <= best) {
1475132904Spjd			disk = dp;
1476200086Smav			best = prio;
1477132904Spjd		}
1478132904Spjd	}
1479146110Spjd	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1480132904Spjd	cbp = g_clone_bio(bp);
1481132904Spjd	if (cbp == NULL) {
1482132904Spjd		if (bp->bio_error == 0)
1483132904Spjd			bp->bio_error = ENOMEM;
1484132904Spjd		g_io_deliver(bp, bp->bio_error);
1485132904Spjd		return;
1486132904Spjd	}
1487132904Spjd	/*
1488132904Spjd	 * Fill in the component buf structure.
1489132904Spjd	 */
1490132904Spjd	cp = disk->d_consumer;
1491132904Spjd	cbp->bio_done = g_mirror_done;
1492132904Spjd	cbp->bio_to = cp->provider;
1493132904Spjd	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1494156610Spjd	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1495132904Spjd	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1496132904Spjd	    cp->acw, cp->ace));
1497137248Spjd	cp->index++;
1498200086Smav	/* Remember last head position */
1499200086Smav	disk->d_last_offset = bp->bio_offset + bp->bio_length;
1500200086Smav	/* Update loads. */
1501200086Smav	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1502200086Smav		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1503200086Smav		    dp->load * 7) / 8;
1504200086Smav	}
1505132904Spjd	g_io_request(cbp, cp);
1506132904Spjd}
1507132904Spjd
1508132904Spjdstatic void
1509132904Spjdg_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1510132904Spjd{
1511132904Spjd	struct bio_queue_head queue;
1512132904Spjd	struct g_mirror_disk *disk;
1513132904Spjd	struct g_consumer *cp;
1514132904Spjd	struct bio *cbp;
1515132904Spjd	off_t left, mod, offset, slice;
1516132904Spjd	u_char *data;
1517132904Spjd	u_int ndisks;
1518132904Spjd
1519132904Spjd	if (bp->bio_length <= sc->sc_slice) {
1520132904Spjd		g_mirror_request_round_robin(sc, bp);
1521132904Spjd		return;
1522132904Spjd	}
1523132904Spjd	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1524132904Spjd	slice = bp->bio_length / ndisks;
1525132904Spjd	mod = slice % sc->sc_provider->sectorsize;
1526132904Spjd	if (mod != 0)
1527132904Spjd		slice += sc->sc_provider->sectorsize - mod;
1528132904Spjd	/*
1529132904Spjd	 * Allocate all bios before sending any request, so we can
1530132904Spjd	 * return ENOMEM in nice and clean way.
1531132904Spjd	 */
1532132904Spjd	left = bp->bio_length;
1533132904Spjd	offset = bp->bio_offset;
1534132904Spjd	data = bp->bio_data;
1535132904Spjd	bioq_init(&queue);
1536132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1537132904Spjd		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1538132904Spjd			continue;
1539132904Spjd		cbp = g_clone_bio(bp);
1540132904Spjd		if (cbp == NULL) {
1541132904Spjd			for (cbp = bioq_first(&queue); cbp != NULL;
1542132904Spjd			    cbp = bioq_first(&queue)) {
1543132904Spjd				bioq_remove(&queue, cbp);
1544132904Spjd				g_destroy_bio(cbp);
1545132904Spjd			}
1546132904Spjd			if (bp->bio_error == 0)
1547132904Spjd				bp->bio_error = ENOMEM;
1548132904Spjd			g_io_deliver(bp, bp->bio_error);
1549132904Spjd			return;
1550132904Spjd		}
1551132904Spjd		bioq_insert_tail(&queue, cbp);
1552132904Spjd		cbp->bio_done = g_mirror_done;
1553132904Spjd		cbp->bio_caller1 = disk;
1554132904Spjd		cbp->bio_to = disk->d_consumer->provider;
1555132904Spjd		cbp->bio_offset = offset;
1556132904Spjd		cbp->bio_data = data;
1557132904Spjd		cbp->bio_length = MIN(left, slice);
1558132904Spjd		left -= cbp->bio_length;
1559132904Spjd		if (left == 0)
1560132904Spjd			break;
1561132904Spjd		offset += cbp->bio_length;
1562132904Spjd		data += cbp->bio_length;
1563132904Spjd	}
1564132904Spjd	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1565132904Spjd		bioq_remove(&queue, cbp);
1566132904Spjd		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1567132904Spjd		disk = cbp->bio_caller1;
1568132904Spjd		cbp->bio_caller1 = NULL;
1569132904Spjd		cp = disk->d_consumer;
1570156610Spjd		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1571132904Spjd		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1572132904Spjd		    cp->acr, cp->acw, cp->ace));
1573137248Spjd		disk->d_consumer->index++;
1574132904Spjd		g_io_request(cbp, disk->d_consumer);
1575132904Spjd	}
1576132904Spjd}
1577132904Spjd
1578132904Spjdstatic void
1579132904Spjdg_mirror_register_request(struct bio *bp)
1580132904Spjd{
1581132904Spjd	struct g_mirror_softc *sc;
1582132904Spjd
1583132904Spjd	sc = bp->bio_to->geom->softc;
1584132904Spjd	switch (bp->bio_cmd) {
1585132904Spjd	case BIO_READ:
1586132904Spjd		switch (sc->sc_balance) {
1587133115Spjd		case G_MIRROR_BALANCE_LOAD:
1588133115Spjd			g_mirror_request_load(sc, bp);
1589133115Spjd			break;
1590133115Spjd		case G_MIRROR_BALANCE_PREFER:
1591133115Spjd			g_mirror_request_prefer(sc, bp);
1592133115Spjd			break;
1593132904Spjd		case G_MIRROR_BALANCE_ROUND_ROBIN:
1594132904Spjd			g_mirror_request_round_robin(sc, bp);
1595132904Spjd			break;
1596132904Spjd		case G_MIRROR_BALANCE_SPLIT:
1597132904Spjd			g_mirror_request_split(sc, bp);
1598132904Spjd			break;
1599132904Spjd		}
1600132904Spjd		return;
1601132904Spjd	case BIO_WRITE:
1602132904Spjd	case BIO_DELETE:
1603132904Spjd	    {
1604132904Spjd		struct g_mirror_disk *disk;
1605135833Spjd		struct g_mirror_disk_sync *sync;
1606132904Spjd		struct bio_queue_head queue;
1607132904Spjd		struct g_consumer *cp;
1608132904Spjd		struct bio *cbp;
1609132904Spjd
1610156610Spjd		/*
1611156610Spjd		 * Delay the request if it is colliding with a synchronization
1612156610Spjd		 * request.
1613156610Spjd		 */
1614156610Spjd		if (g_mirror_sync_collision(sc, bp)) {
1615156610Spjd			g_mirror_regular_delay(sc, bp);
1616156610Spjd			return;
1617156610Spjd		}
1618156610Spjd
1619137248Spjd		if (sc->sc_idle)
1620137248Spjd			g_mirror_unidle(sc);
1621155539Spjd		else
1622155581Spjd			sc->sc_last_write = time_uptime;
1623155539Spjd
1624132904Spjd		/*
1625132904Spjd		 * Allocate all bios before sending any request, so we can
1626132904Spjd		 * return ENOMEM in nice and clean way.
1627132904Spjd		 */
1628132904Spjd		bioq_init(&queue);
1629132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1630135833Spjd			sync = &disk->d_sync;
1631132904Spjd			switch (disk->d_state) {
1632132904Spjd			case G_MIRROR_DISK_STATE_ACTIVE:
1633132904Spjd				break;
1634132904Spjd			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1635135833Spjd				if (bp->bio_offset >= sync->ds_offset)
1636132904Spjd					continue;
1637132904Spjd				break;
1638132904Spjd			default:
1639132904Spjd				continue;
1640132904Spjd			}
1641237930Sglebius			if (bp->bio_cmd == BIO_DELETE &&
1642237930Sglebius			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1643237930Sglebius				continue;
1644132904Spjd			cbp = g_clone_bio(bp);
1645132904Spjd			if (cbp == NULL) {
1646132904Spjd				for (cbp = bioq_first(&queue); cbp != NULL;
1647132904Spjd				    cbp = bioq_first(&queue)) {
1648132904Spjd					bioq_remove(&queue, cbp);
1649132904Spjd					g_destroy_bio(cbp);
1650132904Spjd				}
1651132904Spjd				if (bp->bio_error == 0)
1652132904Spjd					bp->bio_error = ENOMEM;
1653132904Spjd				g_io_deliver(bp, bp->bio_error);
1654132904Spjd				return;
1655132904Spjd			}
1656132904Spjd			bioq_insert_tail(&queue, cbp);
1657135831Spjd			cbp->bio_done = g_mirror_done;
1658132904Spjd			cp = disk->d_consumer;
1659135831Spjd			cbp->bio_caller1 = cp;
1660132904Spjd			cbp->bio_to = cp->provider;
1661156610Spjd			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1662132904Spjd			    ("Consumer %s not opened (r%dw%de%d).",
1663132904Spjd			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1664135831Spjd		}
1665135831Spjd		for (cbp = bioq_first(&queue); cbp != NULL;
1666135831Spjd		    cbp = bioq_first(&queue)) {
1667135831Spjd			bioq_remove(&queue, cbp);
1668135831Spjd			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1669135831Spjd			cp = cbp->bio_caller1;
1670135831Spjd			cbp->bio_caller1 = NULL;
1671137248Spjd			cp->index++;
1672155539Spjd			sc->sc_writes++;
1673132904Spjd			g_io_request(cbp, cp);
1674132904Spjd		}
1675132904Spjd		/*
1676156610Spjd		 * Put request onto inflight queue, so we can check if new
1677156610Spjd		 * synchronization requests don't collide with it.
1678156610Spjd		 */
1679156610Spjd		bioq_insert_tail(&sc->sc_inflight, bp);
1680156610Spjd		/*
1681132904Spjd		 * Bump syncid on first write.
1682132904Spjd		 */
1683139670Spjd		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1684139213Spjd			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1685139051Spjd			g_mirror_bump_syncid(sc);
1686132904Spjd		}
1687132904Spjd		return;
1688132904Spjd	    }
1689132904Spjd	default:
1690132904Spjd		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1691132904Spjd		    bp->bio_cmd, sc->sc_name));
1692132904Spjd		break;
1693132904Spjd	}
1694132904Spjd}
1695132904Spjd
1696133484Spjdstatic int
1697133484Spjdg_mirror_can_destroy(struct g_mirror_softc *sc)
1698133484Spjd{
1699133484Spjd	struct g_geom *gp;
1700133484Spjd	struct g_consumer *cp;
1701133484Spjd
1702133484Spjd	g_topology_assert();
1703133484Spjd	gp = sc->sc_geom;
1704158112Spjd	if (gp->softc == NULL)
1705158112Spjd		return (1);
1706235599Sae	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1707235599Sae		return (0);
1708133484Spjd	LIST_FOREACH(cp, &gp->consumer, consumer) {
1709133484Spjd		if (g_mirror_is_busy(sc, cp))
1710133484Spjd			return (0);
1711133484Spjd	}
1712133484Spjd	gp = sc->sc_sync.ds_geom;
1713133484Spjd	LIST_FOREACH(cp, &gp->consumer, consumer) {
1714133484Spjd		if (g_mirror_is_busy(sc, cp))
1715133484Spjd			return (0);
1716133484Spjd	}
1717133484Spjd	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1718133484Spjd	    sc->sc_name);
1719133484Spjd	return (1);
1720133484Spjd}
1721133484Spjd
1722133484Spjdstatic int
1723133484Spjdg_mirror_try_destroy(struct g_mirror_softc *sc)
1724133484Spjd{
1725133484Spjd
1726146616Spjd	if (sc->sc_rootmount != NULL) {
1727146616Spjd		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1728146616Spjd		    sc->sc_rootmount);
1729146616Spjd		root_mount_rel(sc->sc_rootmount);
1730146616Spjd		sc->sc_rootmount = NULL;
1731146616Spjd	}
1732139213Spjd	g_topology_lock();
1733139213Spjd	if (!g_mirror_can_destroy(sc)) {
1734139213Spjd		g_topology_unlock();
1735139213Spjd		return (0);
1736139213Spjd	}
1737158112Spjd	sc->sc_geom->softc = NULL;
1738158112Spjd	sc->sc_sync.ds_geom->softc = NULL;
1739133484Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1740133484Spjd		g_topology_unlock();
1741133484Spjd		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1742133484Spjd		    &sc->sc_worker);
1743156610Spjd		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1744156610Spjd		sx_xunlock(&sc->sc_lock);
1745133484Spjd		wakeup(&sc->sc_worker);
1746133484Spjd		sc->sc_worker = NULL;
1747133484Spjd	} else {
1748156610Spjd		g_topology_unlock();
1749133484Spjd		g_mirror_destroy_device(sc);
1750133484Spjd		free(sc, M_MIRROR);
1751133484Spjd	}
1752133484Spjd	return (1);
1753133484Spjd}
1754133484Spjd
1755132904Spjd/*
1756132904Spjd * Worker thread.
1757132904Spjd */
1758132904Spjdstatic void
1759132904Spjdg_mirror_worker(void *arg)
1760132904Spjd{
1761132904Spjd	struct g_mirror_softc *sc;
1762132904Spjd	struct g_mirror_event *ep;
1763132904Spjd	struct bio *bp;
1764155539Spjd	int timeout;
1765132904Spjd
1766132904Spjd	sc = arg;
1767170307Sjeff	thread_lock(curthread);
1768139451Sjhb	sched_prio(curthread, PRIBIO);
1769170307Sjeff	thread_unlock(curthread);
1770132904Spjd
1771156610Spjd	sx_xlock(&sc->sc_lock);
1772132904Spjd	for (;;) {
1773132904Spjd		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1774132904Spjd		/*
1775132904Spjd		 * First take a look at events.
1776132904Spjd		 * This is important to handle events before any I/O requests.
1777132904Spjd		 */
1778132904Spjd		ep = g_mirror_event_get(sc);
1779156610Spjd		if (ep != NULL) {
1780139140Spjd			g_mirror_event_remove(sc, ep);
1781132904Spjd			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1782132904Spjd				/* Update only device status. */
1783132904Spjd				G_MIRROR_DEBUG(3,
1784132904Spjd				    "Running event for device %s.",
1785132904Spjd				    sc->sc_name);
1786132904Spjd				ep->e_error = 0;
1787139051Spjd				g_mirror_update_device(sc, 1);
1788132904Spjd			} else {
1789132904Spjd				/* Update disk status. */
1790132904Spjd				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1791132904Spjd				     g_mirror_get_diskname(ep->e_disk));
1792132904Spjd				ep->e_error = g_mirror_update_disk(ep->e_disk,
1793139051Spjd				    ep->e_state);
1794132904Spjd				if (ep->e_error == 0)
1795139051Spjd					g_mirror_update_device(sc, 0);
1796132904Spjd			}
1797132904Spjd			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1798132904Spjd				KASSERT(ep->e_error == 0,
1799132904Spjd				    ("Error cannot be handled."));
1800132904Spjd				g_mirror_event_free(ep);
1801132904Spjd			} else {
1802132904Spjd				ep->e_flags |= G_MIRROR_EVENT_DONE;
1803132904Spjd				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1804132904Spjd				    ep);
1805132904Spjd				mtx_lock(&sc->sc_events_mtx);
1806132904Spjd				wakeup(ep);
1807132904Spjd				mtx_unlock(&sc->sc_events_mtx);
1808132904Spjd			}
1809132904Spjd			if ((sc->sc_flags &
1810132904Spjd			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1811156610Spjd				if (g_mirror_try_destroy(sc)) {
1812156610Spjd					curthread->td_pflags &= ~TDP_GEOM;
1813156610Spjd					G_MIRROR_DEBUG(1, "Thread exiting.");
1814172836Sjulian					kproc_exit(0);
1815156610Spjd				}
1816132904Spjd			}
1817132904Spjd			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1818132904Spjd			continue;
1819132904Spjd		}
1820132904Spjd		/*
1821155539Spjd		 * Check if we can mark array as CLEAN and if we can't take
1822155539Spjd		 * how much seconds should we wait.
1823155539Spjd		 */
1824156610Spjd		timeout = g_mirror_idle(sc, -1);
1825155539Spjd		/*
1826132904Spjd		 * Now I/O requests.
1827132904Spjd		 */
1828132904Spjd		/* Get first request from the queue. */
1829132904Spjd		mtx_lock(&sc->sc_queue_mtx);
1830132904Spjd		bp = bioq_first(&sc->sc_queue);
1831132904Spjd		if (bp == NULL) {
1832132904Spjd			if ((sc->sc_flags &
1833132904Spjd			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1834132904Spjd				mtx_unlock(&sc->sc_queue_mtx);
1835156610Spjd				if (g_mirror_try_destroy(sc)) {
1836156610Spjd					curthread->td_pflags &= ~TDP_GEOM;
1837156610Spjd					G_MIRROR_DEBUG(1, "Thread exiting.");
1838172836Sjulian					kproc_exit(0);
1839156610Spjd				}
1840133484Spjd				mtx_lock(&sc->sc_queue_mtx);
1841132904Spjd			}
1842156610Spjd			sx_xunlock(&sc->sc_lock);
1843158116Spjd			/*
1844158116Spjd			 * XXX: We can miss an event here, because an event
1845158116Spjd			 *      can be added without sx-device-lock and without
1846158116Spjd			 *      mtx-queue-lock. Maybe I should just stop using
1847158116Spjd			 *      dedicated mutex for events synchronization and
1848158116Spjd			 *      stick with the queue lock?
1849158116Spjd			 *      The event will hang here until next I/O request
1850158116Spjd			 *      or next event is received.
1851158116Spjd			 */
1852155539Spjd			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1853155539Spjd			    timeout * hz);
1854156610Spjd			sx_xlock(&sc->sc_lock);
1855155539Spjd			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1856132904Spjd			continue;
1857132904Spjd		}
1858132904Spjd		bioq_remove(&sc->sc_queue, bp);
1859132904Spjd		mtx_unlock(&sc->sc_queue_mtx);
1860132904Spjd
1861162282Spjd		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1862162282Spjd		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1863162282Spjd			g_mirror_sync_request(bp);	/* READ */
1864162282Spjd		} else if (bp->bio_to != sc->sc_provider) {
1865161116Spjd			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1866161116Spjd				g_mirror_regular_request(bp);
1867161116Spjd			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1868162282Spjd				g_mirror_sync_request(bp);	/* WRITE */
1869161116Spjd			else {
1870161116Spjd				KASSERT(0,
1871161116Spjd				    ("Invalid request cflags=0x%hhx to=%s.",
1872161116Spjd				    bp->bio_cflags, bp->bio_to->name));
1873161116Spjd			}
1874161116Spjd		} else {
1875132904Spjd			g_mirror_register_request(bp);
1876161116Spjd		}
1877139140Spjd		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1878132904Spjd	}
1879132904Spjd}
1880132904Spjd
1881132904Spjdstatic void
1882155539Spjdg_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1883132904Spjd{
1884132904Spjd
1885156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
1886156610Spjd
1887163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1888163888Spjd		return;
1889155539Spjd	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1890155539Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1891156610Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
1892155539Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1893155539Spjd	} else if (sc->sc_idle &&
1894155539Spjd	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1895155539Spjd		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1896156610Spjd		    g_mirror_get_diskname(disk), sc->sc_name);
1897155539Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1898132904Spjd	}
1899132904Spjd}
1900132904Spjd
1901132904Spjdstatic void
1902132904Spjdg_mirror_sync_start(struct g_mirror_disk *disk)
1903132904Spjd{
1904132904Spjd	struct g_mirror_softc *sc;
1905156610Spjd	struct g_consumer *cp;
1906156610Spjd	struct bio *bp;
1907156610Spjd	int error, i;
1908132904Spjd
1909156610Spjd	g_topology_assert_not();
1910156610Spjd	sc = disk->d_softc;
1911156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
1912132904Spjd
1913156610Spjd	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1914156610Spjd	    ("Disk %s is not marked for synchronization.",
1915156610Spjd	    g_mirror_get_diskname(disk)));
1916132904Spjd	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1917132904Spjd	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1918132904Spjd	    sc->sc_state));
1919132904Spjd
1920156610Spjd	sx_xunlock(&sc->sc_lock);
1921156610Spjd	g_topology_lock();
1922156610Spjd	cp = g_new_consumer(sc->sc_sync.ds_geom);
1923156610Spjd	error = g_attach(cp, sc->sc_provider);
1924156610Spjd	KASSERT(error == 0,
1925156610Spjd	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1926156610Spjd	error = g_access(cp, 1, 0, 0);
1927156610Spjd	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1928156610Spjd	g_topology_unlock();
1929156610Spjd	sx_xlock(&sc->sc_lock);
1930156610Spjd
1931132904Spjd	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1932132904Spjd	    g_mirror_get_diskname(disk));
1933163888Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1934163888Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1935132904Spjd	KASSERT(disk->d_sync.ds_consumer == NULL,
1936132904Spjd	    ("Sync consumer already exists (device=%s, disk=%s).",
1937132904Spjd	    sc->sc_name, g_mirror_get_diskname(disk)));
1938156610Spjd
1939156610Spjd	disk->d_sync.ds_consumer = cp;
1940132904Spjd	disk->d_sync.ds_consumer->private = disk;
1941137248Spjd	disk->d_sync.ds_consumer->index = 0;
1942156610Spjd
1943156610Spjd	/*
1944156610Spjd	 * Allocate memory for synchronization bios and initialize them.
1945156610Spjd	 */
1946156610Spjd	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
1947156610Spjd	    M_MIRROR, M_WAITOK);
1948156610Spjd	for (i = 0; i < g_mirror_syncreqs; i++) {
1949156610Spjd		bp = g_alloc_bio();
1950156610Spjd		disk->d_sync.ds_bios[i] = bp;
1951156610Spjd		bp->bio_parent = NULL;
1952156610Spjd		bp->bio_cmd = BIO_READ;
1953156610Spjd		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1954156610Spjd		bp->bio_cflags = 0;
1955156610Spjd		bp->bio_offset = disk->d_sync.ds_offset;
1956156610Spjd		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1957156610Spjd		disk->d_sync.ds_offset += bp->bio_length;
1958156610Spjd		bp->bio_done = g_mirror_sync_done;
1959156610Spjd		bp->bio_from = disk->d_sync.ds_consumer;
1960156610Spjd		bp->bio_to = sc->sc_provider;
1961156684Sru		bp->bio_caller1 = (void *)(uintptr_t)i;
1962156610Spjd	}
1963156610Spjd
1964156610Spjd	/* Increase the number of disks in SYNCHRONIZING state. */
1965132904Spjd	sc->sc_sync.ds_ndisks++;
1966156610Spjd	/* Set the number of in-flight synchronization requests. */
1967156610Spjd	disk->d_sync.ds_inflight = g_mirror_syncreqs;
1968156610Spjd
1969156610Spjd	/*
1970156610Spjd	 * Fire off first synchronization requests.
1971156610Spjd	 */
1972156610Spjd	for (i = 0; i < g_mirror_syncreqs; i++) {
1973156610Spjd		bp = disk->d_sync.ds_bios[i];
1974156610Spjd		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1975156610Spjd		disk->d_sync.ds_consumer->index++;
1976156610Spjd		/*
1977156610Spjd		 * Delay the request if it is colliding with a regular request.
1978156610Spjd		 */
1979156610Spjd		if (g_mirror_regular_collision(sc, bp))
1980156610Spjd			g_mirror_sync_delay(sc, bp);
1981156610Spjd		else
1982156610Spjd			g_io_request(bp, disk->d_sync.ds_consumer);
1983156610Spjd	}
1984132904Spjd}
1985132904Spjd
1986132904Spjd/*
1987132904Spjd * Stop synchronization process.
1988132904Spjd * type: 0 - synchronization finished
1989132904Spjd *       1 - synchronization stopped
1990132904Spjd */
1991132904Spjdstatic void
1992132904Spjdg_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1993132904Spjd{
1994156610Spjd	struct g_mirror_softc *sc;
1995156610Spjd	struct g_consumer *cp;
1996132904Spjd
1997156610Spjd	g_topology_assert_not();
1998156610Spjd	sc = disk->d_softc;
1999156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
2000156610Spjd
2001132904Spjd	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2002132904Spjd	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2003132904Spjd	    g_mirror_disk_state2str(disk->d_state)));
2004132904Spjd	if (disk->d_sync.ds_consumer == NULL)
2005132904Spjd		return;
2006132904Spjd
2007132904Spjd	if (type == 0) {
2008132904Spjd		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2009156610Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2010132904Spjd	} else /* if (type == 1) */ {
2011132904Spjd		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2012156610Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2013132904Spjd	}
2014156610Spjd	free(disk->d_sync.ds_bios, M_MIRROR);
2015156610Spjd	disk->d_sync.ds_bios = NULL;
2016156610Spjd	cp = disk->d_sync.ds_consumer;
2017132904Spjd	disk->d_sync.ds_consumer = NULL;
2018132904Spjd	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2019156610Spjd	sc->sc_sync.ds_ndisks--;
2020156610Spjd	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2021156610Spjd	g_topology_lock();
2022156610Spjd	g_mirror_kill_consumer(sc, cp);
2023156610Spjd	g_topology_unlock();
2024156610Spjd	sx_xlock(&sc->sc_lock);
2025132904Spjd}
2026132904Spjd
2027132904Spjdstatic void
2028132904Spjdg_mirror_launch_provider(struct g_mirror_softc *sc)
2029132904Spjd{
2030132904Spjd	struct g_mirror_disk *disk;
2031252010Sscottl	struct g_provider *pp, *dp;
2032132904Spjd
2033156610Spjd	sx_assert(&sc->sc_lock, SX_LOCKED);
2034132904Spjd
2035156610Spjd	g_topology_lock();
2036132904Spjd	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2037132904Spjd	pp->mediasize = sc->sc_mediasize;
2038132904Spjd	pp->sectorsize = sc->sc_sectorsize;
2039200935Smav	pp->stripesize = 0;
2040200935Smav	pp->stripeoffset = 0;
2041252010Sscottl
2042252010Sscottl	/* Splitting of unmapped BIO's could work but isn't implemented now */
2043252011Sscottl	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2044252010Sscottl		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2045252010Sscottl
2046200935Smav	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2047252010Sscottl		if (disk->d_consumer && disk->d_consumer->provider) {
2048252010Sscottl			dp = disk->d_consumer->provider;
2049252010Sscottl			if (dp->stripesize > pp->stripesize) {
2050252010Sscottl				pp->stripesize = dp->stripesize;
2051252010Sscottl				pp->stripeoffset = dp->stripeoffset;
2052252010Sscottl			}
2053252010Sscottl			/* A provider underneath us doesn't support unmapped */
2054252010Sscottl			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2055254252Sed				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2056254252Sed				    "because of %s.", dp->name);
2057252010Sscottl				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2058252010Sscottl			}
2059200935Smav		}
2060200935Smav	}
2061132904Spjd	sc->sc_provider = pp;
2062132904Spjd	g_error_provider(pp, 0);
2063156610Spjd	g_topology_unlock();
2064162188Sjmg	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2065162188Sjmg	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2066132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2067132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2068132904Spjd			g_mirror_sync_start(disk);
2069132904Spjd	}
2070132904Spjd}
2071132904Spjd
2072132904Spjdstatic void
2073132904Spjdg_mirror_destroy_provider(struct g_mirror_softc *sc)
2074132904Spjd{
2075132904Spjd	struct g_mirror_disk *disk;
2076132904Spjd	struct bio *bp;
2077132904Spjd
2078156610Spjd	g_topology_assert_not();
2079132904Spjd	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2080132904Spjd	    sc->sc_name));
2081132904Spjd
2082156610Spjd	g_topology_lock();
2083132904Spjd	g_error_provider(sc->sc_provider, ENXIO);
2084132904Spjd	mtx_lock(&sc->sc_queue_mtx);
2085132904Spjd	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
2086132904Spjd		bioq_remove(&sc->sc_queue, bp);
2087132904Spjd		g_io_deliver(bp, ENXIO);
2088132904Spjd	}
2089132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
2090132904Spjd	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
2091132904Spjd	    sc->sc_provider->name);
2092132904Spjd	sc->sc_provider->flags |= G_PF_WITHER;
2093132904Spjd	g_orphan_provider(sc->sc_provider, ENXIO);
2094156610Spjd	g_topology_unlock();
2095132904Spjd	sc->sc_provider = NULL;
2096132904Spjd	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2097132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2098132904Spjd			g_mirror_sync_stop(disk, 1);
2099132904Spjd	}
2100132904Spjd}
2101132904Spjd
2102132904Spjdstatic void
2103132904Spjdg_mirror_go(void *arg)
2104132904Spjd{
2105132904Spjd	struct g_mirror_softc *sc;
2106132904Spjd
2107132904Spjd	sc = arg;
2108132904Spjd	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2109132904Spjd	g_mirror_event_send(sc, 0,
2110132904Spjd	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2111132904Spjd}
2112132904Spjd
2113132904Spjdstatic u_int
2114132904Spjdg_mirror_determine_state(struct g_mirror_disk *disk)
2115132904Spjd{
2116132904Spjd	struct g_mirror_softc *sc;
2117132904Spjd	u_int state;
2118132904Spjd
2119132904Spjd	sc = disk->d_softc;
2120132904Spjd	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2121132904Spjd		if ((disk->d_flags &
2122132904Spjd		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2123132904Spjd			/* Disk does not need synchronization. */
2124132904Spjd			state = G_MIRROR_DISK_STATE_ACTIVE;
2125132904Spjd		} else {
2126132904Spjd			if ((sc->sc_flags &
2127156873Spjd			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2128132904Spjd			    (disk->d_flags &
2129132904Spjd			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2130132904Spjd				/*
2131132904Spjd				 * We can start synchronization from
2132132904Spjd				 * the stored offset.
2133132904Spjd				 */
2134132904Spjd				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2135132904Spjd			} else {
2136132904Spjd				state = G_MIRROR_DISK_STATE_STALE;
2137132904Spjd			}
2138132904Spjd		}
2139132904Spjd	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2140132904Spjd		/*
2141132904Spjd		 * Reset all synchronization data for this disk,
2142132904Spjd		 * because if it even was synchronized, it was
2143132904Spjd		 * synchronized to disks with different syncid.
2144132904Spjd		 */
2145132904Spjd		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2146132904Spjd		disk->d_sync.ds_offset = 0;
2147132904Spjd		disk->d_sync.ds_offset_done = 0;
2148132904Spjd		disk->d_sync.ds_syncid = sc->sc_syncid;
2149132904Spjd		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2150132904Spjd		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2151132904Spjd			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2152132904Spjd		} else {
2153132904Spjd			state = G_MIRROR_DISK_STATE_STALE;
2154132904Spjd		}
2155132904Spjd	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2156132904Spjd		/*
2157132904Spjd		 * Not good, NOT GOOD!
2158132904Spjd		 * It means that mirror was started on stale disks
2159132904Spjd		 * and more fresh disk just arrive.
2160160895Spjd		 * If there were writes, mirror is broken, sorry.
2161132904Spjd		 * I think the best choice here is don't touch
2162160964Syar		 * this disk and inform the user loudly.
2163132904Spjd		 */
2164132904Spjd		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2165132904Spjd		    "disk (%s) arrives!! It will not be connected to the "
2166132904Spjd		    "running device.", sc->sc_name,
2167132904Spjd		    g_mirror_get_diskname(disk));
2168132904Spjd		g_mirror_destroy_disk(disk);
2169132904Spjd		state = G_MIRROR_DISK_STATE_NONE;
2170132904Spjd		/* Return immediately, because disk was destroyed. */
2171132904Spjd		return (state);
2172132904Spjd	}
2173132904Spjd	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2174132904Spjd	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2175132904Spjd	return (state);
2176132904Spjd}
2177132904Spjd
2178132904Spjd/*
2179132904Spjd * Update device state.
2180132904Spjd */
2181132904Spjdstatic void
2182139051Spjdg_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
2183132904Spjd{
2184132904Spjd	struct g_mirror_disk *disk;
2185132904Spjd	u_int state;
2186132904Spjd
2187156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
2188132904Spjd
2189132904Spjd	switch (sc->sc_state) {
2190132904Spjd	case G_MIRROR_DEVICE_STATE_STARTING:
2191132904Spjd	    {
2192139213Spjd		struct g_mirror_disk *pdisk, *tdisk;
2193139213Spjd		u_int dirty, ndisks, genid, syncid;
2194132904Spjd
2195132904Spjd		KASSERT(sc->sc_provider == NULL,
2196132904Spjd		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2197132904Spjd		/*
2198132904Spjd		 * Are we ready? We are, if all disks are connected or
2199132904Spjd		 * if we have any disks and 'force' is true.
2200132904Spjd		 */
2201156610Spjd		ndisks = g_mirror_ndisks(sc, -1);
2202157290Spjd		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2203132904Spjd			;
2204156610Spjd		} else if (ndisks == 0) {
2205132904Spjd			/*
2206132904Spjd			 * Disks went down in starting phase, so destroy
2207132904Spjd			 * device.
2208132904Spjd			 */
2209146616Spjd			callout_drain(&sc->sc_callout);
2210146616Spjd			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2211146616Spjd			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2212146616Spjd			    sc->sc_rootmount);
2213145305Spjd			root_mount_rel(sc->sc_rootmount);
2214145305Spjd			sc->sc_rootmount = NULL;
2215132904Spjd			return;
2216132904Spjd		} else {
2217132904Spjd			return;
2218132904Spjd		}
2219132904Spjd
2220132904Spjd		/*
2221132904Spjd		 * Activate all disks with the biggest syncid.
2222132904Spjd		 */
2223132904Spjd		if (force) {
2224132904Spjd			/*
2225133079Spjd			 * If 'force' is true, we have been called due to
2226133079Spjd			 * timeout, so don't bother canceling timeout.
2227132904Spjd			 */
2228132904Spjd			ndisks = 0;
2229132904Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2230132904Spjd				if ((disk->d_flags &
2231132904Spjd				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2232132904Spjd					ndisks++;
2233132904Spjd				}
2234132904Spjd			}
2235132904Spjd			if (ndisks == 0) {
2236132941Spjd				/* No valid disks found, destroy device. */
2237132941Spjd				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2238146616Spjd				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2239146616Spjd				    __LINE__, sc->sc_rootmount);
2240146616Spjd				root_mount_rel(sc->sc_rootmount);
2241146616Spjd				sc->sc_rootmount = NULL;
2242132904Spjd				return;
2243132904Spjd			}
2244132904Spjd		} else {
2245132904Spjd			/* Cancel timeout. */
2246132904Spjd			callout_drain(&sc->sc_callout);
2247132904Spjd		}
2248132904Spjd
2249132904Spjd		/*
2250139213Spjd		 * Find the biggest genid.
2251132904Spjd		 */
2252139213Spjd		genid = 0;
2253139213Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2254139213Spjd			if (disk->d_genid > genid)
2255139213Spjd				genid = disk->d_genid;
2256139213Spjd		}
2257139213Spjd		sc->sc_genid = genid;
2258139213Spjd		/*
2259139213Spjd		 * Remove all disks without the biggest genid.
2260139213Spjd		 */
2261139213Spjd		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2262139213Spjd			if (disk->d_genid < genid) {
2263139213Spjd				G_MIRROR_DEBUG(0,
2264139213Spjd				    "Component %s (device %s) broken, skipping.",
2265139213Spjd				    g_mirror_get_diskname(disk), sc->sc_name);
2266139213Spjd				g_mirror_destroy_disk(disk);
2267139213Spjd			}
2268139213Spjd		}
2269139213Spjd
2270139213Spjd		/*
2271139213Spjd		 * Find the biggest syncid.
2272139213Spjd		 */
2273132904Spjd		syncid = 0;
2274132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2275132904Spjd			if (disk->d_sync.ds_syncid > syncid)
2276132904Spjd				syncid = disk->d_sync.ds_syncid;
2277132904Spjd		}
2278132904Spjd
2279132904Spjd		/*
2280132904Spjd		 * Here we need to look for dirty disks and if all disks
2281132904Spjd		 * with the biggest syncid are dirty, we have to choose
2282132904Spjd		 * one with the biggest priority and rebuild the rest.
2283132904Spjd		 */
2284132904Spjd		/*
2285132904Spjd		 * Find the number of dirty disks with the biggest syncid.
2286132904Spjd		 * Find the number of disks with the biggest syncid.
2287132904Spjd		 * While here, find a disk with the biggest priority.
2288132904Spjd		 */
2289132904Spjd		dirty = ndisks = 0;
2290132904Spjd		pdisk = NULL;
2291132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2292132904Spjd			if (disk->d_sync.ds_syncid != syncid)
2293132904Spjd				continue;
2294132904Spjd			if ((disk->d_flags &
2295132904Spjd			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2296132904Spjd				continue;
2297132904Spjd			}
2298132904Spjd			ndisks++;
2299132904Spjd			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2300132904Spjd				dirty++;
2301132904Spjd				if (pdisk == NULL ||
2302132904Spjd				    pdisk->d_priority < disk->d_priority) {
2303132904Spjd					pdisk = disk;
2304132904Spjd				}
2305132904Spjd			}
2306132904Spjd		}
2307132904Spjd		if (dirty == 0) {
2308132904Spjd			/* No dirty disks at all, great. */
2309132904Spjd		} else if (dirty == ndisks) {
2310132904Spjd			/*
2311132904Spjd			 * Force synchronization for all dirty disks except one
2312132904Spjd			 * with the biggest priority.
2313132904Spjd			 */
2314132904Spjd			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2315132904Spjd			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2316132904Spjd			    "master disk for synchronization.",
2317132904Spjd			    g_mirror_get_diskname(pdisk), sc->sc_name);
2318132904Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2319132904Spjd				if (disk->d_sync.ds_syncid != syncid)
2320132904Spjd					continue;
2321132904Spjd				if ((disk->d_flags &
2322132904Spjd				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2323132904Spjd					continue;
2324132904Spjd				}
2325132904Spjd				KASSERT((disk->d_flags &
2326132904Spjd				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2327132904Spjd				    ("Disk %s isn't marked as dirty.",
2328132904Spjd				    g_mirror_get_diskname(disk)));
2329132904Spjd				/* Skip the disk with the biggest priority. */
2330132904Spjd				if (disk == pdisk)
2331132904Spjd					continue;
2332132904Spjd				disk->d_sync.ds_syncid = 0;
2333132904Spjd			}
2334132904Spjd		} else if (dirty < ndisks) {
2335132904Spjd			/*
2336132904Spjd			 * Force synchronization for all dirty disks.
2337132904Spjd			 * We have some non-dirty disks.
2338132904Spjd			 */
2339132904Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2340132904Spjd				if (disk->d_sync.ds_syncid != syncid)
2341132904Spjd					continue;
2342132904Spjd				if ((disk->d_flags &
2343132904Spjd				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2344132904Spjd					continue;
2345132904Spjd				}
2346132904Spjd				if ((disk->d_flags &
2347132904Spjd				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2348132904Spjd					continue;
2349132904Spjd				}
2350132904Spjd				disk->d_sync.ds_syncid = 0;
2351132904Spjd			}
2352132904Spjd		}
2353132904Spjd
2354132904Spjd		/* Reset hint. */
2355132904Spjd		sc->sc_hint = NULL;
2356132904Spjd		sc->sc_syncid = syncid;
2357132904Spjd		if (force) {
2358132904Spjd			/* Remember to bump syncid on first write. */
2359139670Spjd			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2360132904Spjd		}
2361132904Spjd		state = G_MIRROR_DEVICE_STATE_RUNNING;
2362132904Spjd		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2363132904Spjd		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2364132904Spjd		    g_mirror_device_state2str(state));
2365132904Spjd		sc->sc_state = state;
2366132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2367132904Spjd			state = g_mirror_determine_state(disk);
2368132904Spjd			g_mirror_event_send(disk, state,
2369132904Spjd			    G_MIRROR_EVENT_DONTWAIT);
2370139213Spjd			if (state == G_MIRROR_DISK_STATE_STALE)
2371139670Spjd				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2372132904Spjd		}
2373132904Spjd		break;
2374132904Spjd	    }
2375132904Spjd	case G_MIRROR_DEVICE_STATE_RUNNING:
2376137248Spjd		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2377132904Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2378132904Spjd			/*
2379137248Spjd			 * No active disks or no disks at all,
2380137248Spjd			 * so destroy device.
2381132904Spjd			 */
2382132904Spjd			if (sc->sc_provider != NULL)
2383132904Spjd				g_mirror_destroy_provider(sc);
2384137248Spjd			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2385133946Spjd			break;
2386132904Spjd		} else if (g_mirror_ndisks(sc,
2387132954Spjd		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2388132954Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2389132904Spjd			/*
2390132904Spjd			 * We have active disks, launch provider if it doesn't
2391132904Spjd			 * exist.
2392132904Spjd			 */
2393132904Spjd			if (sc->sc_provider == NULL)
2394132904Spjd				g_mirror_launch_provider(sc);
2395146624Spjd			if (sc->sc_rootmount != NULL) {
2396146624Spjd				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2397146624Spjd				    __LINE__, sc->sc_rootmount);
2398146624Spjd				root_mount_rel(sc->sc_rootmount);
2399146624Spjd				sc->sc_rootmount = NULL;
2400146624Spjd			}
2401132904Spjd		}
2402133946Spjd		/*
2403139670Spjd		 * Genid should be bumped immediately, so do it here.
2404133946Spjd		 */
2405139670Spjd		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2406139213Spjd			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2407139213Spjd			g_mirror_bump_genid(sc);
2408139213Spjd		}
2409132904Spjd		break;
2410132904Spjd	default:
2411132904Spjd		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2412132904Spjd		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2413132904Spjd		break;
2414132904Spjd	}
2415132904Spjd}
2416132904Spjd
2417132904Spjd/*
2418132904Spjd * Update disk state and device state if needed.
2419132904Spjd */
2420132904Spjd#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2421132904Spjd	"Disk %s state changed from %s to %s (device %s).",		\
2422132904Spjd	g_mirror_get_diskname(disk),					\
2423132904Spjd	g_mirror_disk_state2str(disk->d_state),				\
2424132904Spjd	g_mirror_disk_state2str(state), sc->sc_name)
2425132904Spjdstatic int
2426139051Spjdg_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2427132904Spjd{
2428132904Spjd	struct g_mirror_softc *sc;
2429132904Spjd
2430156610Spjd	sc = disk->d_softc;
2431156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
2432132904Spjd
2433132904Spjdagain:
2434132904Spjd	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2435132904Spjd	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2436132904Spjd	    g_mirror_disk_state2str(state));
2437132904Spjd	switch (state) {
2438132904Spjd	case G_MIRROR_DISK_STATE_NEW:
2439132904Spjd		/*
2440132904Spjd		 * Possible scenarios:
2441132904Spjd		 * 1. New disk arrive.
2442132904Spjd		 */
2443132904Spjd		/* Previous state should be NONE. */
2444132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2445132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2446132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2447132904Spjd		DISK_STATE_CHANGED();
2448132904Spjd
2449132904Spjd		disk->d_state = state;
2450133115Spjd		if (LIST_EMPTY(&sc->sc_disks))
2451133115Spjd			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2452133115Spjd		else {
2453133115Spjd			struct g_mirror_disk *dp;
2454133115Spjd
2455133115Spjd			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2456133115Spjd				if (disk->d_priority >= dp->d_priority) {
2457133115Spjd					LIST_INSERT_BEFORE(dp, disk, d_next);
2458133115Spjd					dp = NULL;
2459133115Spjd					break;
2460133115Spjd				}
2461133115Spjd				if (LIST_NEXT(dp, d_next) == NULL)
2462133115Spjd					break;
2463133115Spjd			}
2464133115Spjd			if (dp != NULL)
2465133115Spjd				LIST_INSERT_AFTER(dp, disk, d_next);
2466133115Spjd		}
2467162188Sjmg		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2468132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2469132904Spjd		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2470132904Spjd			break;
2471132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2472132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2473132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2474132904Spjd		    g_mirror_get_diskname(disk),
2475132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2476132904Spjd		state = g_mirror_determine_state(disk);
2477132904Spjd		if (state != G_MIRROR_DISK_STATE_NONE)
2478132904Spjd			goto again;
2479132904Spjd		break;
2480132904Spjd	case G_MIRROR_DISK_STATE_ACTIVE:
2481132904Spjd		/*
2482132904Spjd		 * Possible scenarios:
2483132904Spjd		 * 1. New disk does not need synchronization.
2484132904Spjd		 * 2. Synchronization process finished successfully.
2485132904Spjd		 */
2486132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2487132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2488132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2489132904Spjd		    g_mirror_get_diskname(disk),
2490132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2491132904Spjd		/* Previous state should be NEW or SYNCHRONIZING. */
2492132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2493132904Spjd		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2494132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2495132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2496132904Spjd		DISK_STATE_CHANGED();
2497132904Spjd
2498155582Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2499132904Spjd			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2500132904Spjd			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2501132904Spjd			g_mirror_sync_stop(disk, 0);
2502132904Spjd		}
2503132904Spjd		disk->d_state = state;
2504132904Spjd		disk->d_sync.ds_offset = 0;
2505132904Spjd		disk->d_sync.ds_offset_done = 0;
2506155539Spjd		g_mirror_update_idle(sc, disk);
2507155582Spjd		g_mirror_update_metadata(disk);
2508162188Sjmg		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2509132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2510132904Spjd		break;
2511132904Spjd	case G_MIRROR_DISK_STATE_STALE:
2512132904Spjd		/*
2513132904Spjd		 * Possible scenarios:
2514132904Spjd		 * 1. Stale disk was connected.
2515132904Spjd		 */
2516132904Spjd		/* Previous state should be NEW. */
2517132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2518132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2519132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2520132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2521132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2522132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2523132904Spjd		    g_mirror_get_diskname(disk),
2524132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2525132904Spjd		/*
2526132904Spjd		 * STALE state is only possible if device is marked
2527132904Spjd		 * NOAUTOSYNC.
2528132904Spjd		 */
2529132904Spjd		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2530132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2531132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2532132904Spjd		    g_mirror_get_diskname(disk),
2533132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2534132904Spjd		DISK_STATE_CHANGED();
2535132904Spjd
2536132904Spjd		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2537132904Spjd		disk->d_state = state;
2538132904Spjd		g_mirror_update_metadata(disk);
2539132904Spjd		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2540132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2541132904Spjd		break;
2542132904Spjd	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2543132904Spjd		/*
2544132904Spjd		 * Possible scenarios:
2545132904Spjd		 * 1. Disk which needs synchronization was connected.
2546132904Spjd		 */
2547132904Spjd		/* Previous state should be NEW. */
2548132904Spjd		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2549132904Spjd		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2550132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2551132904Spjd		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2552132904Spjd		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2553132904Spjd		    g_mirror_device_state2str(sc->sc_state),
2554132904Spjd		    g_mirror_get_diskname(disk),
2555132904Spjd		    g_mirror_disk_state2str(disk->d_state)));
2556132904Spjd		DISK_STATE_CHANGED();
2557132904Spjd
2558132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2559132904Spjd			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2560132904Spjd		disk->d_state = state;
2561132904Spjd		if (sc->sc_provider != NULL) {
2562132904Spjd			g_mirror_sync_start(disk);
2563132904Spjd			g_mirror_update_metadata(disk);
2564132904Spjd		}
2565132904Spjd		break;
2566132904Spjd	case G_MIRROR_DISK_STATE_DISCONNECTED:
2567132904Spjd		/*
2568132904Spjd		 * Possible scenarios:
2569132904Spjd		 * 1. Device wasn't running yet, but disk disappear.
2570132904Spjd		 * 2. Disk was active and disapppear.
2571132904Spjd		 * 3. Disk disappear during synchronization process.
2572132904Spjd		 */
2573132904Spjd		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2574132904Spjd			/*
2575132904Spjd			 * Previous state should be ACTIVE, STALE or
2576132904Spjd			 * SYNCHRONIZING.
2577132904Spjd			 */
2578132904Spjd			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2579132904Spjd			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2580132904Spjd			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2581132904Spjd			    ("Wrong disk state (%s, %s).",
2582132904Spjd			    g_mirror_get_diskname(disk),
2583132904Spjd			    g_mirror_disk_state2str(disk->d_state)));
2584132904Spjd		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2585132904Spjd			/* Previous state should be NEW. */
2586132904Spjd			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2587132904Spjd			    ("Wrong disk state (%s, %s).",
2588132904Spjd			    g_mirror_get_diskname(disk),
2589132904Spjd			    g_mirror_disk_state2str(disk->d_state)));
2590132904Spjd			/*
2591132904Spjd			 * Reset bumping syncid if disk disappeared in STARTING
2592132904Spjd			 * state.
2593132904Spjd			 */
2594139670Spjd			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2595139213Spjd				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2596132904Spjd#ifdef	INVARIANTS
2597132904Spjd		} else {
2598132904Spjd			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2599132904Spjd			    sc->sc_name,
2600132904Spjd			    g_mirror_device_state2str(sc->sc_state),
2601132904Spjd			    g_mirror_get_diskname(disk),
2602132904Spjd			    g_mirror_disk_state2str(disk->d_state)));
2603132904Spjd#endif
2604132904Spjd		}
2605132904Spjd		DISK_STATE_CHANGED();
2606132904Spjd		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2607132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2608132904Spjd
2609132904Spjd		g_mirror_destroy_disk(disk);
2610132904Spjd		break;
2611132904Spjd	case G_MIRROR_DISK_STATE_DESTROY:
2612132904Spjd	    {
2613132904Spjd		int error;
2614132904Spjd
2615132904Spjd		error = g_mirror_clear_metadata(disk);
2616132904Spjd		if (error != 0)
2617132904Spjd			return (error);
2618132904Spjd		DISK_STATE_CHANGED();
2619132904Spjd		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2620132904Spjd		    sc->sc_name, g_mirror_get_diskname(disk));
2621132904Spjd
2622132904Spjd		g_mirror_destroy_disk(disk);
2623132904Spjd		sc->sc_ndisks--;
2624132904Spjd		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2625132904Spjd			g_mirror_update_metadata(disk);
2626132904Spjd		}
2627132904Spjd		break;
2628132904Spjd	    }
2629132904Spjd	default:
2630132904Spjd		KASSERT(1 == 0, ("Unknown state (%u).", state));
2631132904Spjd		break;
2632132904Spjd	}
2633132904Spjd	return (0);
2634132904Spjd}
2635132904Spjd#undef	DISK_STATE_CHANGED
2636132904Spjd
2637139650Spjdint
2638132904Spjdg_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2639132904Spjd{
2640132904Spjd	struct g_provider *pp;
2641132904Spjd	u_char *buf;
2642132904Spjd	int error;
2643132904Spjd
2644132904Spjd	g_topology_assert();
2645132904Spjd
2646132904Spjd	error = g_access(cp, 1, 0, 0);
2647132904Spjd	if (error != 0)
2648132904Spjd		return (error);
2649132904Spjd	pp = cp->provider;
2650132904Spjd	g_topology_unlock();
2651132904Spjd	/* Metadata are stored on last sector. */
2652132904Spjd	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2653132904Spjd	    &error);
2654132904Spjd	g_topology_lock();
2655139051Spjd	g_access(cp, -1, 0, 0);
2656152967Ssobomax	if (buf == NULL) {
2657139213Spjd		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2658139213Spjd		    cp->provider->name, error);
2659132904Spjd		return (error);
2660132904Spjd	}
2661132904Spjd
2662132904Spjd	/* Decode metadata. */
2663132904Spjd	error = mirror_metadata_decode(buf, md);
2664132904Spjd	g_free(buf);
2665132904Spjd	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2666132904Spjd		return (EINVAL);
2667139213Spjd	if (md->md_version > G_MIRROR_VERSION) {
2668139213Spjd		G_MIRROR_DEBUG(0,
2669139213Spjd		    "Kernel module is too old to handle metadata from %s.",
2670139213Spjd		    cp->provider->name);
2671139213Spjd		return (EINVAL);
2672139213Spjd	}
2673132904Spjd	if (error != 0) {
2674132904Spjd		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2675132904Spjd		    cp->provider->name);
2676132904Spjd		return (error);
2677132904Spjd	}
2678132904Spjd
2679132904Spjd	return (0);
2680132904Spjd}
2681132904Spjd
2682132904Spjdstatic int
2683132904Spjdg_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2684132904Spjd    struct g_mirror_metadata *md)
2685132904Spjd{
2686132904Spjd
2687132904Spjd	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2688132904Spjd		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2689132904Spjd		    pp->name, md->md_did);
2690132904Spjd		return (EEXIST);
2691132904Spjd	}
2692132904Spjd	if (md->md_all != sc->sc_ndisks) {
2693132904Spjd		G_MIRROR_DEBUG(1,
2694132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2695132904Spjd		    "md_all", pp->name, sc->sc_name);
2696132904Spjd		return (EINVAL);
2697132904Spjd	}
2698132904Spjd	if (md->md_slice != sc->sc_slice) {
2699132904Spjd		G_MIRROR_DEBUG(1,
2700132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2701132904Spjd		    "md_slice", pp->name, sc->sc_name);
2702132904Spjd		return (EINVAL);
2703132904Spjd	}
2704132904Spjd	if (md->md_balance != sc->sc_balance) {
2705132904Spjd		G_MIRROR_DEBUG(1,
2706132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2707132904Spjd		    "md_balance", pp->name, sc->sc_name);
2708132904Spjd		return (EINVAL);
2709132904Spjd	}
2710132904Spjd	if (md->md_mediasize != sc->sc_mediasize) {
2711132904Spjd		G_MIRROR_DEBUG(1,
2712132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2713132904Spjd		    "md_mediasize", pp->name, sc->sc_name);
2714132904Spjd		return (EINVAL);
2715132904Spjd	}
2716132904Spjd	if (sc->sc_mediasize > pp->mediasize) {
2717132904Spjd		G_MIRROR_DEBUG(1,
2718132904Spjd		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2719132904Spjd		    sc->sc_name);
2720132904Spjd		return (EINVAL);
2721132904Spjd	}
2722132904Spjd	if (md->md_sectorsize != sc->sc_sectorsize) {
2723132904Spjd		G_MIRROR_DEBUG(1,
2724132904Spjd		    "Invalid '%s' field on disk %s (device %s), skipping.",
2725132904Spjd		    "md_sectorsize", pp->name, sc->sc_name);
2726132904Spjd		return (EINVAL);
2727132904Spjd	}
2728132904Spjd	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2729132904Spjd		G_MIRROR_DEBUG(1,
2730132904Spjd		    "Invalid sector size of disk %s (device %s), skipping.",
2731132904Spjd		    pp->name, sc->sc_name);
2732132904Spjd		return (EINVAL);
2733132904Spjd	}
2734132904Spjd	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2735132904Spjd		G_MIRROR_DEBUG(1,
2736132904Spjd		    "Invalid device flags on disk %s (device %s), skipping.",
2737132904Spjd		    pp->name, sc->sc_name);
2738132904Spjd		return (EINVAL);
2739132904Spjd	}
2740132904Spjd	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2741132904Spjd		G_MIRROR_DEBUG(1,
2742132904Spjd		    "Invalid disk flags on disk %s (device %s), skipping.",
2743132904Spjd		    pp->name, sc->sc_name);
2744132904Spjd		return (EINVAL);
2745132904Spjd	}
2746132904Spjd	return (0);
2747132904Spjd}
2748132904Spjd
2749139650Spjdint
2750132904Spjdg_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2751132904Spjd    struct g_mirror_metadata *md)
2752132904Spjd{
2753132904Spjd	struct g_mirror_disk *disk;
2754132904Spjd	int error;
2755132904Spjd
2756156610Spjd	g_topology_assert_not();
2757132904Spjd	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2758132904Spjd
2759132904Spjd	error = g_mirror_check_metadata(sc, pp, md);
2760132904Spjd	if (error != 0)
2761132904Spjd		return (error);
2762139213Spjd	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2763139213Spjd	    md->md_genid < sc->sc_genid) {
2764139213Spjd		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2765139213Spjd		    pp->name, sc->sc_name);
2766139213Spjd		return (EINVAL);
2767139213Spjd	}
2768132904Spjd	disk = g_mirror_init_disk(sc, pp, md, &error);
2769132904Spjd	if (disk == NULL)
2770132904Spjd		return (error);
2771132904Spjd	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2772132904Spjd	    G_MIRROR_EVENT_WAIT);
2773139213Spjd	if (error != 0)
2774139213Spjd		return (error);
2775139213Spjd	if (md->md_version < G_MIRROR_VERSION) {
2776139213Spjd		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2777139213Spjd		    pp->name, md->md_version, G_MIRROR_VERSION);
2778139213Spjd		g_mirror_update_metadata(disk);
2779139213Spjd	}
2780139213Spjd	return (0);
2781132904Spjd}
2782132904Spjd
2783157630Spjdstatic void
2784157630Spjdg_mirror_destroy_delayed(void *arg, int flag)
2785157630Spjd{
2786157630Spjd	struct g_mirror_softc *sc;
2787157630Spjd	int error;
2788157630Spjd
2789157630Spjd	if (flag == EV_CANCEL) {
2790157630Spjd		G_MIRROR_DEBUG(1, "Destroying canceled.");
2791157630Spjd		return;
2792157630Spjd	}
2793157630Spjd	sc = arg;
2794157630Spjd	g_topology_unlock();
2795157630Spjd	sx_xlock(&sc->sc_lock);
2796157630Spjd	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2797157630Spjd	    ("DESTROY flag set on %s.", sc->sc_name));
2798157630Spjd	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2799157630Spjd	    ("DESTROYING flag not set on %s.", sc->sc_name));
2800157630Spjd	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2801157630Spjd	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2802157630Spjd	if (error != 0) {
2803157630Spjd		G_MIRROR_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
2804157630Spjd		sx_xunlock(&sc->sc_lock);
2805157630Spjd	}
2806157630Spjd	g_topology_lock();
2807157630Spjd}
2808157630Spjd
2809132904Spjdstatic int
2810132904Spjdg_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2811132904Spjd{
2812132904Spjd	struct g_mirror_softc *sc;
2813157630Spjd	int dcr, dcw, dce, error = 0;
2814132904Spjd
2815132904Spjd	g_topology_assert();
2816132904Spjd	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2817132904Spjd	    acw, ace);
2818132904Spjd
2819160081Spjd	sc = pp->geom->softc;
2820160081Spjd	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
2821160081Spjd		return (0);
2822160081Spjd	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2823160081Spjd
2824132904Spjd	dcr = pp->acr + acr;
2825132904Spjd	dcw = pp->acw + acw;
2826132904Spjd	dce = pp->ace + ace;
2827132904Spjd
2828157630Spjd	g_topology_unlock();
2829157630Spjd	sx_xlock(&sc->sc_lock);
2830157630Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2831157630Spjd	    LIST_EMPTY(&sc->sc_disks)) {
2832157630Spjd		if (acr > 0 || acw > 0 || ace > 0)
2833157630Spjd			error = ENXIO;
2834157630Spjd		goto end;
2835132904Spjd	}
2836245443Smav	if (dcw == 0)
2837156610Spjd		g_mirror_idle(sc, dcw);
2838157630Spjd	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
2839157630Spjd		if (acr > 0 || acw > 0 || ace > 0) {
2840157630Spjd			error = ENXIO;
2841157630Spjd			goto end;
2842157630Spjd		}
2843157630Spjd		if (dcr == 0 && dcw == 0 && dce == 0) {
2844157630Spjd			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
2845157630Spjd			    sc, NULL);
2846157630Spjd		}
2847156610Spjd	}
2848157630Spjdend:
2849157630Spjd	sx_xunlock(&sc->sc_lock);
2850157630Spjd	g_topology_lock();
2851157630Spjd	return (error);
2852132904Spjd}
2853132904Spjd
2854132904Spjdstatic struct g_geom *
2855132904Spjdg_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2856132904Spjd{
2857132904Spjd	struct g_mirror_softc *sc;
2858132904Spjd	struct g_geom *gp;
2859132904Spjd	int error, timeout;
2860132904Spjd
2861132904Spjd	g_topology_assert();
2862132904Spjd	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2863132904Spjd	    md->md_mid);
2864132904Spjd
2865132904Spjd	/* One disk is minimum. */
2866132904Spjd	if (md->md_all < 1)
2867132904Spjd		return (NULL);
2868132904Spjd	/*
2869132904Spjd	 * Action geom.
2870132904Spjd	 */
2871132904Spjd	gp = g_new_geomf(mp, "%s", md->md_name);
2872132904Spjd	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2873132904Spjd	gp->start = g_mirror_start;
2874132904Spjd	gp->orphan = g_mirror_orphan;
2875132904Spjd	gp->access = g_mirror_access;
2876132904Spjd	gp->dumpconf = g_mirror_dumpconf;
2877132904Spjd
2878132904Spjd	sc->sc_id = md->md_mid;
2879132904Spjd	sc->sc_slice = md->md_slice;
2880132904Spjd	sc->sc_balance = md->md_balance;
2881132904Spjd	sc->sc_mediasize = md->md_mediasize;
2882132904Spjd	sc->sc_sectorsize = md->md_sectorsize;
2883132904Spjd	sc->sc_ndisks = md->md_all;
2884132904Spjd	sc->sc_flags = md->md_mflags;
2885139213Spjd	sc->sc_bump_id = 0;
2886155539Spjd	sc->sc_idle = 1;
2887155581Spjd	sc->sc_last_write = time_uptime;
2888155539Spjd	sc->sc_writes = 0;
2889156610Spjd	sx_init(&sc->sc_lock, "gmirror:lock");
2890132904Spjd	bioq_init(&sc->sc_queue);
2891132904Spjd	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2892156610Spjd	bioq_init(&sc->sc_regular_delayed);
2893156610Spjd	bioq_init(&sc->sc_inflight);
2894156610Spjd	bioq_init(&sc->sc_sync_delayed);
2895132904Spjd	LIST_INIT(&sc->sc_disks);
2896132904Spjd	TAILQ_INIT(&sc->sc_events);
2897132904Spjd	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2898132904Spjd	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2899132904Spjd	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2900132904Spjd	gp->softc = sc;
2901132904Spjd	sc->sc_geom = gp;
2902132904Spjd	sc->sc_provider = NULL;
2903132904Spjd	/*
2904132904Spjd	 * Synchronization geom.
2905132904Spjd	 */
2906132904Spjd	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2907132904Spjd	gp->softc = sc;
2908132904Spjd	gp->orphan = g_mirror_orphan;
2909132904Spjd	sc->sc_sync.ds_geom = gp;
2910132904Spjd	sc->sc_sync.ds_ndisks = 0;
2911172836Sjulian	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2912132904Spjd	    "g_mirror %s", md->md_name);
2913132904Spjd	if (error != 0) {
2914132904Spjd		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2915132904Spjd		    sc->sc_name);
2916132904Spjd		g_destroy_geom(sc->sc_sync.ds_geom);
2917132904Spjd		mtx_destroy(&sc->sc_events_mtx);
2918132904Spjd		mtx_destroy(&sc->sc_queue_mtx);
2919156610Spjd		sx_destroy(&sc->sc_lock);
2920132904Spjd		g_destroy_geom(sc->sc_geom);
2921132904Spjd		free(sc, M_MIRROR);
2922132904Spjd		return (NULL);
2923132904Spjd	}
2924132904Spjd
2925162188Sjmg	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
2926162188Sjmg	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
2927132904Spjd
2928190878Sthompsa	sc->sc_rootmount = root_mount_hold("GMIRROR");
2929146538Spjd	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
2930132904Spjd	/*
2931132904Spjd	 * Run timeout.
2932132904Spjd	 */
2933137251Spjd	timeout = g_mirror_timeout * hz;
2934137251Spjd	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2935132904Spjd	return (sc->sc_geom);
2936132904Spjd}
2937132904Spjd
2938132904Spjdint
2939157630Spjdg_mirror_destroy(struct g_mirror_softc *sc, int how)
2940132904Spjd{
2941157630Spjd	struct g_mirror_disk *disk;
2942132904Spjd	struct g_provider *pp;
2943132904Spjd
2944156610Spjd	g_topology_assert_not();
2945132904Spjd	if (sc == NULL)
2946132904Spjd		return (ENXIO);
2947156610Spjd	sx_assert(&sc->sc_lock, SX_XLOCKED);
2948156610Spjd
2949132904Spjd	pp = sc->sc_provider;
2950132904Spjd	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2951157630Spjd		switch (how) {
2952157630Spjd		case G_MIRROR_DESTROY_SOFT:
2953132904Spjd			G_MIRROR_DEBUG(1,
2954132904Spjd			    "Device %s is still open (r%dw%de%d).", pp->name,
2955132904Spjd			    pp->acr, pp->acw, pp->ace);
2956132904Spjd			return (EBUSY);
2957157630Spjd		case G_MIRROR_DESTROY_DELAYED:
2958157630Spjd			G_MIRROR_DEBUG(1,
2959157630Spjd			    "Device %s will be destroyed on last close.",
2960157630Spjd			    pp->name);
2961157630Spjd			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2962157630Spjd				if (disk->d_state ==
2963157630Spjd				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2964157630Spjd					g_mirror_sync_stop(disk, 1);
2965157630Spjd				}
2966157630Spjd			}
2967157630Spjd			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
2968157630Spjd			return (EBUSY);
2969157630Spjd		case G_MIRROR_DESTROY_HARD:
2970157630Spjd			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2971157630Spjd			    "can't be definitely removed.", pp->name);
2972132904Spjd		}
2973132904Spjd	}
2974132904Spjd
2975158112Spjd	g_topology_lock();
2976158112Spjd	if (sc->sc_geom->softc == NULL) {
2977158112Spjd		g_topology_unlock();
2978158112Spjd		return (0);
2979158112Spjd	}
2980158112Spjd	sc->sc_geom->softc = NULL;
2981158112Spjd	sc->sc_sync.ds_geom->softc = NULL;
2982158112Spjd	g_topology_unlock();
2983158112Spjd
2984132904Spjd	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2985132904Spjd	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2986132904Spjd	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2987156610Spjd	sx_xunlock(&sc->sc_lock);
2988132904Spjd	mtx_lock(&sc->sc_queue_mtx);
2989132904Spjd	wakeup(sc);
2990132904Spjd	mtx_unlock(&sc->sc_queue_mtx);
2991132904Spjd	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2992132904Spjd	while (sc->sc_worker != NULL)
2993132904Spjd		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2994132904Spjd	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2995156610Spjd	sx_xlock(&sc->sc_lock);
2996132904Spjd	g_mirror_destroy_device(sc);
2997132904Spjd	free(sc, M_MIRROR);
2998132904Spjd	return (0);
2999132904Spjd}
3000132904Spjd
3001132904Spjdstatic void
3002132904Spjdg_mirror_taste_orphan(struct g_consumer *cp)
3003132904Spjd{
3004132904Spjd
3005132904Spjd	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3006132904Spjd	    cp->provider->name));
3007132904Spjd}
3008132904Spjd
3009132904Spjdstatic struct g_geom *
3010132904Spjdg_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3011132904Spjd{
3012132904Spjd	struct g_mirror_metadata md;
3013132904Spjd	struct g_mirror_softc *sc;
3014132904Spjd	struct g_consumer *cp;
3015132904Spjd	struct g_geom *gp;
3016132904Spjd	int error;
3017132904Spjd
3018132904Spjd	g_topology_assert();
3019132904Spjd	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3020132904Spjd	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3021132904Spjd
3022132904Spjd	gp = g_new_geomf(mp, "mirror:taste");
3023132904Spjd	/*
3024132904Spjd	 * This orphan function should be never called.
3025132904Spjd	 */
3026132904Spjd	gp->orphan = g_mirror_taste_orphan;
3027132904Spjd	cp = g_new_consumer(gp);
3028132904Spjd	g_attach(cp, pp);
3029132904Spjd	error = g_mirror_read_metadata(cp, &md);
3030132904Spjd	g_detach(cp);
3031132904Spjd	g_destroy_consumer(cp);
3032132904Spjd	g_destroy_geom(gp);
3033132904Spjd	if (error != 0)
3034132904Spjd		return (NULL);
3035132904Spjd	gp = NULL;
3036132904Spjd
3037221101Smav	if (md.md_provider[0] != '\0' &&
3038221101Smav	    !g_compare_names(md.md_provider, pp->name))
3039133373Spjd		return (NULL);
3040142727Spjd	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3041142727Spjd		return (NULL);
3042132904Spjd	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3043132904Spjd		G_MIRROR_DEBUG(0,
3044132904Spjd		    "Device %s: provider %s marked as inactive, skipping.",
3045132904Spjd		    md.md_name, pp->name);
3046132904Spjd		return (NULL);
3047132904Spjd	}
3048132904Spjd	if (g_mirror_debug >= 2)
3049132904Spjd		mirror_metadata_dump(&md);
3050132904Spjd
3051132904Spjd	/*
3052132904Spjd	 * Let's check if device already exists.
3053132904Spjd	 */
3054134486Spjd	sc = NULL;
3055132904Spjd	LIST_FOREACH(gp, &mp->geom, geom) {
3056132904Spjd		sc = gp->softc;
3057132904Spjd		if (sc == NULL)
3058132904Spjd			continue;
3059132904Spjd		if (sc->sc_sync.ds_geom == gp)
3060132904Spjd			continue;
3061132904Spjd		if (strcmp(md.md_name, sc->sc_name) != 0)
3062132904Spjd			continue;
3063132904Spjd		if (md.md_mid != sc->sc_id) {
3064132904Spjd			G_MIRROR_DEBUG(0, "Device %s already configured.",
3065132904Spjd			    sc->sc_name);
3066132904Spjd			return (NULL);
3067132904Spjd		}
3068132904Spjd		break;
3069132904Spjd	}
3070132904Spjd	if (gp == NULL) {
3071132904Spjd		gp = g_mirror_create(mp, &md);
3072132904Spjd		if (gp == NULL) {
3073132976Spjd			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3074132904Spjd			    md.md_name);
3075132904Spjd			return (NULL);
3076132904Spjd		}
3077132904Spjd		sc = gp->softc;
3078132904Spjd	}
3079132904Spjd	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3080156610Spjd	g_topology_unlock();
3081156610Spjd	sx_xlock(&sc->sc_lock);
3082235599Sae	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3083132904Spjd	error = g_mirror_add_disk(sc, pp, &md);
3084132904Spjd	if (error != 0) {
3085132904Spjd		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3086132904Spjd		    pp->name, gp->name, error);
3087156610Spjd		if (LIST_EMPTY(&sc->sc_disks)) {
3088157630Spjd			g_cancel_event(sc);
3089160248Spjd			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3090156610Spjd			g_topology_lock();
3091156610Spjd			return (NULL);
3092156610Spjd		}
3093156610Spjd		gp = NULL;
3094132904Spjd	}
3095235599Sae	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3096235599Sae	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3097235599Sae		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3098235599Sae		g_topology_lock();
3099235599Sae		return (NULL);
3100235599Sae	}
3101156610Spjd	sx_xunlock(&sc->sc_lock);
3102156610Spjd	g_topology_lock();
3103132904Spjd	return (gp);
3104132904Spjd}
3105132904Spjd
3106132904Spjdstatic int
3107132904Spjdg_mirror_destroy_geom(struct gctl_req *req __unused,
3108132904Spjd    struct g_class *mp __unused, struct g_geom *gp)
3109132904Spjd{
3110156610Spjd	struct g_mirror_softc *sc;
3111156610Spjd	int error;
3112132904Spjd
3113156610Spjd	g_topology_unlock();
3114156610Spjd	sc = gp->softc;
3115156610Spjd	sx_xlock(&sc->sc_lock);
3116157630Spjd	g_cancel_event(sc);
3117160248Spjd	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3118156610Spjd	if (error != 0)
3119156610Spjd		sx_xunlock(&sc->sc_lock);
3120156610Spjd	g_topology_lock();
3121156610Spjd	return (error);
3122132904Spjd}
3123132904Spjd
3124132904Spjdstatic void
3125132904Spjdg_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3126132904Spjd    struct g_consumer *cp, struct g_provider *pp)
3127132904Spjd{
3128132904Spjd	struct g_mirror_softc *sc;
3129132904Spjd
3130132904Spjd	g_topology_assert();
3131132904Spjd
3132132904Spjd	sc = gp->softc;
3133132904Spjd	if (sc == NULL)
3134132904Spjd		return;
3135132904Spjd	/* Skip synchronization geom. */
3136132904Spjd	if (gp == sc->sc_sync.ds_geom)
3137132904Spjd		return;
3138132904Spjd	if (pp != NULL) {
3139132904Spjd		/* Nothing here. */
3140132904Spjd	} else if (cp != NULL) {
3141132904Spjd		struct g_mirror_disk *disk;
3142132904Spjd
3143132904Spjd		disk = cp->private;
3144132904Spjd		if (disk == NULL)
3145132904Spjd			return;
3146156610Spjd		g_topology_unlock();
3147156610Spjd		sx_xlock(&sc->sc_lock);
3148132904Spjd		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3149132904Spjd		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3150132904Spjd			sbuf_printf(sb, "%s<Synchronized>", indent);
3151156610Spjd			if (disk->d_sync.ds_offset == 0)
3152132904Spjd				sbuf_printf(sb, "0%%");
3153132904Spjd			else {
3154132904Spjd				sbuf_printf(sb, "%u%%",
3155156610Spjd				    (u_int)((disk->d_sync.ds_offset * 100) /
3156132904Spjd				    sc->sc_provider->mediasize));
3157132904Spjd			}
3158132904Spjd			sbuf_printf(sb, "</Synchronized>\n");
3159240371Sglebius			if (disk->d_sync.ds_offset > 0) {
3160240371Sglebius				sbuf_printf(sb, "%s<BytesSynced>%jd"
3161240371Sglebius				    "</BytesSynced>\n", indent,
3162240371Sglebius				    (intmax_t)disk->d_sync.ds_offset);
3163240371Sglebius			}
3164132904Spjd		}
3165132904Spjd		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3166132904Spjd		    disk->d_sync.ds_syncid);
3167139213Spjd		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3168139213Spjd		    disk->d_genid);
3169132904Spjd		sbuf_printf(sb, "%s<Flags>", indent);
3170132904Spjd		if (disk->d_flags == 0)
3171132904Spjd			sbuf_printf(sb, "NONE");
3172132904Spjd		else {
3173132904Spjd			int first = 1;
3174132904Spjd
3175133448Spjd#define	ADD_FLAG(flag, name)	do {					\
3176133448Spjd	if ((disk->d_flags & (flag)) != 0) {				\
3177133448Spjd		if (!first)						\
3178133448Spjd			sbuf_printf(sb, ", ");				\
3179133448Spjd		else							\
3180133448Spjd			first = 0;					\
3181133448Spjd		sbuf_printf(sb, name);					\
3182133448Spjd	}								\
3183133448Spjd} while (0)
3184133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3185133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3186133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3187133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3188133448Spjd			    "SYNCHRONIZING");
3189133448Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3190155545Spjd			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3191133448Spjd#undef	ADD_FLAG
3192132904Spjd		}
3193132904Spjd		sbuf_printf(sb, "</Flags>\n");
3194132904Spjd		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3195132907Spjd		    disk->d_priority);
3196132904Spjd		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3197132904Spjd		    g_mirror_disk_state2str(disk->d_state));
3198156610Spjd		sx_xunlock(&sc->sc_lock);
3199156610Spjd		g_topology_lock();
3200132904Spjd	} else {
3201156610Spjd		g_topology_unlock();
3202156610Spjd		sx_xlock(&sc->sc_lock);
3203132904Spjd		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3204132904Spjd		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3205139213Spjd		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3206132904Spjd		sbuf_printf(sb, "%s<Flags>", indent);
3207132904Spjd		if (sc->sc_flags == 0)
3208132904Spjd			sbuf_printf(sb, "NONE");
3209132904Spjd		else {
3210132904Spjd			int first = 1;
3211132904Spjd
3212133448Spjd#define	ADD_FLAG(flag, name)	do {					\
3213133448Spjd	if ((sc->sc_flags & (flag)) != 0) {				\
3214133448Spjd		if (!first)						\
3215133448Spjd			sbuf_printf(sb, ", ");				\
3216133448Spjd		else							\
3217133448Spjd			first = 0;					\
3218133448Spjd		sbuf_printf(sb, name);					\
3219133448Spjd	}								\
3220133448Spjd} while (0)
3221163888Spjd			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3222133448Spjd			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3223133448Spjd#undef	ADD_FLAG
3224132904Spjd		}
3225132904Spjd		sbuf_printf(sb, "</Flags>\n");
3226132904Spjd		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3227132904Spjd		    (u_int)sc->sc_slice);
3228132904Spjd		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3229132904Spjd		    balance_name(sc->sc_balance));
3230132904Spjd		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3231132904Spjd		    sc->sc_ndisks);
3232134957Spjd		sbuf_printf(sb, "%s<State>", indent);
3233134957Spjd		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3234134957Spjd			sbuf_printf(sb, "%s", "STARTING");
3235134957Spjd		else if (sc->sc_ndisks ==
3236134957Spjd		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3237134957Spjd			sbuf_printf(sb, "%s", "COMPLETE");
3238134957Spjd		else
3239134957Spjd			sbuf_printf(sb, "%s", "DEGRADED");
3240134957Spjd		sbuf_printf(sb, "</State>\n");
3241156610Spjd		sx_xunlock(&sc->sc_lock);
3242156610Spjd		g_topology_lock();
3243132904Spjd	}
3244132904Spjd}
3245132904Spjd
3246137254Spjdstatic void
3247245443Smavg_mirror_shutdown_post_sync(void *arg, int howto)
3248137254Spjd{
3249137254Spjd	struct g_class *mp;
3250137254Spjd	struct g_geom *gp, *gp2;
3251156610Spjd	struct g_mirror_softc *sc;
3252157630Spjd	int error;
3253137254Spjd
3254137254Spjd	mp = arg;
3255137421Spjd	DROP_GIANT();
3256137254Spjd	g_topology_lock();
3257245443Smav	g_mirror_shutdown = 1;
3258137254Spjd	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3259156610Spjd		if ((sc = gp->softc) == NULL)
3260137254Spjd			continue;
3261157630Spjd		/* Skip synchronization geom. */
3262157630Spjd		if (gp == sc->sc_sync.ds_geom)
3263156610Spjd			continue;
3264156610Spjd		g_topology_unlock();
3265156610Spjd		sx_xlock(&sc->sc_lock);
3266245443Smav		g_mirror_idle(sc, -1);
3267157630Spjd		g_cancel_event(sc);
3268157630Spjd		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3269157630Spjd		if (error != 0)
3270157630Spjd			sx_xunlock(&sc->sc_lock);
3271156610Spjd		g_topology_lock();
3272156610Spjd	}
3273156610Spjd	g_topology_unlock();
3274156610Spjd	PICKUP_GIANT();
3275137254Spjd}
3276137254Spjd
3277137254Spjdstatic void
3278137254Spjdg_mirror_init(struct g_class *mp)
3279137254Spjd{
3280137254Spjd
3281245443Smav	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3282245443Smav	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3283245443Smav	if (g_mirror_post_sync == NULL)
3284137254Spjd		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3285137254Spjd}
3286137254Spjd
3287137254Spjdstatic void
3288137254Spjdg_mirror_fini(struct g_class *mp)
3289137254Spjd{
3290137254Spjd
3291245443Smav	if (g_mirror_post_sync != NULL)
3292245443Smav		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3293137254Spjd}
3294137254Spjd
/*
 * Hand g_mirror_class to DECLARE_GEOM_CLASS() under the name g_mirror.
 * NOTE(review): presumably this is what registers the class with GEOM as
 * a kernel module -- confirm against the macro definition in geom.h.
 */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3296