g_mirror.c revision 240371
/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 240371 2012-09-11 20:20:13Z glebius $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
TUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
    &g_mirror_disconnect_on_failure);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
TUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
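
/*
 * The knobs above are also loader tunables (via TUNABLE_INT), e.g. in
 * /boot/loader.conf:
 *
 *	kern.geom.mirror.debug="2"
 *	kern.geom.mirror.sync_requests="4"
 *
 * sync_requests is read-only after boot (CTLFLAG_RDTUN); the others may
 * additionally be changed at run time via sysctl(8).
 */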
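/*
 * A thin wrapper around msleep(9) that logs the sleep and the wakeup at
 * debug level 4, so thread parking spots show up in verbose traces.
 */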
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_pre_sync = NULL;

static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;
static void g_mirror_init(struct g_class *mp);
static void g_mirror_fini(struct g_class *mp);

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to update disk and device status
 * from a single thread, which simplifies locking.
 */
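/*
 * A typical producer is a GEOM callback posting a state change for the
 * worker thread to apply, e.g. (taken from g_mirror_orphan() below):
 *
 *	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 *	    G_MIRROR_EVENT_DONTWAIT);
 */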
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
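	/*
	 * Wait until the worker thread marks the event DONE.  PDROP makes
	 * msleep() return with sc_events_mtx released, so the mutex is
	 * reacquired at the top of every iteration; the 5 second timeout
	 * merely bounds how long we go without rechecking the flag.
	 */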
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If the state is -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can post an event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how we ignore retaste events for consumers opened
		 * for writing: the consumer is detached and destroyed only
		 * after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk. This means allocating memory, creating a consumer,
 * attaching it to the provider and opening access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error != 0)
		goto fail;
	if (i)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	g_topology_unlock();
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	sx_xunlock(&sc->sc_lock);
	sx_destroy(&sc->sc_lock);
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
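	/* The metadata block occupies the last sector of the component. */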
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL)
		mirror_metadata_encode(md, sector);
	error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

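/*
 * Mark the mirror idle and flag active components clean once there have
 * been no writes for kern.geom.mirror.idletime seconds.  Returns the
 * number of seconds to wait before checking again, or 0 if there is
 * nothing more to do.
 */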
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
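			/*
			 * A failed write to one component is dropped from the
			 * accounting entirely: with both counters decremented,
			 * the parent can still complete successfully if any
			 * other component finishes the request.
			 */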
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently
	 * disconnected, we will not be able to read the dump after a reboot
	 * if it is connected and synchronized later. Can we do something
	 * better?
	 */
	sc = bp->bio_to->geom->softc;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			for (cbp = bioq_first(&queue); cbp != NULL;
			    cbp = bioq_first(&queue)) {
				bioq_remove(&queue, cbp);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_std_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
		bioq_remove(&queue, cbp);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (g_handleattr_int(bp, "GEOM::candelete", 1))
			return;
		else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static int
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
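	/*
	 * Check every in-flight synchronization bio for an overlap of the
	 * half-open intervals [rstart, rend) and [sstart, send).
	 */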
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (1);
		}
	}
	return (0);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static int
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Put the request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Put the synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Release delayed regular requests which no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
#if 0
		/*
		 * wakeup() is not needed, because this function is called from
		 * the worker thread.
		 */
		wakeup(&sc->sc_queue);
#endif
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed sync requests which no longer collide with regular
 * requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ
 * request is sent to the active provider, then a WRITE request (with the
 * data just read) is sent to the provider being synchronized. When the
 * WRITE is finished, a new synchronization request is sent.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		free(bp->bio_data, M_MIRROR);
		g_destroy_bio(bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;
		off_t offset;
		void *data;
		int i;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset == sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			if (sync->ds_bios != NULL) {
				i = (int)(uintptr_t)bp->bio_caller1;
				sync->ds_bios[i] = NULL;
			}
			free(bp->bio_data, M_MIRROR);
			g_destroy_bio(bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
			/*
			 * Update offset_done once the slowest in-flight
			 * request has advanced more than 100 MAXPHYS-sized
			 * requests past it.
			 */
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

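/*
 * Constants for the "load" balance algorithm below: reads landing within
 * TRACK_SIZE of a disk's last request are treated as (nearly) sequential
 * and preferred accordingly, and LOAD_SCALE is the fixed-point scale used
 * for the per-disk load average.
 */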
#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/*
	 * Update loads: an exponentially weighted moving average of each
	 * consumer's queue depth, with 1/8 weight on the current sample.
	 */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
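	/*
	 * Split the request evenly across all active disks, rounding each
	 * slice up to a multiple of the sector size.
	 */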
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			for (cbp = bioq_first(&queue); cbp != NULL;
			    cbp = bioq_first(&queue)) {
				bioq_remove(&queue, cbp);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
		bioq_remove(&queue, cbp);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				for (cbp = bioq_first(&queue); cbp != NULL;
				    cbp = bioq_first(&queue)) {
					bioq_remove(&queue, cbp);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		for (cbp = bioq_first(&queue); cbp != NULL;
		    cbp = bioq_first(&queue)) {
			bioq_remove(&queue, cbp);
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * whether new synchronization requests collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
		free(sc, M_MIRROR);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
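	/*
	 * Main loop: each iteration handles one pending event or one queued
	 * bio, and sleeps on sc when both queues are empty.
	 */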
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, 1);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, 0);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Check if we can mark the array as CLEAN and, if we cannot,
		 * how many seconds we should wait before trying again.
		 */
		timeout = g_mirror_idle(sc, -1);
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_first(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
				mtx_lock(&sc->sc_queue_mtx);
			}
			sx_xunlock(&sc->sc_lock);
			/*
			 * XXX: We can miss an event here, because an event
			 *      can be added without sx-device-lock and without
			 *      mtx-queue-lock. Maybe I should just stop using
			 *      dedicated mutex for events synchronization and
			 *      stick with the queue lock?
			 *      The event will hang here until next I/O request
			 *      or next event is received.
			 */
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
			    timeout * hz);
			sx_xlock(&sc->sc_lock);
			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
			continue;
		}
		bioq_remove(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);

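		/*
		 * Dispatch: a bio coming back on the sync geom is a completed
		 * sync READ; a bio addressed to a component provider is a
		 * completed regular or sync request; a bio addressed to the
		 * mirror provider itself is a fresh request to register.
		 */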
		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
			g_mirror_sync_request(bp);	/* READ */
		} else if (bp->bio_to != sc->sc_provider) {
			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
				g_mirror_regular_request(bp);
			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
				g_mirror_sync_request(bp);	/* WRITE */
			else {
				KASSERT(0,
				    ("Invalid request cflags=0x%hhx to=%s.",
				    bp->bio_cflags, bp->bio_to->name));
			}
		} else {
			g_mirror_register_request(bp);
		}
		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
	}
}
1881
1882static void
1883g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1884{
1885
1886	sx_assert(&sc->sc_lock, SX_LOCKED);
1887
1888	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1889		return;
1890	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1891		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1892		    g_mirror_get_diskname(disk), sc->sc_name);
1893		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1894	} else if (sc->sc_idle &&
1895	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1896		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1897		    g_mirror_get_diskname(disk), sc->sc_name);
1898		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1899	}
1900}
1901
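/*
 * Start synchronization of a single component: attach a dedicated
 * consumer to the mirror provider, allocate g_mirror_syncreqs parallel
 * read BIOs of up to MAXPHYS bytes each and fire them all off.
 * Completed reads re-enter the worker loop via g_mirror_sync_done()
 * and are written out to the component by g_mirror_sync_request().
 */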
1902static void
1903g_mirror_sync_start(struct g_mirror_disk *disk)
1904{
1905	struct g_mirror_softc *sc;
1906	struct g_consumer *cp;
1907	struct bio *bp;
1908	int error, i;
1909
1910	g_topology_assert_not();
1911	sc = disk->d_softc;
1912	sx_assert(&sc->sc_lock, SX_LOCKED);
1913
1914	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1915	    ("Disk %s is not marked for synchronization.",
1916	    g_mirror_get_diskname(disk)));
1917	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1918	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1919	    sc->sc_state));
1920
1921	sx_xunlock(&sc->sc_lock);
1922	g_topology_lock();
1923	cp = g_new_consumer(sc->sc_sync.ds_geom);
1924	error = g_attach(cp, sc->sc_provider);
1925	KASSERT(error == 0,
1926	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1927	error = g_access(cp, 1, 0, 0);
1928	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1929	g_topology_unlock();
1930	sx_xlock(&sc->sc_lock);
1931
1932	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1933	    g_mirror_get_diskname(disk));
1934	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1935		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1936	KASSERT(disk->d_sync.ds_consumer == NULL,
1937	    ("Sync consumer already exists (device=%s, disk=%s).",
1938	    sc->sc_name, g_mirror_get_diskname(disk)));
1939
1940	disk->d_sync.ds_consumer = cp;
1941	disk->d_sync.ds_consumer->private = disk;
1942	disk->d_sync.ds_consumer->index = 0;
1943
1944	/*
1945	 * Allocate memory for synchronization bios and initialize them.
1946	 */
1947	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
1948	    M_MIRROR, M_WAITOK);
1949	for (i = 0; i < g_mirror_syncreqs; i++) {
1950		bp = g_alloc_bio();
1951		disk->d_sync.ds_bios[i] = bp;
1952		bp->bio_parent = NULL;
1953		bp->bio_cmd = BIO_READ;
1954		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1955		bp->bio_cflags = 0;
1956		bp->bio_offset = disk->d_sync.ds_offset;
1957		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1958		disk->d_sync.ds_offset += bp->bio_length;
1959		bp->bio_done = g_mirror_sync_done;
1960		bp->bio_from = disk->d_sync.ds_consumer;
1961		bp->bio_to = sc->sc_provider;
1962		bp->bio_caller1 = (void *)(uintptr_t)i;
1963	}
1964
1965	/* Increase the number of disks in SYNCHRONIZING state. */
1966	sc->sc_sync.ds_ndisks++;
1967	/* Set the number of in-flight synchronization requests. */
1968	disk->d_sync.ds_inflight = g_mirror_syncreqs;
1969
1970	/*
1971	 * Fire off first synchronization requests.
1972	 */
1973	for (i = 0; i < g_mirror_syncreqs; i++) {
1974		bp = disk->d_sync.ds_bios[i];
1975		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1976		disk->d_sync.ds_consumer->index++;
1977		/*
1978		 * Delay the request if it is colliding with a regular request.
1979		 */
1980		if (g_mirror_regular_collision(sc, bp))
1981			g_mirror_sync_delay(sc, bp);
1982		else
1983			g_io_request(bp, disk->d_sync.ds_consumer);
1984	}
1985}
1986
1987/*
1988 * Stop synchronization process.
1989 * type: 0 - synchronization finished
1990 *       1 - synchronization stopped
1991 */
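/*
 * For example, a rebuild that completes successfully ends with
 * g_mirror_sync_stop(disk, 0) (see g_mirror_update_disk()), while
 * provider destruction and delayed destroy use
 * g_mirror_sync_stop(disk, 1).
 */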
1992static void
1993g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1994{
1995	struct g_mirror_softc *sc;
1996	struct g_consumer *cp;
1997
1998	g_topology_assert_not();
1999	sc = disk->d_softc;
2000	sx_assert(&sc->sc_lock, SX_LOCKED);
2001
2002	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2003	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2004	    g_mirror_disk_state2str(disk->d_state)));
2005	if (disk->d_sync.ds_consumer == NULL)
2006		return;
2007
2008	if (type == 0) {
2009		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2010		    sc->sc_name, g_mirror_get_diskname(disk));
2011	} else /* if (type == 1) */ {
2012		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2013		    sc->sc_name, g_mirror_get_diskname(disk));
2014	}
2015	free(disk->d_sync.ds_bios, M_MIRROR);
2016	disk->d_sync.ds_bios = NULL;
2017	cp = disk->d_sync.ds_consumer;
2018	disk->d_sync.ds_consumer = NULL;
2019	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2020	sc->sc_sync.ds_ndisks--;
2021	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2022	g_topology_lock();
2023	g_mirror_kill_consumer(sc, cp);
2024	g_topology_unlock();
2025	sx_xlock(&sc->sc_lock);
2026}
2027
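/*
 * Create and announce the mirror/<name> provider.  The provider
 * inherits the largest stripesize (and its stripeoffset) found among
 * the components, and any component still marked SYNCHRONIZING has its
 * rebuild started once the provider exists.
 */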
2028static void
2029g_mirror_launch_provider(struct g_mirror_softc *sc)
2030{
2031	struct g_mirror_disk *disk;
2032	struct g_provider *pp;
2033
2034	sx_assert(&sc->sc_lock, SX_LOCKED);
2035
2036	g_topology_lock();
2037	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2038	pp->mediasize = sc->sc_mediasize;
2039	pp->sectorsize = sc->sc_sectorsize;
2040	pp->stripesize = 0;
2041	pp->stripeoffset = 0;
2042	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2043		if (disk->d_consumer && disk->d_consumer->provider &&
2044		    disk->d_consumer->provider->stripesize > pp->stripesize) {
2045			pp->stripesize = disk->d_consumer->provider->stripesize;
2046			pp->stripeoffset = disk->d_consumer->provider->stripeoffset;
2047		}
2048	}
2049	sc->sc_provider = pp;
2050	g_error_provider(pp, 0);
2051	g_topology_unlock();
2052	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2053	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2054	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2055		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2056			g_mirror_sync_start(disk);
2057	}
2058}
2059
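/*
 * Tear the provider down: mark it errored so no new I/O is accepted,
 * fail everything still sitting in the queue with ENXIO, wither and
 * orphan the provider, and stop any synchronization still in progress.
 */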
2060static void
2061g_mirror_destroy_provider(struct g_mirror_softc *sc)
2062{
2063	struct g_mirror_disk *disk;
2064	struct bio *bp;
2065
2066	g_topology_assert_not();
2067	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2068	    sc->sc_name));
2069
2070	g_topology_lock();
2071	g_error_provider(sc->sc_provider, ENXIO);
2072	mtx_lock(&sc->sc_queue_mtx);
2073	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
2074		bioq_remove(&sc->sc_queue, bp);
2075		g_io_deliver(bp, ENXIO);
2076	}
2077	mtx_unlock(&sc->sc_queue_mtx);
2078	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
2079	    sc->sc_provider->name);
2080	sc->sc_provider->flags |= G_PF_WITHER;
2081	g_orphan_provider(sc->sc_provider, ENXIO);
2082	g_topology_unlock();
2083	sc->sc_provider = NULL;
2084	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2085		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2086			g_mirror_sync_stop(disk, 1);
2087	}
2088}
2089
2090static void
2091g_mirror_go(void *arg)
2092{
2093	struct g_mirror_softc *sc;
2094
2095	sc = arg;
2096	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2097	g_mirror_event_send(sc, 0,
2098	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2099}
2100
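/*
 * Decide the target state of a disk from its syncid relative to the
 * device's:
 *
 *	ds_syncid == sc_syncid	ACTIVE, or SYNCHRONIZING/STALE when the
 *				SYNCHRONIZING flag is set
 *	ds_syncid <  sc_syncid	sync data is reset; SYNCHRONIZING, or
 *				STALE when NOAUTOSYNC is set without
 *				FORCE_SYNC
 *	ds_syncid >  sc_syncid	the disk is fresher than the running
 *				device and is destroyed (NONE)
 */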
2101static u_int
2102g_mirror_determine_state(struct g_mirror_disk *disk)
2103{
2104	struct g_mirror_softc *sc;
2105	u_int state;
2106
2107	sc = disk->d_softc;
2108	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2109		if ((disk->d_flags &
2110		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2111			/* Disk does not need synchronization. */
2112			state = G_MIRROR_DISK_STATE_ACTIVE;
2113		} else {
2114			if ((sc->sc_flags &
2115			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2116			    (disk->d_flags &
2117			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2118				/*
2119				 * We can start synchronization from
2120				 * the stored offset.
2121				 */
2122				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2123			} else {
2124				state = G_MIRROR_DISK_STATE_STALE;
2125			}
2126		}
2127	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2128		/*
2129		 * Reset all synchronization data for this disk,
2130		 * because even if it was synchronized, it was
2131		 * synchronized against disks with a different syncid.
2132		 */
2133		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2134		disk->d_sync.ds_offset = 0;
2135		disk->d_sync.ds_offset_done = 0;
2136		disk->d_sync.ds_syncid = sc->sc_syncid;
2137		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2138		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2139			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2140		} else {
2141			state = G_MIRROR_DISK_STATE_STALE;
2142		}
2143	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2144		/*
2145		 * Not good, NOT GOOD!
2146		 * It means that the mirror was started on stale disks
2147		 * and a fresher disk has just arrived.
2148		 * If there were any writes, the mirror is broken.
2149		 * The best choice here is to leave this disk alone
2150		 * and inform the user loudly.
2151		 */
2152		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2153		    "disk (%s) arrived! It will not be connected to the "
2154		    "running device.", sc->sc_name,
2155		    g_mirror_get_diskname(disk));
2156		g_mirror_destroy_disk(disk);
2157		state = G_MIRROR_DISK_STATE_NONE;
2158		/* Return immediately, because disk was destroyed. */
2159		return (state);
2160	}
2161	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2162	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2163	return (state);
2164}
2165
2166/*
2167 * Update device state.
2168 */
2169static void
2170g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
2171{
2172	struct g_mirror_disk *disk;
2173	u_int state;
2174
2175	sx_assert(&sc->sc_lock, SX_XLOCKED);
2176
2177	switch (sc->sc_state) {
2178	case G_MIRROR_DEVICE_STATE_STARTING:
2179	    {
2180		struct g_mirror_disk *pdisk, *tdisk;
2181		u_int dirty, ndisks, genid, syncid;
2182
2183		KASSERT(sc->sc_provider == NULL,
2184		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2185		/*
2186		 * Are we ready?  We are if all disks are connected, or
2187		 * if we have at least one disk and 'force' is true.
2188		 */
2189		ndisks = g_mirror_ndisks(sc, -1);
2190		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2191			;
2192		} else if (ndisks == 0) {
2193			/*
2194			 * All disks went away during the starting phase,
2195			 * so destroy the device.
2196			 */
2197			callout_drain(&sc->sc_callout);
2198			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2199			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2200			    sc->sc_rootmount);
2201			root_mount_rel(sc->sc_rootmount);
2202			sc->sc_rootmount = NULL;
2203			return;
2204		} else {
2205			return;
2206		}
2207
2208		/*
2209		 * Activate all disks with the biggest syncid.
2210		 */
2211		if (force) {
2212			/*
2213			 * If 'force' is true, we have been called due to a
2214			 * timeout, so there is no timeout left to cancel.
2215			 */
2216			ndisks = 0;
2217			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2218				if ((disk->d_flags &
2219				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2220					ndisks++;
2221				}
2222			}
2223			if (ndisks == 0) {
2224				/* No valid disks found, destroy device. */
2225				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2226				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2227				    __LINE__, sc->sc_rootmount);
2228				root_mount_rel(sc->sc_rootmount);
2229				sc->sc_rootmount = NULL;
2230				return;
2231			}
2232		} else {
2233			/* Cancel timeout. */
2234			callout_drain(&sc->sc_callout);
2235		}
2236
2237		/*
2238		 * Find the biggest genid.
2239		 */
2240		genid = 0;
2241		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2242			if (disk->d_genid > genid)
2243				genid = disk->d_genid;
2244		}
2245		sc->sc_genid = genid;
2246		/*
2247		 * Remove all disks without the biggest genid.
2248		 */
2249		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2250			if (disk->d_genid < genid) {
2251				G_MIRROR_DEBUG(0,
2252				    "Component %s (device %s) broken, skipping.",
2253				    g_mirror_get_diskname(disk), sc->sc_name);
2254				g_mirror_destroy_disk(disk);
2255			}
2256		}
2257
2258		/*
2259		 * Find the biggest syncid.
2260		 */
2261		syncid = 0;
2262		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2263			if (disk->d_sync.ds_syncid > syncid)
2264				syncid = disk->d_sync.ds_syncid;
2265		}
2266
2267		/*
2268		 * Look for dirty disks here: if all disks with the biggest
2269		 * syncid are dirty, we have to choose the one with the
2270		 * biggest priority and rebuild the rest.
2271		 */
2272		/*
2273		 * Count the dirty disks with the biggest syncid and all
2274		 * disks with the biggest syncid.  While here, find the
2275		 * disk with the biggest priority.
2276		 */
2277		dirty = ndisks = 0;
2278		pdisk = NULL;
2279		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2280			if (disk->d_sync.ds_syncid != syncid)
2281				continue;
2282			if ((disk->d_flags &
2283			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2284				continue;
2285			}
2286			ndisks++;
2287			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2288				dirty++;
2289				if (pdisk == NULL ||
2290				    pdisk->d_priority < disk->d_priority) {
2291					pdisk = disk;
2292				}
2293			}
2294		}
2295		if (dirty == 0) {
2296			/* No dirty disks at all, great. */
2297		} else if (dirty == ndisks) {
2298			/*
2299			 * Force synchronization for all dirty disks except
2300			 * the one with the biggest priority.
2301			 */
2302			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2303			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2304			    "master disk for synchronization.",
2305			    g_mirror_get_diskname(pdisk), sc->sc_name);
2306			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2307				if (disk->d_sync.ds_syncid != syncid)
2308					continue;
2309				if ((disk->d_flags &
2310				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2311					continue;
2312				}
2313				KASSERT((disk->d_flags &
2314				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2315				    ("Disk %s isn't marked as dirty.",
2316				    g_mirror_get_diskname(disk)));
2317				/* Skip the disk with the biggest priority. */
2318				if (disk == pdisk)
2319					continue;
2320				disk->d_sync.ds_syncid = 0;
2321			}
2322		} else if (dirty < ndisks) {
2323			/*
2324			 * Force synchronization for all dirty disks.
2325			 * We have some non-dirty disks.
2326			 */
2327			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2328				if (disk->d_sync.ds_syncid != syncid)
2329					continue;
2330				if ((disk->d_flags &
2331				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2332					continue;
2333				}
2334				if ((disk->d_flags &
2335				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2336					continue;
2337				}
2338				disk->d_sync.ds_syncid = 0;
2339			}
2340		}
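		/*
		 * Setting ds_syncid to 0 above is what forces the rebuild:
		 * g_mirror_determine_state() will see a syncid smaller than
		 * the device's, reset the disk's sync offsets and return
		 * SYNCHRONIZING (or STALE if NOAUTOSYNC is set and the disk
		 * is not marked FORCE_SYNC).
		 */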
2341
2342		/* Reset hint. */
2343		sc->sc_hint = NULL;
2344		sc->sc_syncid = syncid;
2345		if (force) {
2346			/* Remember to bump syncid on first write. */
2347			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2348		}
2349		state = G_MIRROR_DEVICE_STATE_RUNNING;
2350		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2351		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2352		    g_mirror_device_state2str(state));
2353		sc->sc_state = state;
2354		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2355			state = g_mirror_determine_state(disk);
2356			g_mirror_event_send(disk, state,
2357			    G_MIRROR_EVENT_DONTWAIT);
2358			if (state == G_MIRROR_DISK_STATE_STALE)
2359				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2360		}
2361		break;
2362	    }
2363	case G_MIRROR_DEVICE_STATE_RUNNING:
2364		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2365		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2366			/*
2367			 * No active disks or no disks at all,
2368			 * so destroy the device.
2369			 */
2370			if (sc->sc_provider != NULL)
2371				g_mirror_destroy_provider(sc);
2372			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2373			break;
2374		} else if (g_mirror_ndisks(sc,
2375		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2376		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2377			/*
2378			 * We have active disks, launch provider if it doesn't
2379			 * exist.
2380			 */
2381			if (sc->sc_provider == NULL)
2382				g_mirror_launch_provider(sc);
2383			if (sc->sc_rootmount != NULL) {
2384				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2385				    __LINE__, sc->sc_rootmount);
2386				root_mount_rel(sc->sc_rootmount);
2387				sc->sc_rootmount = NULL;
2388			}
2389		}
2390		/*
2391		 * Genid should be bumped immediately, so do it here.
2392		 */
2393		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2394			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2395			g_mirror_bump_genid(sc);
2396		}
2397		break;
2398	default:
2399		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2400		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2401		break;
2402	}
2403}
2404
2405/*
2406 * Update disk state and device state if needed.
2407 */
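/*
 * Accepted transitions (enforced by the KASSERTs below):
 *
 *	NONE -> NEW
 *	NEW -> ACTIVE, STALE or SYNCHRONIZING	(device RUNNING)
 *	SYNCHRONIZING -> ACTIVE			(rebuild finished)
 *	NEW -> DISCONNECTED			(device STARTING)
 *	ACTIVE/STALE/SYNCHRONIZING -> DISCONNECTED (device RUNNING)
 *	any -> DESTROY
 */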
2408#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2409	"Disk %s state changed from %s to %s (device %s).",		\
2410	g_mirror_get_diskname(disk),					\
2411	g_mirror_disk_state2str(disk->d_state),				\
2412	g_mirror_disk_state2str(state), sc->sc_name)
2413static int
2414g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2415{
2416	struct g_mirror_softc *sc;
2417
2418	sc = disk->d_softc;
2419	sx_assert(&sc->sc_lock, SX_XLOCKED);
2420
2421again:
2422	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2423	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2424	    g_mirror_disk_state2str(state));
2425	switch (state) {
2426	case G_MIRROR_DISK_STATE_NEW:
2427		/*
2428		 * Possible scenarios:
2429		 * 1. A new disk arrives.
2430		 */
2431		/* Previous state should be NONE. */
2432		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2433		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2434		    g_mirror_disk_state2str(disk->d_state)));
2435		DISK_STATE_CHANGED();
2436
2437		disk->d_state = state;
2438		if (LIST_EMPTY(&sc->sc_disks))
2439			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2440		else {
2441			struct g_mirror_disk *dp;
2442
2443			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2444				if (disk->d_priority >= dp->d_priority) {
2445					LIST_INSERT_BEFORE(dp, disk, d_next);
2446					dp = NULL;
2447					break;
2448				}
2449				if (LIST_NEXT(dp, d_next) == NULL)
2450					break;
2451			}
2452			if (dp != NULL)
2453				LIST_INSERT_AFTER(dp, disk, d_next);
2454		}
2455		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2456		    sc->sc_name, g_mirror_get_diskname(disk));
2457		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2458			break;
2459		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2460		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2461		    g_mirror_device_state2str(sc->sc_state),
2462		    g_mirror_get_diskname(disk),
2463		    g_mirror_disk_state2str(disk->d_state)));
2464		state = g_mirror_determine_state(disk);
2465		if (state != G_MIRROR_DISK_STATE_NONE)
2466			goto again;
2467		break;
2468	case G_MIRROR_DISK_STATE_ACTIVE:
2469		/*
2470		 * Possible scenarios:
2471		 * 1. A new disk does not need synchronization.
2472		 * 2. The synchronization process finished successfully.
2473		 */
2474		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2475		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2476		    g_mirror_device_state2str(sc->sc_state),
2477		    g_mirror_get_diskname(disk),
2478		    g_mirror_disk_state2str(disk->d_state)));
2479		/* Previous state should be NEW or SYNCHRONIZING. */
2480		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2481		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2482		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2483		    g_mirror_disk_state2str(disk->d_state)));
2484		DISK_STATE_CHANGED();
2485
2486		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2487			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2488			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2489			g_mirror_sync_stop(disk, 0);
2490		}
2491		disk->d_state = state;
2492		disk->d_sync.ds_offset = 0;
2493		disk->d_sync.ds_offset_done = 0;
2494		g_mirror_update_idle(sc, disk);
2495		g_mirror_update_metadata(disk);
2496		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2497		    sc->sc_name, g_mirror_get_diskname(disk));
2498		break;
2499	case G_MIRROR_DISK_STATE_STALE:
2500		/*
2501		 * Possible scenarios:
2502		 * 1. A stale disk was connected.
2503		 */
2504		/* Previous state should be NEW. */
2505		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2506		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2507		    g_mirror_disk_state2str(disk->d_state)));
2508		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2509		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2510		    g_mirror_device_state2str(sc->sc_state),
2511		    g_mirror_get_diskname(disk),
2512		    g_mirror_disk_state2str(disk->d_state)));
2513		/*
2514		 * The STALE state is only possible if the device is
2515		 * marked NOAUTOSYNC.
2516		 */
2517		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2518		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2519		    g_mirror_device_state2str(sc->sc_state),
2520		    g_mirror_get_diskname(disk),
2521		    g_mirror_disk_state2str(disk->d_state)));
2522		DISK_STATE_CHANGED();
2523
2524		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2525		disk->d_state = state;
2526		g_mirror_update_metadata(disk);
2527		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2528		    sc->sc_name, g_mirror_get_diskname(disk));
2529		break;
2530	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2531		/*
2532		 * Possible scenarios:
2533		 * 1. A disk that needs synchronization was connected.
2534		 */
2535		/* Previous state should be NEW. */
2536		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2537		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2538		    g_mirror_disk_state2str(disk->d_state)));
2539		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2540		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2541		    g_mirror_device_state2str(sc->sc_state),
2542		    g_mirror_get_diskname(disk),
2543		    g_mirror_disk_state2str(disk->d_state)));
2544		DISK_STATE_CHANGED();
2545
2546		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2547			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2548		disk->d_state = state;
2549		if (sc->sc_provider != NULL) {
2550			g_mirror_sync_start(disk);
2551			g_mirror_update_metadata(disk);
2552		}
2553		break;
2554	case G_MIRROR_DISK_STATE_DISCONNECTED:
2555		/*
2556		 * Possible scenarios:
2557		 * 1. The device wasn't running yet, but a disk disappeared.
2558		 * 2. A disk was active and disappeared.
2559		 * 3. A disk disappeared during the synchronization process.
2560		 */
2561		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2562			/*
2563			 * Previous state should be ACTIVE, STALE or
2564			 * SYNCHRONIZING.
2565			 */
2566			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2567			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2568			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2569			    ("Wrong disk state (%s, %s).",
2570			    g_mirror_get_diskname(disk),
2571			    g_mirror_disk_state2str(disk->d_state)));
2572		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2573			/* Previous state should be NEW. */
2574			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2575			    ("Wrong disk state (%s, %s).",
2576			    g_mirror_get_diskname(disk),
2577			    g_mirror_disk_state2str(disk->d_state)));
2578			/*
2579			 * Cancel the pending syncid bump if the disk
2580			 * disappeared in the STARTING state.
2581			 */
2582			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2583				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2584#ifdef	INVARIANTS
2585		} else {
2586			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2587			    sc->sc_name,
2588			    g_mirror_device_state2str(sc->sc_state),
2589			    g_mirror_get_diskname(disk),
2590			    g_mirror_disk_state2str(disk->d_state)));
2591#endif
2592		}
2593		DISK_STATE_CHANGED();
2594		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2595		    sc->sc_name, g_mirror_get_diskname(disk));
2596
2597		g_mirror_destroy_disk(disk);
2598		break;
2599	case G_MIRROR_DISK_STATE_DESTROY:
2600	    {
2601		int error;
2602
2603		error = g_mirror_clear_metadata(disk);
2604		if (error != 0)
2605			return (error);
2606		DISK_STATE_CHANGED();
2607		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2608		    sc->sc_name, g_mirror_get_diskname(disk));
2609
2610		g_mirror_destroy_disk(disk);
2611		sc->sc_ndisks--;
2612		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2613			g_mirror_update_metadata(disk);
2614		}
2615		break;
2616	    }
2617	default:
2618		KASSERT(1 == 0, ("Unknown state (%u).", state));
2619		break;
2620	}
2621	return (0);
2622}
2623#undef	DISK_STATE_CHANGED
2624
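/*
 * Read and decode the gmirror metadata sector of a component: open the
 * consumer read-only, read the provider's last sector, then verify the
 * magic string, the on-disk version and the MD5 hash checked by
 * mirror_metadata_decode().
 */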
2625int
2626g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2627{
2628	struct g_provider *pp;
2629	u_char *buf;
2630	int error;
2631
2632	g_topology_assert();
2633
2634	error = g_access(cp, 1, 0, 0);
2635	if (error != 0)
2636		return (error);
2637	pp = cp->provider;
2638	g_topology_unlock();
2639	/* Metadata are stored in the last sector. */
2640	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2641	    &error);
2642	g_topology_lock();
2643	g_access(cp, -1, 0, 0);
2644	if (buf == NULL) {
2645		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2646		    cp->provider->name, error);
2647		return (error);
2648	}
2649
2650	/* Decode metadata. */
2651	error = mirror_metadata_decode(buf, md);
2652	g_free(buf);
2653	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2654		return (EINVAL);
2655	if (md->md_version > G_MIRROR_VERSION) {
2656		G_MIRROR_DEBUG(0,
2657		    "Kernel module is too old to handle metadata from %s.",
2658		    cp->provider->name);
2659		return (EINVAL);
2660	}
2661	if (error != 0) {
2662		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2663		    cp->provider->name);
2664		return (error);
2665	}
2666
2667	return (0);
2668}
2669
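/*
 * Sanity-check a component's metadata against the already-configured
 * device: the disk id must be unique, and the component count, slice,
 * balance algorithm, media size, sector size and flags must all match
 * what the device was created with.
 */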
2670static int
2671g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2672    struct g_mirror_metadata *md)
2673{
2674
2675	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2676		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2677		    pp->name, md->md_did);
2678		return (EEXIST);
2679	}
2680	if (md->md_all != sc->sc_ndisks) {
2681		G_MIRROR_DEBUG(1,
2682		    "Invalid '%s' field on disk %s (device %s), skipping.",
2683		    "md_all", pp->name, sc->sc_name);
2684		return (EINVAL);
2685	}
2686	if (md->md_slice != sc->sc_slice) {
2687		G_MIRROR_DEBUG(1,
2688		    "Invalid '%s' field on disk %s (device %s), skipping.",
2689		    "md_slice", pp->name, sc->sc_name);
2690		return (EINVAL);
2691	}
2692	if (md->md_balance != sc->sc_balance) {
2693		G_MIRROR_DEBUG(1,
2694		    "Invalid '%s' field on disk %s (device %s), skipping.",
2695		    "md_balance", pp->name, sc->sc_name);
2696		return (EINVAL);
2697	}
2698	if (md->md_mediasize != sc->sc_mediasize) {
2699		G_MIRROR_DEBUG(1,
2700		    "Invalid '%s' field on disk %s (device %s), skipping.",
2701		    "md_mediasize", pp->name, sc->sc_name);
2702		return (EINVAL);
2703	}
2704	if (sc->sc_mediasize > pp->mediasize) {
2705		G_MIRROR_DEBUG(1,
2706		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2707		    sc->sc_name);
2708		return (EINVAL);
2709	}
2710	if (md->md_sectorsize != sc->sc_sectorsize) {
2711		G_MIRROR_DEBUG(1,
2712		    "Invalid '%s' field on disk %s (device %s), skipping.",
2713		    "md_sectorsize", pp->name, sc->sc_name);
2714		return (EINVAL);
2715	}
2716	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2717		G_MIRROR_DEBUG(1,
2718		    "Invalid sector size of disk %s (device %s), skipping.",
2719		    pp->name, sc->sc_name);
2720		return (EINVAL);
2721	}
2722	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2723		G_MIRROR_DEBUG(1,
2724		    "Invalid device flags on disk %s (device %s), skipping.",
2725		    pp->name, sc->sc_name);
2726		return (EINVAL);
2727	}
2728	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2729		G_MIRROR_DEBUG(1,
2730		    "Invalid disk flags on disk %s (device %s), skipping.",
2731		    pp->name, sc->sc_name);
2732		return (EINVAL);
2733	}
2734	return (0);
2735}
2736
2737int
2738g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2739    struct g_mirror_metadata *md)
2740{
2741	struct g_mirror_disk *disk;
2742	int error;
2743
2744	g_topology_assert_not();
2745	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2746
2747	error = g_mirror_check_metadata(sc, pp, md);
2748	if (error != 0)
2749		return (error);
2750	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2751	    md->md_genid < sc->sc_genid) {
2752		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2753		    pp->name, sc->sc_name);
2754		return (EINVAL);
2755	}
2756	disk = g_mirror_init_disk(sc, pp, md, &error);
2757	if (disk == NULL)
2758		return (error);
2759	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2760	    G_MIRROR_EVENT_WAIT);
2761	if (error != 0)
2762		return (error);
2763	if (md->md_version < G_MIRROR_VERSION) {
2764		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2765		    pp->name, md->md_version, G_MIRROR_VERSION);
2766		g_mirror_update_metadata(disk);
2767	}
2768	return (0);
2769}
2770
2771static void
2772g_mirror_destroy_delayed(void *arg, int flag)
2773{
2774	struct g_mirror_softc *sc;
2775	int error;
2776
2777	if (flag == EV_CANCEL) {
2778		G_MIRROR_DEBUG(1, "Destroying canceled.");
2779		return;
2780	}
2781	sc = arg;
2782	g_topology_unlock();
2783	sx_xlock(&sc->sc_lock);
2784	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2785	    ("DESTROY flag set on %s.", sc->sc_name));
2786	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2787	    ("DESTROYING flag not set on %s.", sc->sc_name));
2788	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2789	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2790	if (error != 0) {
2791		G_MIRROR_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
2792		sx_xunlock(&sc->sc_lock);
2793	}
2794	g_topology_lock();
2795}
2796
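/*
 * GEOM access method.  The acr/acw/ace arguments are deltas to the
 * provider's access counts, so an open for writing arrives as
 * g_mirror_access(pp, 0, 1, 0) and the matching close as
 * g_mirror_access(pp, 0, -1, 0).  Dropping the last writer lets the
 * device go idle, and the last close of a device in the DESTROYING
 * state schedules the delayed destruction event.
 */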
2797static int
2798g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2799{
2800	struct g_mirror_softc *sc;
2801	int dcr, dcw, dce, error = 0;
2802
2803	g_topology_assert();
2804	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2805	    acw, ace);
2806
2807	sc = pp->geom->softc;
2808	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
2809		return (0);
2810	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2811
2812	dcr = pp->acr + acr;
2813	dcw = pp->acw + acw;
2814	dce = pp->ace + ace;
2815
2816	g_topology_unlock();
2817	sx_xlock(&sc->sc_lock);
2818	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2819	    LIST_EMPTY(&sc->sc_disks)) {
2820		if (acr > 0 || acw > 0 || ace > 0)
2821			error = ENXIO;
2822		goto end;
2823	}
2824	if (dcw == 0 && !sc->sc_idle)
2825		g_mirror_idle(sc, dcw);
2826	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
2827		if (acr > 0 || acw > 0 || ace > 0) {
2828			error = ENXIO;
2829			goto end;
2830		}
2831		if (dcr == 0 && dcw == 0 && dce == 0) {
2832			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
2833			    sc, NULL);
2834		}
2835	}
2836end:
2837	sx_xunlock(&sc->sc_lock);
2838	g_topology_lock();
2839	return (error);
2840}
2841
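/*
 * Create a new mirror device from metadata: one "action" geom that
 * will carry the provider and one "<name>.sync" geom for
 * synchronization consumers, plus a worker kthread that serializes all
 * events and I/O.  A callout arms g_mirror_go() so that the device is
 * force-started after g_mirror_timeout seconds even if some components
 * never show up.
 */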
2842static struct g_geom *
2843g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2844{
2845	struct g_mirror_softc *sc;
2846	struct g_geom *gp;
2847	int error, timeout;
2848
2849	g_topology_assert();
2850	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2851	    md->md_mid);
2852
2853	/* One disk is minimum. */
2854	if (md->md_all < 1)
2855		return (NULL);
2856	/*
2857	 * Action geom.
2858	 */
2859	gp = g_new_geomf(mp, "%s", md->md_name);
2860	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2861	gp->start = g_mirror_start;
2862	gp->orphan = g_mirror_orphan;
2863	gp->access = g_mirror_access;
2864	gp->dumpconf = g_mirror_dumpconf;
2865
2866	sc->sc_id = md->md_mid;
2867	sc->sc_slice = md->md_slice;
2868	sc->sc_balance = md->md_balance;
2869	sc->sc_mediasize = md->md_mediasize;
2870	sc->sc_sectorsize = md->md_sectorsize;
2871	sc->sc_ndisks = md->md_all;
2872	sc->sc_flags = md->md_mflags;
2873	sc->sc_bump_id = 0;
2874	sc->sc_idle = 1;
2875	sc->sc_last_write = time_uptime;
2876	sc->sc_writes = 0;
2877	sx_init(&sc->sc_lock, "gmirror:lock");
2878	bioq_init(&sc->sc_queue);
2879	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2880	bioq_init(&sc->sc_regular_delayed);
2881	bioq_init(&sc->sc_inflight);
2882	bioq_init(&sc->sc_sync_delayed);
2883	LIST_INIT(&sc->sc_disks);
2884	TAILQ_INIT(&sc->sc_events);
2885	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2886	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2887	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2888	gp->softc = sc;
2889	sc->sc_geom = gp;
2890	sc->sc_provider = NULL;
2891	/*
2892	 * Synchronization geom.
2893	 */
2894	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2895	gp->softc = sc;
2896	gp->orphan = g_mirror_orphan;
2897	sc->sc_sync.ds_geom = gp;
2898	sc->sc_sync.ds_ndisks = 0;
2899	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2900	    "g_mirror %s", md->md_name);
2901	if (error != 0) {
2902		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2903		    sc->sc_name);
2904		g_destroy_geom(sc->sc_sync.ds_geom);
2905		mtx_destroy(&sc->sc_events_mtx);
2906		mtx_destroy(&sc->sc_queue_mtx);
2907		sx_destroy(&sc->sc_lock);
2908		g_destroy_geom(sc->sc_geom);
2909		free(sc, M_MIRROR);
2910		return (NULL);
2911	}
2912
2913	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
2914	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
2915
2916	sc->sc_rootmount = root_mount_hold("GMIRROR");
2917	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
2918	/*
2919	 * Schedule the startup timeout.
2920	 */
2921	timeout = g_mirror_timeout * hz;
2922	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2923	return (sc->sc_geom);
2924}
2925
2926int
2927g_mirror_destroy(struct g_mirror_softc *sc, int how)
2928{
2929	struct g_mirror_disk *disk;
2930	struct g_provider *pp;
2931
2932	g_topology_assert_not();
2933	if (sc == NULL)
2934		return (ENXIO);
2935	sx_assert(&sc->sc_lock, SX_XLOCKED);
2936
2937	pp = sc->sc_provider;
2938	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2939		switch (how) {
2940		case G_MIRROR_DESTROY_SOFT:
2941			G_MIRROR_DEBUG(1,
2942			    "Device %s is still open (r%dw%de%d).", pp->name,
2943			    pp->acr, pp->acw, pp->ace);
2944			return (EBUSY);
2945		case G_MIRROR_DESTROY_DELAYED:
2946			G_MIRROR_DEBUG(1,
2947			    "Device %s will be destroyed on last close.",
2948			    pp->name);
2949			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2950				if (disk->d_state ==
2951				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2952					g_mirror_sync_stop(disk, 1);
2953				}
2954			}
2955			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
2956			return (EBUSY);
2957		case G_MIRROR_DESTROY_HARD:
2958			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2959			    "cannot be removed definitively.", pp->name);
2960		}
2961	}
2962
2963	g_topology_lock();
2964	if (sc->sc_geom->softc == NULL) {
2965		g_topology_unlock();
2966		return (0);
2967	}
2968	sc->sc_geom->softc = NULL;
2969	sc->sc_sync.ds_geom->softc = NULL;
2970	g_topology_unlock();
2971
2972	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2973	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2974	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2975	sx_xunlock(&sc->sc_lock);
2976	mtx_lock(&sc->sc_queue_mtx);
2977	wakeup(sc);
2978	mtx_unlock(&sc->sc_queue_mtx);
2979	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2980	while (sc->sc_worker != NULL)
2981		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2982	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2983	sx_xlock(&sc->sc_lock);
2984	g_mirror_destroy_device(sc);
2985	free(sc, M_MIRROR);
2986	return (0);
2987}
2988
2989static void
2990g_mirror_taste_orphan(struct g_consumer *cp)
2991{
2992
2993	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2994	    cp->provider->name));
2995}
2996
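/*
 * Taste method: read metadata from the offered provider through a
 * throw-away geom/consumer pair, look for an existing device with the
 * same name (and id), create one if necessary, and finally add the
 * provider to it as a new disk.
 */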
2997static struct g_geom *
2998g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2999{
3000	struct g_mirror_metadata md;
3001	struct g_mirror_softc *sc;
3002	struct g_consumer *cp;
3003	struct g_geom *gp;
3004	int error;
3005
3006	g_topology_assert();
3007	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3008	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3009
3010	gp = g_new_geomf(mp, "mirror:taste");
3011	/*
3012	 * This orphan function should never be called.
3013	 */
3014	gp->orphan = g_mirror_taste_orphan;
3015	cp = g_new_consumer(gp);
3016	g_attach(cp, pp);
3017	error = g_mirror_read_metadata(cp, &md);
3018	g_detach(cp);
3019	g_destroy_consumer(cp);
3020	g_destroy_geom(gp);
3021	if (error != 0)
3022		return (NULL);
3023	gp = NULL;
3024
3025	if (md.md_provider[0] != '\0' &&
3026	    !g_compare_names(md.md_provider, pp->name))
3027		return (NULL);
3028	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3029		return (NULL);
3030	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3031		G_MIRROR_DEBUG(0,
3032		    "Device %s: provider %s marked as inactive, skipping.",
3033		    md.md_name, pp->name);
3034		return (NULL);
3035	}
3036	if (g_mirror_debug >= 2)
3037		mirror_metadata_dump(&md);
3038
3039	/*
3040	 * Let's check whether the device already exists.
3041	 */
3042	sc = NULL;
3043	LIST_FOREACH(gp, &mp->geom, geom) {
3044		sc = gp->softc;
3045		if (sc == NULL)
3046			continue;
3047		if (sc->sc_sync.ds_geom == gp)
3048			continue;
3049		if (strcmp(md.md_name, sc->sc_name) != 0)
3050			continue;
3051		if (md.md_mid != sc->sc_id) {
3052			G_MIRROR_DEBUG(0, "Device %s already configured.",
3053			    sc->sc_name);
3054			return (NULL);
3055		}
3056		break;
3057	}
3058	if (gp == NULL) {
3059		gp = g_mirror_create(mp, &md);
3060		if (gp == NULL) {
3061			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3062			    md.md_name);
3063			return (NULL);
3064		}
3065		sc = gp->softc;
3066	}
3067	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3068	g_topology_unlock();
3069	sx_xlock(&sc->sc_lock);
3070	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3071	error = g_mirror_add_disk(sc, pp, &md);
3072	if (error != 0) {
3073		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3074		    pp->name, gp->name, error);
3075		if (LIST_EMPTY(&sc->sc_disks)) {
3076			g_cancel_event(sc);
3077			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3078			g_topology_lock();
3079			return (NULL);
3080		}
3081		gp = NULL;
3082	}
3083	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3084	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3085		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3086		g_topology_lock();
3087		return (NULL);
3088	}
3089	sx_xunlock(&sc->sc_lock);
3090	g_topology_lock();
3091	return (gp);
3092}
3093
3094static int
3095g_mirror_destroy_geom(struct gctl_req *req __unused,
3096    struct g_class *mp __unused, struct g_geom *gp)
3097{
3098	struct g_mirror_softc *sc;
3099	int error;
3100
3101	g_topology_unlock();
3102	sc = gp->softc;
3103	sx_xlock(&sc->sc_lock);
3104	g_cancel_event(sc);
3105	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3106	if (error != 0)
3107		sx_xunlock(&sc->sc_lock);
3108	g_topology_lock();
3109	return (error);
3110}
3111
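/*
 * Export device, consumer and provider state as XML for the userland
 * tools (e.g. "gmirror list").  An illustrative, made-up fragment of
 * the per-consumer output for a rebuilding component:
 *
 *	<ID>1941091885</ID>
 *	<Synchronized>42%</Synchronized>
 *	<SyncID>1</SyncID>
 *	<Flags>DIRTY, SYNCHRONIZING</Flags>
 *	<State>SYNCHRONIZING</State>
 */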
3112static void
3113g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3114    struct g_consumer *cp, struct g_provider *pp)
3115{
3116	struct g_mirror_softc *sc;
3117
3118	g_topology_assert();
3119
3120	sc = gp->softc;
3121	if (sc == NULL)
3122		return;
3123	/* Skip synchronization geom. */
3124	if (gp == sc->sc_sync.ds_geom)
3125		return;
3126	if (pp != NULL) {
3127		/* Nothing here. */
3128	} else if (cp != NULL) {
3129		struct g_mirror_disk *disk;
3130
3131		disk = cp->private;
3132		if (disk == NULL)
3133			return;
3134		g_topology_unlock();
3135		sx_xlock(&sc->sc_lock);
3136		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3137		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3138			sbuf_printf(sb, "%s<Synchronized>", indent);
3139			if (disk->d_sync.ds_offset == 0)
3140				sbuf_printf(sb, "0%%");
3141			else {
3142				sbuf_printf(sb, "%u%%",
3143				    (u_int)((disk->d_sync.ds_offset * 100) /
3144				    sc->sc_provider->mediasize));
3145			}
3146			sbuf_printf(sb, "</Synchronized>\n");
3147			if (disk->d_sync.ds_offset > 0) {
3148				sbuf_printf(sb, "%s<BytesSynced>%jd"
3149				    "</BytesSynced>\n", indent,
3150				    (intmax_t)disk->d_sync.ds_offset);
3151			}
3152		}
3153		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3154		    disk->d_sync.ds_syncid);
3155		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3156		    disk->d_genid);
3157		sbuf_printf(sb, "%s<Flags>", indent);
3158		if (disk->d_flags == 0)
3159			sbuf_printf(sb, "NONE");
3160		else {
3161			int first = 1;
3162
3163#define	ADD_FLAG(flag, name)	do {					\
3164	if ((disk->d_flags & (flag)) != 0) {				\
3165		if (!first)						\
3166			sbuf_printf(sb, ", ");				\
3167		else							\
3168			first = 0;					\
3169		sbuf_printf(sb, name);					\
3170	}								\
3171} while (0)
3172			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3173			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3174			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3175			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3176			    "SYNCHRONIZING");
3177			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3178			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3179#undef	ADD_FLAG
3180		}
3181		sbuf_printf(sb, "</Flags>\n");
3182		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3183		    disk->d_priority);
3184		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3185		    g_mirror_disk_state2str(disk->d_state));
3186		sx_xunlock(&sc->sc_lock);
3187		g_topology_lock();
3188	} else {
3189		g_topology_unlock();
3190		sx_xlock(&sc->sc_lock);
3191		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3192		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3193		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3194		sbuf_printf(sb, "%s<Flags>", indent);
3195		if (sc->sc_flags == 0)
3196			sbuf_printf(sb, "NONE");
3197		else {
3198			int first = 1;
3199
3200#define	ADD_FLAG(flag, name)	do {					\
3201	if ((sc->sc_flags & (flag)) != 0) {				\
3202		if (!first)						\
3203			sbuf_printf(sb, ", ");				\
3204		else							\
3205			first = 0;					\
3206		sbuf_printf(sb, name);					\
3207	}								\
3208} while (0)
3209			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3210			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3211#undef	ADD_FLAG
3212		}
3213		sbuf_printf(sb, "</Flags>\n");
3214		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3215		    (u_int)sc->sc_slice);
3216		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3217		    balance_name(sc->sc_balance));
3218		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3219		    sc->sc_ndisks);
3220		sbuf_printf(sb, "%s<State>", indent);
3221		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3222			sbuf_printf(sb, "%s", "STARTING");
3223		else if (sc->sc_ndisks ==
3224		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3225			sbuf_printf(sb, "%s", "COMPLETE");
3226		else
3227			sbuf_printf(sb, "%s", "DEGRADED");
3228		sbuf_printf(sb, "</State>\n");
3229		sx_xunlock(&sc->sc_lock);
3230		g_topology_lock();
3231	}
3232}
3233
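/*
 * Shutdown hook: request delayed destruction of every mirror before
 * filesystems are synced, so that components end up marked clean and
 * no rebuild is triggered on the next boot.
 */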
3234static void
3235g_mirror_shutdown_pre_sync(void *arg, int howto)
3236{
3237	struct g_class *mp;
3238	struct g_geom *gp, *gp2;
3239	struct g_mirror_softc *sc;
3240	int error;
3241
3242	mp = arg;
3243	DROP_GIANT();
3244	g_topology_lock();
3245	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3246		if ((sc = gp->softc) == NULL)
3247			continue;
3248		/* Skip synchronization geom. */
3249		if (gp == sc->sc_sync.ds_geom)
3250			continue;
3251		g_topology_unlock();
3252		sx_xlock(&sc->sc_lock);
3253		g_cancel_event(sc);
3254		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3255		if (error != 0)
3256			sx_xunlock(&sc->sc_lock);
3257		g_topology_lock();
3258	}
3259	g_topology_unlock();
3260	PICKUP_GIANT();
3261}
3262
3263static void
3264g_mirror_init(struct g_class *mp)
3265{
3266
3267	g_mirror_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
3268	    g_mirror_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
3269	if (g_mirror_pre_sync == NULL)
3270		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3271}
3272
3273static void
3274g_mirror_fini(struct g_class *mp)
3275{
3276
3277	if (g_mirror_pre_sync != NULL)
3278		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_mirror_pre_sync);
3279}
3280
3281DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3282