g_mirror.c revision 139051
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 139051 2004-12-19 23:12:00Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/eventhandler.h>
41#include <vm/uma.h>
42#include <geom/geom.h>
43#include <sys/proc.h>
44#include <sys/kthread.h>
45#include <geom/mirror/g_mirror.h>
46
47
48static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
49
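/*
 * Most of the knobs below can be preset from loader.conf(5) through the
 * corresponding kern.geom.mirror.* tunables, and all of them can also be
 * changed at run time, e.g.:
 *
 *	sysctl kern.geom.mirror.debug=2
 */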
50SYSCTL_DECL(_kern_geom);
51SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
52u_int g_mirror_debug = 0;
53TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
54SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
55    "Debug level");
56static u_int g_mirror_timeout = 4;
57TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
58SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
59    0, "Time to wait on all mirror components");
60static u_int g_mirror_idletime = 5;
61TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
62SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
63    &g_mirror_idletime, 0, "Mark components as clean when idling");
64static u_int g_mirror_reqs_per_sync = 5;
65SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
66    &g_mirror_reqs_per_sync, 0,
67    "Number of regular I/O requests per synchronization request");
68static u_int g_mirror_syncs_per_sec = 100;
69SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
70    &g_mirror_syncs_per_sec, 0,
71    "Number of synchronizations requests per second");
72
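/*
 * Wrapper around msleep(9) that logs when the caller goes to sleep on an
 * identifier and when it is woken up again, to ease debugging.
 */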
73#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
74	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
75	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
76	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
77} while (0)
78
79static eventhandler_tag g_mirror_ehtag = NULL;
80
81static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
82    struct g_geom *gp);
83static g_taste_t g_mirror_taste;
84static void g_mirror_init(struct g_class *mp);
85static void g_mirror_fini(struct g_class *mp);
86
87struct g_class g_mirror_class = {
88	.name = G_MIRROR_CLASS_NAME,
89	.version = G_VERSION,
90	.ctlreq = g_mirror_config,
91	.taste = g_mirror_taste,
92	.destroy_geom = g_mirror_destroy_geom,
93	.init = g_mirror_init,
94	.fini = g_mirror_fini
95};
96
97
98static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
99static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
100static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
101static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
102    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
103static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
104
105
106static const char *
107g_mirror_disk_state2str(int state)
108{
109
110	switch (state) {
111	case G_MIRROR_DISK_STATE_NONE:
112		return ("NONE");
113	case G_MIRROR_DISK_STATE_NEW:
114		return ("NEW");
115	case G_MIRROR_DISK_STATE_ACTIVE:
116		return ("ACTIVE");
117	case G_MIRROR_DISK_STATE_STALE:
118		return ("STALE");
119	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
120		return ("SYNCHRONIZING");
121	case G_MIRROR_DISK_STATE_DISCONNECTED:
122		return ("DISCONNECTED");
123	case G_MIRROR_DISK_STATE_DESTROY:
124		return ("DESTROY");
125	default:
126		return ("INVALID");
127	}
128}
129
130static const char *
131g_mirror_device_state2str(int state)
132{
133
134	switch (state) {
135	case G_MIRROR_DEVICE_STATE_STARTING:
136		return ("STARTING");
137	case G_MIRROR_DEVICE_STATE_RUNNING:
138		return ("RUNNING");
139	default:
140		return ("INVALID");
141	}
142}
143
144static const char *
145g_mirror_get_diskname(struct g_mirror_disk *disk)
146{
147
148	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
149		return ("[unknown]");
150	return (disk->d_name);
151}
152
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device status
 * from a single thread, which simplifies locking.
 */
158static void
159g_mirror_event_free(struct g_mirror_event *ep)
160{
161
162	free(ep, M_MIRROR);
163}
164
165int
166g_mirror_event_send(void *arg, int state, int flags)
167{
168	struct g_mirror_softc *sc;
169	struct g_mirror_disk *disk;
170	struct g_mirror_event *ep;
171	int error;
172
173	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
174	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
175	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
176		disk = NULL;
177		sc = arg;
178	} else {
179		disk = arg;
180		sc = disk->d_softc;
181	}
182	ep->e_disk = disk;
183	ep->e_state = state;
184	ep->e_flags = flags;
185	ep->e_error = 0;
186	mtx_lock(&sc->sc_events_mtx);
187	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
188	mtx_unlock(&sc->sc_events_mtx);
189	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
190	mtx_lock(&sc->sc_queue_mtx);
191	wakeup(sc);
192	mtx_unlock(&sc->sc_queue_mtx);
193	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
194		return (0);
195	g_topology_assert();
196	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
197	g_topology_unlock();
198	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
199		mtx_lock(&sc->sc_events_mtx);
200		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
201		    hz * 5);
202	}
203	/* Don't even try to use 'sc' here, because it could be already dead. */
204	g_topology_lock();
205	error = ep->e_error;
206	g_mirror_event_free(ep);
207	return (error);
208}
209
210static struct g_mirror_event *
211g_mirror_event_get(struct g_mirror_softc *sc)
212{
213	struct g_mirror_event *ep;
214
215	mtx_lock(&sc->sc_events_mtx);
216	ep = TAILQ_FIRST(&sc->sc_events);
217	if (ep != NULL)
218		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
219	mtx_unlock(&sc->sc_events_mtx);
220	return (ep);
221}
222
223static void
224g_mirror_event_cancel(struct g_mirror_disk *disk)
225{
226	struct g_mirror_softc *sc;
227	struct g_mirror_event *ep, *tmpep;
228
229	g_topology_assert();
230
231	sc = disk->d_softc;
232	mtx_lock(&sc->sc_events_mtx);
233	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
234		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
235			continue;
236		if (ep->e_disk != disk)
237			continue;
238		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
239		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
240			g_mirror_event_free(ep);
241		else {
242			ep->e_error = ECANCELED;
243			wakeup(ep);
244		}
245	}
246	mtx_unlock(&sc->sc_events_mtx);
247}
248
/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
253u_int
254g_mirror_ndisks(struct g_mirror_softc *sc, int state)
255{
256	struct g_mirror_disk *disk;
257	u_int n = 0;
258
259	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
260		if (state == -1 || disk->d_state == state)
261			n++;
262	}
263	return (n);
264}
265
/*
 * Find a disk in the mirror by its disk ID.
 */
269static struct g_mirror_disk *
270g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
271{
272	struct g_mirror_disk *disk;
273
274	g_topology_assert();
275
276	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
277		if (disk->d_id == id)
278			return (disk);
279	}
280	return (NULL);
281}
282
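/*
 * Count the bios on the device queue that originated from the given
 * consumer.
 */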
283static u_int
284g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
285{
286	struct bio *bp;
287	u_int nreqs = 0;
288
289	mtx_lock(&sc->sc_queue_mtx);
290	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
291		if (bp->bio_from == cp)
292			nreqs++;
293	}
294	mtx_unlock(&sc->sc_queue_mtx);
295	return (nreqs);
296}
297
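/*
 * Return non-zero if the consumer still has in-flight or queued I/O
 * requests and thus cannot be destroyed yet.
 */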
298static int
299g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
300{
301
302	if (cp->index > 0) {
303		G_MIRROR_DEBUG(2,
304		    "I/O requests for %s exist, can't destroy it now.",
305		    cp->provider->name);
306		return (1);
307	}
308	if (g_mirror_nrequests(sc, cp) > 0) {
309		G_MIRROR_DEBUG(2,
310		    "I/O requests for %s in queue, can't destroy it now.",
311		    cp->provider->name);
312		return (1);
313	}
314	return (0);
315}
316
317static void
318g_mirror_destroy_consumer(void *arg, int flags)
319{
320	struct g_consumer *cp;
321
322	cp = arg;
323	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
324	g_detach(cp);
325	g_destroy_consumer(cp);
326}
327
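/*
 * Close the consumer and then detach and destroy it.  If the consumer
 * still has I/O requests pending, only mark it (cp->private = NULL) and
 * let the request completion path finish the destruction.
 */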
328static void
329g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
330{
331	struct g_provider *pp;
332	int retaste_wait;
333
334	g_topology_assert();
335
336	cp->private = NULL;
337	if (g_mirror_is_busy(sc, cp))
338		return;
339	pp = cp->provider;
340	retaste_wait = 0;
341	if (cp->acw == 1) {
342		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
343			retaste_wait = 1;
344	}
345	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
346	    -cp->acw, -cp->ace, 0);
347	g_access(cp, -cp->acr, -cp->acw, -cp->ace);
348	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can post an event to detach and destroy the consumer.
		 * A class which still has a consumer attached to the given
		 * provider will not receive a retaste event for that provider.
		 * This is how retaste events are ignored when closing
		 * consumers opened for write: the consumer is detached and
		 * destroyed only after the retaste event has been sent.
		 */
358		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
359		return;
360	}
361	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
362	g_detach(cp);
363	g_destroy_consumer(cp);
364}
365
366static int
367g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
368{
369	int error;
370
371	g_topology_assert();
372	KASSERT(disk->d_consumer == NULL,
373	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
374
375	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
376	disk->d_consumer->private = disk;
377	disk->d_consumer->index = 0;
378	error = g_attach(disk->d_consumer, pp);
379	if (error != 0)
380		return (error);
381	error = g_access(disk->d_consumer, 1, 1, 1);
382	if (error != 0) {
383		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
384		    pp->name, error);
385		return (error);
386	}
387
388	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
389	return (0);
390}
391
392static void
393g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
394{
395
396	g_topology_assert();
397
398	if (cp == NULL)
399		return;
400	if (cp->provider != NULL)
401		g_mirror_kill_consumer(sc, cp);
402	else
403		g_destroy_consumer(cp);
404}
405
/*
 * Initialize a disk. This means allocating memory, creating a consumer,
 * attaching it to the provider and opening access (r1w1e1) to it.
 */
410static struct g_mirror_disk *
411g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
412    struct g_mirror_metadata *md, int *errorp)
413{
414	struct g_mirror_disk *disk;
415	int error;
416
417	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
418	if (disk == NULL) {
419		error = ENOMEM;
420		goto fail;
421	}
422	disk->d_softc = sc;
423	error = g_mirror_connect_disk(disk, pp);
424	if (error != 0)
425		goto fail;
426	disk->d_id = md->md_did;
427	disk->d_state = G_MIRROR_DISK_STATE_NONE;
428	disk->d_priority = md->md_priority;
429	disk->d_delay.sec = 0;
430	disk->d_delay.frac = 0;
431	binuptime(&disk->d_last_used);
432	disk->d_flags = md->md_dflags;
433	if (md->md_provider[0] != '\0')
434		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
435	disk->d_sync.ds_consumer = NULL;
436	disk->d_sync.ds_offset = md->md_sync_offset;
437	disk->d_sync.ds_offset_done = md->md_sync_offset;
438	disk->d_sync.ds_resync = -1;
439	disk->d_sync.ds_syncid = md->md_syncid;
440	if (errorp != NULL)
441		*errorp = 0;
442	return (disk);
443fail:
444	if (errorp != NULL)
445		*errorp = error;
446	if (disk != NULL) {
447		g_mirror_disconnect_consumer(sc, disk->d_consumer);
448		free(disk, M_MIRROR);
449	}
450	return (NULL);
451}
452
453static void
454g_mirror_destroy_disk(struct g_mirror_disk *disk)
455{
456	struct g_mirror_softc *sc;
457
458	g_topology_assert();
459
460	LIST_REMOVE(disk, d_next);
461	g_mirror_event_cancel(disk);
462	sc = disk->d_softc;
463	if (sc->sc_hint == disk)
464		sc->sc_hint = NULL;
465	switch (disk->d_state) {
466	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
467		g_mirror_sync_stop(disk, 1);
468		/* FALLTHROUGH */
469	case G_MIRROR_DISK_STATE_NEW:
470	case G_MIRROR_DISK_STATE_STALE:
471	case G_MIRROR_DISK_STATE_ACTIVE:
472		g_mirror_disconnect_consumer(sc, disk->d_consumer);
473		free(disk, M_MIRROR);
474		break;
475	default:
476		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
477		    g_mirror_get_diskname(disk),
478		    g_mirror_disk_state2str(disk->d_state)));
479	}
480}
481
482static void
483g_mirror_destroy_device(struct g_mirror_softc *sc)
484{
485	struct g_mirror_disk *disk;
486	struct g_mirror_event *ep;
487	struct g_geom *gp;
488	struct g_consumer *cp, *tmpcp;
489
490	g_topology_assert();
491
492	gp = sc->sc_geom;
493	if (sc->sc_provider != NULL)
494		g_mirror_destroy_provider(sc);
495	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
496	    disk = LIST_FIRST(&sc->sc_disks)) {
497		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
498		g_mirror_update_metadata(disk);
499		g_mirror_destroy_disk(disk);
500	}
501	while ((ep = g_mirror_event_get(sc)) != NULL) {
502		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
503			g_mirror_event_free(ep);
504		else {
505			ep->e_error = ECANCELED;
506			ep->e_flags |= G_MIRROR_EVENT_DONE;
507			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
508			mtx_lock(&sc->sc_events_mtx);
509			wakeup(ep);
510			mtx_unlock(&sc->sc_events_mtx);
511		}
512	}
513	callout_drain(&sc->sc_callout);
514	gp->softc = NULL;
515
516	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
517		g_mirror_disconnect_consumer(sc, cp);
518	}
519	sc->sc_sync.ds_geom->softc = NULL;
520	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
521	mtx_destroy(&sc->sc_queue_mtx);
522	mtx_destroy(&sc->sc_events_mtx);
523	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
524	g_wither_geom(gp, ENXIO);
525}
526
527static void
528g_mirror_orphan(struct g_consumer *cp)
529{
530	struct g_mirror_disk *disk;
531
532	g_topology_assert();
533
534	disk = cp->private;
535	if (disk == NULL)
536		return;
537	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
538	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
539	    G_MIRROR_EVENT_DONTWAIT);
540}
541
542static void
543g_mirror_spoiled(struct g_consumer *cp)
544{
545	struct g_mirror_disk *disk;
546
547	g_topology_assert();
548
549	disk = cp->private;
550	if (disk == NULL)
551		return;
552	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
553	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
554	    G_MIRROR_EVENT_DONTWAIT);
555}
556
/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
562static __inline struct g_mirror_disk *
563g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
564{
565	struct g_mirror_disk *dp;
566
567	for (dp = LIST_NEXT(disk, d_next); dp != disk;
568	    dp = LIST_NEXT(dp, d_next)) {
569		if (dp == NULL)
570			dp = LIST_FIRST(&sc->sc_disks);
571		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
572			break;
573	}
574	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
575		return (NULL);
576	return (dp);
577}
578
579static struct g_mirror_disk *
580g_mirror_get_disk(struct g_mirror_softc *sc)
581{
582	struct g_mirror_disk *disk;
583
584	if (sc->sc_hint == NULL) {
585		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
586		if (sc->sc_hint == NULL)
587			return (NULL);
588	}
589	disk = sc->sc_hint;
590	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
591		disk = g_mirror_find_next(sc, disk);
592		if (disk == NULL)
593			return (NULL);
594	}
595	sc->sc_hint = g_mirror_find_next(sc, disk);
596	return (disk);
597}
598
599static int
600g_mirror_write_metadata(struct g_mirror_disk *disk,
601    struct g_mirror_metadata *md)
602{
603	struct g_mirror_softc *sc;
604	struct g_consumer *cp;
605	off_t offset, length;
606	u_char *sector;
607	int error = 0;
608
609	g_topology_assert();
610
611	sc = disk->d_softc;
612	cp = disk->d_consumer;
613	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
614	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
615	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
616	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
617	    cp->acw, cp->ace));
618	length = cp->provider->sectorsize;
619	offset = cp->provider->mediasize - length;
620	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
621	if (md != NULL)
622		mirror_metadata_encode(md, sector);
623	g_topology_unlock();
624	error = g_write_data(cp, offset, sector, length);
625	g_topology_lock();
626	free(sector, M_MIRROR);
627	if (error != 0) {
628		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
629		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
630		    G_MIRROR_EVENT_DONTWAIT);
631	}
632	return (error);
633}
634
635static int
636g_mirror_clear_metadata(struct g_mirror_disk *disk)
637{
638	int error;
639
640	g_topology_assert();
641	error = g_mirror_write_metadata(disk, NULL);
642	if (error == 0) {
643		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
644		    g_mirror_get_diskname(disk));
645	} else {
646		G_MIRROR_DEBUG(0,
647		    "Cannot clear metadata on disk %s (error=%d).",
648		    g_mirror_get_diskname(disk), error);
649	}
650	return (error);
651}
652
653void
654g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
655    struct g_mirror_metadata *md)
656{
657
658	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
659	md->md_version = G_MIRROR_VERSION;
660	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
661	md->md_mid = sc->sc_id;
662	md->md_all = sc->sc_ndisks;
663	md->md_slice = sc->sc_slice;
664	md->md_balance = sc->sc_balance;
665	md->md_mediasize = sc->sc_mediasize;
666	md->md_sectorsize = sc->sc_sectorsize;
667	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
668	bzero(md->md_provider, sizeof(md->md_provider));
669	if (disk == NULL) {
670		md->md_did = arc4random();
671		md->md_priority = 0;
672		md->md_syncid = 0;
673		md->md_dflags = 0;
674		md->md_sync_offset = 0;
675	} else {
676		md->md_did = disk->d_id;
677		md->md_priority = disk->d_priority;
678		md->md_syncid = disk->d_sync.ds_syncid;
679		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
680		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
681			md->md_sync_offset = disk->d_sync.ds_offset_done;
682		else
683			md->md_sync_offset = 0;
684		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
685			strlcpy(md->md_provider,
686			    disk->d_consumer->provider->name,
687			    sizeof(md->md_provider));
688		}
689	}
690}
691
692void
693g_mirror_update_metadata(struct g_mirror_disk *disk)
694{
695	struct g_mirror_metadata md;
696	int error;
697
698	g_topology_assert();
699	g_mirror_fill_metadata(disk->d_softc, disk, &md);
700	error = g_mirror_write_metadata(disk, &md);
701	if (error == 0) {
702		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
703		    g_mirror_get_diskname(disk));
704	} else {
705		G_MIRROR_DEBUG(0,
706		    "Cannot update metadata on disk %s (error=%d).",
707		    g_mirror_get_diskname(disk), error);
708	}
709}
710
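/*
 * Increase the synchronization ID and store it in the metadata of all
 * active and synchronizing disks.
 */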
711static void
712g_mirror_bump_syncid(struct g_mirror_softc *sc)
713{
714	struct g_mirror_disk *disk;
715
716	g_topology_assert();
717	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
718	    ("%s called with no active disks (device=%s).", __func__,
719	    sc->sc_name));
720
721	sc->sc_syncid++;
722	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
723	    sc->sc_syncid);
724	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
725		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
726		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
727			disk->d_sync.ds_syncid = sc->sc_syncid;
728			g_mirror_update_metadata(disk);
729		}
730	}
731}
732
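/*
 * The device has been idle for a while: clear the DIRTY flag on all
 * active components and write out the updated metadata.
 */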
733static void
734g_mirror_idle(struct g_mirror_softc *sc)
735{
736	struct g_mirror_disk *disk;
737
738	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
739		return;
740	sc->sc_idle = 1;
741	g_topology_lock();
742	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
743		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
744			continue;
745		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
746		    g_mirror_get_diskname(disk), sc->sc_name);
747		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
748		g_mirror_update_metadata(disk);
749	}
750	g_topology_unlock();
751}
752
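/*
 * Leave the idle state: mark all active components as dirty again and
 * write out the updated metadata.
 */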
753static void
754g_mirror_unidle(struct g_mirror_softc *sc)
755{
756	struct g_mirror_disk *disk;
757
758	sc->sc_idle = 0;
759	g_topology_lock();
760	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
761		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
762			continue;
763		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
764		    g_mirror_get_diskname(disk), sc->sc_name);
765		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
766		g_mirror_update_metadata(disk);
767	}
768	g_topology_unlock();
769}
770
/*
 * Return 1 if the mirror is open for writing, not already idle, and has
 * no in-flight requests on its active components, i.e. it is a candidate
 * for being marked idle.
 */
774static int
775g_mirror_check_idle(struct g_mirror_softc *sc)
776{
777	struct g_mirror_disk *disk;
778
779	if (sc->sc_idle)
780		return (0);
781	if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
782		return (0);
783	/*
784	 * Check if there are no in-flight requests.
785	 */
786	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
787		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
788			continue;
789		if (disk->d_consumer->index > 0)
790			return (0);
791	}
792	return (1);
793}
794
795static __inline int
796bintime_cmp(struct bintime *bt1, struct bintime *bt2)
797{
798
799	if (bt1->sec < bt2->sec)
800		return (-1);
801	else if (bt1->sec > bt2->sec)
802		return (1);
803	if (bt1->frac < bt2->frac)
804		return (-1);
805	else if (bt1->frac > bt2->frac)
806		return (1);
807	return (0);
808}
809
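/*
 * For the LOAD balance algorithm, record how long the given request to
 * this disk took to complete.
 */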
810static void
811g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
812{
813
814	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
815		return;
816	binuptime(&disk->d_delay);
817	bintime_sub(&disk->d_delay, &bp->bio_t0);
818}
819
820static void
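/*
 * Completion handler for regular requests: mark the bio as a regular
 * request and hand it back to the worker thread through the device queue.
 */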
821g_mirror_done(struct bio *bp)
822{
823	struct g_mirror_softc *sc;
824
825	sc = bp->bio_from->geom->softc;
826	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
827	mtx_lock(&sc->sc_queue_mtx);
828	bioq_disksort(&sc->sc_queue, bp);
829	wakeup(sc);
830	mtx_unlock(&sc->sc_queue_mtx);
831}
832
833static void
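/*
 * Process a completed regular request in the worker thread: account for
 * the finished child bio, disconnect the disk on error, and deliver the
 * parent bio once all of its children have returned (a failed read is
 * re-queued so it can be retried on another disk).
 */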
834g_mirror_regular_request(struct bio *bp)
835{
836	struct g_mirror_softc *sc;
837	struct g_mirror_disk *disk;
838	struct bio *pbp;
839
840	g_topology_assert_not();
841
842	bp->bio_from->index--;
843	pbp = bp->bio_parent;
844	sc = pbp->bio_to->geom->softc;
845	disk = bp->bio_from->private;
846	if (disk == NULL) {
847		g_topology_lock();
848		g_mirror_kill_consumer(sc, bp->bio_from);
849		g_topology_unlock();
850	} else {
851		g_mirror_update_delay(disk, bp);
852	}
853
854	pbp->bio_inbed++;
855	KASSERT(pbp->bio_inbed <= pbp->bio_children,
856	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
857	    pbp->bio_children));
858	if (bp->bio_error == 0 && pbp->bio_error == 0) {
859		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
860		g_destroy_bio(bp);
861		if (pbp->bio_children == pbp->bio_inbed) {
862			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
863			pbp->bio_completed = pbp->bio_length;
864			g_io_deliver(pbp, pbp->bio_error);
865		}
866		return;
867	} else if (bp->bio_error != 0) {
868		if (pbp->bio_error == 0)
869			pbp->bio_error = bp->bio_error;
870		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
871		    bp->bio_error);
872		if (disk != NULL) {
873			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
874			g_mirror_event_send(disk,
875			    G_MIRROR_DISK_STATE_DISCONNECTED,
876			    G_MIRROR_EVENT_DONTWAIT);
877		}
878		switch (pbp->bio_cmd) {
879		case BIO_DELETE:
880		case BIO_WRITE:
881			pbp->bio_inbed--;
882			pbp->bio_children--;
883			break;
884		}
885	}
886	g_destroy_bio(bp);
887
888	switch (pbp->bio_cmd) {
889	case BIO_READ:
890		if (pbp->bio_children == pbp->bio_inbed) {
891			pbp->bio_error = 0;
892			mtx_lock(&sc->sc_queue_mtx);
893			bioq_disksort(&sc->sc_queue, pbp);
894			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
895			wakeup(sc);
896			mtx_unlock(&sc->sc_queue_mtx);
897		}
898		break;
899	case BIO_DELETE:
900	case BIO_WRITE:
901		if (pbp->bio_children == 0) {
902			/*
903			 * All requests failed.
904			 */
905		} else if (pbp->bio_inbed < pbp->bio_children) {
906			/* Do nothing. */
907			break;
908		} else if (pbp->bio_children == pbp->bio_inbed) {
909			/* Some requests succeeded. */
910			pbp->bio_error = 0;
911			pbp->bio_completed = pbp->bio_length;
912		}
913		g_io_deliver(pbp, pbp->bio_error);
914		break;
915	default:
916		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
917		break;
918	}
919}
920
921static void
922g_mirror_sync_done(struct bio *bp)
923{
924	struct g_mirror_softc *sc;
925
926	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
927	sc = bp->bio_from->geom->softc;
928	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
929	mtx_lock(&sc->sc_queue_mtx);
930	bioq_disksort(&sc->sc_queue, bp);
931	wakeup(sc);
932	mtx_unlock(&sc->sc_queue_mtx);
933}
934
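/*
 * Handle an incoming request for the mirror provider: accept READ, WRITE
 * and DELETE, reject everything else, and queue accepted requests for the
 * worker thread.
 */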
935static void
936g_mirror_start(struct bio *bp)
937{
938	struct g_mirror_softc *sc;
939
940	sc = bp->bio_to->geom->softc;
941	/*
942	 * If sc == NULL or there are no valid disks, provider's error
943	 * should be set and g_mirror_start() should not be called at all.
944	 */
945	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
946	    ("Provider's error should be set (error=%d)(mirror=%s).",
947	    bp->bio_to->error, bp->bio_to->name));
948	G_MIRROR_LOGREQ(3, bp, "Request received.");
949
950	switch (bp->bio_cmd) {
951	case BIO_READ:
952	case BIO_WRITE:
953	case BIO_DELETE:
954		break;
955	case BIO_GETATTR:
956	default:
957		g_io_deliver(bp, EOPNOTSUPP);
958		return;
959	}
960	mtx_lock(&sc->sc_queue_mtx);
961	bioq_disksort(&sc->sc_queue, bp);
962	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
963	wakeup(sc);
964	mtx_unlock(&sc->sc_queue_mtx);
965}
966
967/*
968 * Send one synchronization request.
969 */
970static void
971g_mirror_sync_one(struct g_mirror_disk *disk)
972{
973	struct g_mirror_softc *sc;
974	struct bio *bp;
975
976	sc = disk->d_softc;
977	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
978	    ("Disk %s is not marked for synchronization.",
979	    g_mirror_get_diskname(disk)));
980
981	bp = g_new_bio();
982	if (bp == NULL)
983		return;
984	bp->bio_parent = NULL;
985	bp->bio_cmd = BIO_READ;
986	bp->bio_offset = disk->d_sync.ds_offset;
987	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
988	bp->bio_cflags = 0;
989	bp->bio_done = g_mirror_sync_done;
990	bp->bio_data = disk->d_sync.ds_data;
991	if (bp->bio_data == NULL) {
992		g_destroy_bio(bp);
993		return;
994	}
995	disk->d_sync.ds_offset += bp->bio_length;
996	bp->bio_to = sc->sc_provider;
997	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
998	disk->d_sync.ds_consumer->index++;
999	g_io_request(bp, disk->d_sync.ds_consumer);
1000}
1001
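/*
 * Process a completed synchronization bio: a finished READ from the
 * mirror provider is turned into a WRITE to the disk being rebuilt; on
 * WRITE completion ds_offset_done is advanced and the disk is activated
 * once it reaches the end of the provider.
 */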
1002static void
1003g_mirror_sync_request(struct bio *bp)
1004{
1005	struct g_mirror_softc *sc;
1006	struct g_mirror_disk *disk;
1007
1008	bp->bio_from->index--;
1009	sc = bp->bio_from->geom->softc;
1010	disk = bp->bio_from->private;
1011	if (disk == NULL) {
1012		g_topology_lock();
1013		g_mirror_kill_consumer(sc, bp->bio_from);
1014		g_topology_unlock();
1015		g_destroy_bio(bp);
1016		return;
1017	}
1018
1019	/*
1020	 * Synchronization request.
1021	 */
1022	switch (bp->bio_cmd) {
1023	case BIO_READ:
1024	    {
1025		struct g_consumer *cp;
1026
1027		if (bp->bio_error != 0) {
1028			G_MIRROR_LOGREQ(0, bp,
1029			    "Synchronization request failed (error=%d).",
1030			    bp->bio_error);
1031			g_destroy_bio(bp);
1032			return;
1033		}
1034		G_MIRROR_LOGREQ(3, bp,
1035		    "Synchronization request half-finished.");
1036		bp->bio_cmd = BIO_WRITE;
1037		bp->bio_cflags = 0;
1038		cp = disk->d_consumer;
1039		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1040		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1041		    cp->acr, cp->acw, cp->ace));
1042		cp->index++;
1043		g_io_request(bp, cp);
1044		return;
1045	    }
1046	case BIO_WRITE:
1047	    {
1048		struct g_mirror_disk_sync *sync;
1049
1050		if (bp->bio_error != 0) {
1051			G_MIRROR_LOGREQ(0, bp,
1052			    "Synchronization request failed (error=%d).",
1053			    bp->bio_error);
1054			g_destroy_bio(bp);
1055			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
1056			g_mirror_event_send(disk,
1057			    G_MIRROR_DISK_STATE_DISCONNECTED,
1058			    G_MIRROR_EVENT_DONTWAIT);
1059			return;
1060		}
1061		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1062		sync = &disk->d_sync;
1063		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
1064		g_destroy_bio(bp);
1065		if (sync->ds_resync != -1)
1066			break;
1067		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
1068			/*
1069			 * Disk up-to-date, activate it.
1070			 */
1071			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1072			    G_MIRROR_EVENT_DONTWAIT);
1073			return;
1074		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Store the updated offset_done in the metadata every
			 * 100 synchronization blocks (MAXPHYS bytes each).
			 * XXX: This should be configurable.
			 */
1079			g_topology_lock();
1080			g_mirror_update_metadata(disk);
1081			g_topology_unlock();
1082		}
1083		return;
1084	    }
1085	default:
1086		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1087		    bp->bio_cmd, sc->sc_name));
1088		break;
1089	}
1090}
1091
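/*
 * PREFER balance algorithm: send the request to the first active disk on
 * the list.
 */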
1092static void
1093g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1094{
1095	struct g_mirror_disk *disk;
1096	struct g_consumer *cp;
1097	struct bio *cbp;
1098
1099	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1100		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1101			break;
1102	}
1103	if (disk == NULL) {
1104		if (bp->bio_error == 0)
1105			bp->bio_error = ENXIO;
1106		g_io_deliver(bp, bp->bio_error);
1107		return;
1108	}
1109	cbp = g_clone_bio(bp);
1110	if (cbp == NULL) {
1111		if (bp->bio_error == 0)
1112			bp->bio_error = ENOMEM;
1113		g_io_deliver(bp, bp->bio_error);
1114		return;
1115	}
1116	/*
1117	 * Fill in the component buf structure.
1118	 */
1119	cp = disk->d_consumer;
1120	cbp->bio_done = g_mirror_done;
1121	cbp->bio_to = cp->provider;
1122	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1123	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1124	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1125	    cp->acw, cp->ace));
1126	cp->index++;
1127	g_io_request(cbp, cp);
1128}
1129
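/*
 * ROUND-ROBIN balance algorithm: rotate requests over the active disks,
 * using sc_hint as the cursor.
 */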
1130static void
1131g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1132{
1133	struct g_mirror_disk *disk;
1134	struct g_consumer *cp;
1135	struct bio *cbp;
1136
1137	disk = g_mirror_get_disk(sc);
1138	if (disk == NULL) {
1139		if (bp->bio_error == 0)
1140			bp->bio_error = ENXIO;
1141		g_io_deliver(bp, bp->bio_error);
1142		return;
1143	}
1144	cbp = g_clone_bio(bp);
1145	if (cbp == NULL) {
1146		if (bp->bio_error == 0)
1147			bp->bio_error = ENOMEM;
1148		g_io_deliver(bp, bp->bio_error);
1149		return;
1150	}
1151	/*
1152	 * Fill in the component buf structure.
1153	 */
1154	cp = disk->d_consumer;
1155	cbp->bio_done = g_mirror_done;
1156	cbp->bio_to = cp->provider;
1157	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1158	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1159	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1160	    cp->acw, cp->ace));
1161	cp->index++;
1162	g_io_request(cbp, cp);
1163}
1164
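/*
 * LOAD balance algorithm: send the request to the active disk with the
 * smallest recent request latency, or to one that has been idle for a
 * couple of seconds.
 */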
1165static void
1166g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1167{
1168	struct g_mirror_disk *disk, *dp;
1169	struct g_consumer *cp;
1170	struct bio *cbp;
1171	struct bintime curtime;
1172
1173	binuptime(&curtime);
	/*
	 * Find the disk with the smallest load.
	 */
1177	disk = NULL;
1178	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1179		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1180			continue;
		/* If the disk hasn't been used for more than 2 seconds, use it. */
1182		if (curtime.sec - dp->d_last_used.sec >= 2) {
1183			disk = dp;
1184			break;
1185		}
1186		if (disk == NULL ||
1187		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1188			disk = dp;
1189		}
1190	}
1191	cbp = g_clone_bio(bp);
1192	if (cbp == NULL) {
1193		if (bp->bio_error == 0)
1194			bp->bio_error = ENOMEM;
1195		g_io_deliver(bp, bp->bio_error);
1196		return;
1197	}
1198	/*
1199	 * Fill in the component buf structure.
1200	 */
1201	cp = disk->d_consumer;
1202	cbp->bio_done = g_mirror_done;
1203	cbp->bio_to = cp->provider;
1204	binuptime(&disk->d_last_used);
1205	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1206	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1207	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1208	    cp->acw, cp->ace));
1209	cp->index++;
1210	g_io_request(cbp, cp);
1211}
1212
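/*
 * SPLIT balance algorithm: divide large requests into roughly equal,
 * sector-aligned slices and send one slice to each active disk; requests
 * no longer than the configured slice size fall back to round-robin.
 */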
1213static void
1214g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1215{
1216	struct bio_queue_head queue;
1217	struct g_mirror_disk *disk;
1218	struct g_consumer *cp;
1219	struct bio *cbp;
1220	off_t left, mod, offset, slice;
1221	u_char *data;
1222	u_int ndisks;
1223
1224	if (bp->bio_length <= sc->sc_slice) {
1225		g_mirror_request_round_robin(sc, bp);
1226		return;
1227	}
1228	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1229	slice = bp->bio_length / ndisks;
1230	mod = slice % sc->sc_provider->sectorsize;
1231	if (mod != 0)
1232		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in a nice and clean way.
	 */
1237	left = bp->bio_length;
1238	offset = bp->bio_offset;
1239	data = bp->bio_data;
1240	bioq_init(&queue);
1241	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1242		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1243			continue;
1244		cbp = g_clone_bio(bp);
1245		if (cbp == NULL) {
1246			for (cbp = bioq_first(&queue); cbp != NULL;
1247			    cbp = bioq_first(&queue)) {
1248				bioq_remove(&queue, cbp);
1249				g_destroy_bio(cbp);
1250			}
1251			if (bp->bio_error == 0)
1252				bp->bio_error = ENOMEM;
1253			g_io_deliver(bp, bp->bio_error);
1254			return;
1255		}
1256		bioq_insert_tail(&queue, cbp);
1257		cbp->bio_done = g_mirror_done;
1258		cbp->bio_caller1 = disk;
1259		cbp->bio_to = disk->d_consumer->provider;
1260		cbp->bio_offset = offset;
1261		cbp->bio_data = data;
1262		cbp->bio_length = MIN(left, slice);
1263		left -= cbp->bio_length;
1264		if (left == 0)
1265			break;
1266		offset += cbp->bio_length;
1267		data += cbp->bio_length;
1268	}
1269	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1270		bioq_remove(&queue, cbp);
1271		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1272		disk = cbp->bio_caller1;
1273		cbp->bio_caller1 = NULL;
1274		cp = disk->d_consumer;
1275		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1276		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1277		    cp->acr, cp->acw, cp->ace));
1278		disk->d_consumer->index++;
1279		g_io_request(cbp, disk->d_consumer);
1280	}
1281}
1282
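/*
 * Dispatch a request from the worker thread: reads go to a single disk
 * chosen by the configured balance algorithm, while writes and deletes
 * are cloned to every active disk and to synchronizing disks for the
 * region that has already been synchronized.
 */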
1283static void
1284g_mirror_register_request(struct bio *bp)
1285{
1286	struct g_mirror_softc *sc;
1287
1288	sc = bp->bio_to->geom->softc;
1289	switch (bp->bio_cmd) {
1290	case BIO_READ:
1291		switch (sc->sc_balance) {
1292		case G_MIRROR_BALANCE_LOAD:
1293			g_mirror_request_load(sc, bp);
1294			break;
1295		case G_MIRROR_BALANCE_PREFER:
1296			g_mirror_request_prefer(sc, bp);
1297			break;
1298		case G_MIRROR_BALANCE_ROUND_ROBIN:
1299			g_mirror_request_round_robin(sc, bp);
1300			break;
1301		case G_MIRROR_BALANCE_SPLIT:
1302			g_mirror_request_split(sc, bp);
1303			break;
1304		}
1305		return;
1306	case BIO_WRITE:
1307	case BIO_DELETE:
1308	    {
1309		struct g_mirror_disk *disk;
1310		struct g_mirror_disk_sync *sync;
1311		struct bio_queue_head queue;
1312		struct g_consumer *cp;
1313		struct bio *cbp;
1314
1315		if (sc->sc_idle)
1316			g_mirror_unidle(sc);
		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
1321		bioq_init(&queue);
1322		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1323			sync = &disk->d_sync;
1324			switch (disk->d_state) {
1325			case G_MIRROR_DISK_STATE_ACTIVE:
1326				break;
1327			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1328				if (bp->bio_offset >= sync->ds_offset)
1329					continue;
1330				else if (bp->bio_offset + bp->bio_length >
1331				    sync->ds_offset_done &&
1332				    (bp->bio_offset < sync->ds_resync ||
1333				     sync->ds_resync == -1)) {
1334					sync->ds_resync = bp->bio_offset -
1335					    (bp->bio_offset % MAXPHYS);
1336				}
1337				break;
1338			default:
1339				continue;
1340			}
1341			cbp = g_clone_bio(bp);
1342			if (cbp == NULL) {
1343				for (cbp = bioq_first(&queue); cbp != NULL;
1344				    cbp = bioq_first(&queue)) {
1345					bioq_remove(&queue, cbp);
1346					g_destroy_bio(cbp);
1347				}
1348				if (bp->bio_error == 0)
1349					bp->bio_error = ENOMEM;
1350				g_io_deliver(bp, bp->bio_error);
1351				return;
1352			}
1353			bioq_insert_tail(&queue, cbp);
1354			cbp->bio_done = g_mirror_done;
1355			cp = disk->d_consumer;
1356			cbp->bio_caller1 = cp;
1357			cbp->bio_to = cp->provider;
1358			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1359			    ("Consumer %s not opened (r%dw%de%d).",
1360			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1361		}
1362		for (cbp = bioq_first(&queue); cbp != NULL;
1363		    cbp = bioq_first(&queue)) {
1364			bioq_remove(&queue, cbp);
1365			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1366			cp = cbp->bio_caller1;
1367			cbp->bio_caller1 = NULL;
1368			cp->index++;
1369			g_io_request(cbp, cp);
1370		}
1371		/*
1372		 * Bump syncid on first write.
1373		 */
1374		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1375			sc->sc_bump_syncid = 0;
1376			g_topology_lock();
1377			g_mirror_bump_syncid(sc);
1378			g_topology_unlock();
1379		}
1380		return;
1381	    }
1382	default:
1383		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1384		    bp->bio_cmd, sc->sc_name));
1385		break;
1386	}
1387}
1388
1389static int
1390g_mirror_can_destroy(struct g_mirror_softc *sc)
1391{
1392	struct g_geom *gp;
1393	struct g_consumer *cp;
1394
1395	g_topology_assert();
1396	gp = sc->sc_geom;
1397	LIST_FOREACH(cp, &gp->consumer, consumer) {
1398		if (g_mirror_is_busy(sc, cp))
1399			return (0);
1400	}
1401	gp = sc->sc_sync.ds_geom;
1402	LIST_FOREACH(cp, &gp->consumer, consumer) {
1403		if (g_mirror_is_busy(sc, cp))
1404			return (0);
1405	}
1406	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1407	    sc->sc_name);
1408	return (1);
1409}
1410
1411static int
1412g_mirror_try_destroy(struct g_mirror_softc *sc)
1413{
1414
1415	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1416		g_topology_lock();
1417		if (!g_mirror_can_destroy(sc)) {
1418			g_topology_unlock();
1419			return (0);
1420		}
1421		g_topology_unlock();
1422		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1423		    &sc->sc_worker);
1424		wakeup(&sc->sc_worker);
1425		sc->sc_worker = NULL;
1426	} else {
1427		g_topology_lock();
1428		if (!g_mirror_can_destroy(sc)) {
1429			g_topology_unlock();
1430			return (0);
1431		}
1432		g_mirror_destroy_device(sc);
1433		g_topology_unlock();
1434		free(sc, M_MIRROR);
1435	}
1436	return (1);
1437}
1438
1439/*
1440 * Worker thread.
1441 */
1442static void
1443g_mirror_worker(void *arg)
1444{
1445	struct g_mirror_softc *sc;
1446	struct g_mirror_disk *disk;
1447	struct g_mirror_disk_sync *sync;
1448	struct g_mirror_event *ep;
1449	struct bio *bp;
1450	u_int nreqs;
1451
1452	sc = arg;
1453	curthread->td_base_pri = PRIBIO;
1454
1455	nreqs = 0;
1456	for (;;) {
1457		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * It is important to handle events before any I/O requests.
		 */
1462		ep = g_mirror_event_get(sc);
1463		if (ep != NULL) {
1464			g_topology_lock();
1465			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1466				/* Update only device status. */
1467				G_MIRROR_DEBUG(3,
1468				    "Running event for device %s.",
1469				    sc->sc_name);
1470				ep->e_error = 0;
1471				g_mirror_update_device(sc, 1);
1472			} else {
1473				/* Update disk status. */
1474				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1475				     g_mirror_get_diskname(ep->e_disk));
1476				ep->e_error = g_mirror_update_disk(ep->e_disk,
1477				    ep->e_state);
1478				if (ep->e_error == 0)
1479					g_mirror_update_device(sc, 0);
1480			}
1481			g_topology_unlock();
1482			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1483				KASSERT(ep->e_error == 0,
1484				    ("Error cannot be handled."));
1485				g_mirror_event_free(ep);
1486			} else {
1487				ep->e_flags |= G_MIRROR_EVENT_DONE;
1488				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1489				    ep);
1490				mtx_lock(&sc->sc_events_mtx);
1491				wakeup(ep);
1492				mtx_unlock(&sc->sc_events_mtx);
1493			}
1494			if ((sc->sc_flags &
1495			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1496				if (g_mirror_try_destroy(sc))
1497					kthread_exit(0);
1498			}
1499			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1500			continue;
1501		}
1502		/*
1503		 * Now I/O requests.
1504		 */
1505		/* Get first request from the queue. */
1506		mtx_lock(&sc->sc_queue_mtx);
1507		bp = bioq_first(&sc->sc_queue);
1508		if (bp == NULL) {
1509			if ((sc->sc_flags &
1510			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1511				mtx_unlock(&sc->sc_queue_mtx);
1512				if (g_mirror_try_destroy(sc))
1513					kthread_exit(0);
1514				mtx_lock(&sc->sc_queue_mtx);
1515			}
1516		}
1517		if (sc->sc_sync.ds_ndisks > 0 &&
1518		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1519			mtx_unlock(&sc->sc_queue_mtx);
1520			/*
1521			 * It is time for synchronization...
1522			 */
1523			nreqs = 0;
1524			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1525				if (disk->d_state !=
1526				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1527					continue;
1528				}
1529				sync = &disk->d_sync;
1530				if (sync->ds_offset >=
1531				    sc->sc_provider->mediasize) {
1532					continue;
1533				}
1534				if (sync->ds_offset > sync->ds_offset_done)
1535					continue;
1536				if (sync->ds_resync != -1) {
1537					sync->ds_offset = sync->ds_resync;
1538					sync->ds_offset_done = sync->ds_resync;
1539					sync->ds_resync = -1;
1540				}
1541				g_mirror_sync_one(disk);
1542			}
1543			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1544			goto sleep;
1545		}
1546		if (bp == NULL) {
1547			if (g_mirror_check_idle(sc)) {
1548				u_int idletime;
1549
1550				idletime = g_mirror_idletime;
1551				if (idletime == 0)
1552					idletime = 1;
1553				idletime *= hz;
1554				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1555				    "m:w1", idletime) == EWOULDBLOCK) {
1556					G_MIRROR_DEBUG(5, "%s: I'm here 3.",
1557					    __func__);
1558					/*
1559					 * No I/O requests in 'idletime' seconds,
1560					 * so mark components as clean.
1561					 */
1562					g_mirror_idle(sc);
1563				}
1564				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1565			} else {
1566				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1567				    "m:w2", 0);
1568				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1569			}
1570			continue;
1571		}
1572		nreqs++;
1573		bioq_remove(&sc->sc_queue, bp);
1574		mtx_unlock(&sc->sc_queue_mtx);
1575
1576		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1577			g_mirror_regular_request(bp);
1578		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1579			u_int timeout, sps;
1580
1581			g_mirror_sync_request(bp);
1582sleep:
1583			sps = g_mirror_syncs_per_sec;
1584			if (sps == 0) {
1585				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1586				continue;
1587			}
1588			mtx_lock(&sc->sc_queue_mtx);
1589			if (bioq_first(&sc->sc_queue) != NULL) {
1590				mtx_unlock(&sc->sc_queue_mtx);
1591				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1592				continue;
1593			}
1594			timeout = hz / sps;
1595			if (timeout == 0)
1596				timeout = 1;
1597			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1598			    timeout);
1599		} else {
1600			g_mirror_register_request(bp);
1601		}
1602		G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1603	}
1604}
1605
/*
 * Update the disk's DIRTY flag to match the write access count on the
 * mirror provider.
 */
1609static void
1610g_mirror_update_access(struct g_mirror_disk *disk)
1611{
1612	struct g_provider *pp;
1613
1614	g_topology_assert();
1615
1616	pp = disk->d_softc->sc_provider;
1617	if (pp == NULL)
1618		return;
1619	if (pp->acw > 0) {
1620		if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1621			G_MIRROR_DEBUG(1,
1622			    "Disk %s (device %s) marked as dirty.",
1623			    g_mirror_get_diskname(disk),
1624			    disk->d_softc->sc_name);
1625			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1626		}
1627	} else if (pp->acw == 0) {
1628		if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1629			G_MIRROR_DEBUG(1,
1630			    "Disk %s (device %s) marked as clean.",
1631			    g_mirror_get_diskname(disk),
1632			    disk->d_softc->sc_name);
1633			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1634		}
1635	}
1636}
1637
1638static void
1639g_mirror_sync_start(struct g_mirror_disk *disk)
1640{
1641	struct g_mirror_softc *sc;
1642	struct g_consumer *cp;
1643	int error;
1644
1645	g_topology_assert();
1646
1647	sc = disk->d_softc;
1648	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1649	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1650	    sc->sc_state));
1651	cp = disk->d_consumer;
1652
1653	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1654	    g_mirror_get_diskname(disk));
1655	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1656	KASSERT(disk->d_sync.ds_consumer == NULL,
1657	    ("Sync consumer already exists (device=%s, disk=%s).",
1658	    sc->sc_name, g_mirror_get_diskname(disk)));
1659	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1660	disk->d_sync.ds_consumer->private = disk;
1661	disk->d_sync.ds_consumer->index = 0;
1662	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1663	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1664	    disk->d_softc->sc_name, error));
1665	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1666	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1667	    disk->d_softc->sc_name, error));
1668	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1669	sc->sc_sync.ds_ndisks++;
1670}
1671
1672/*
1673 * Stop synchronization process.
1674 * type: 0 - synchronization finished
1675 *       1 - synchronization stopped
1676 */
1677static void
1678g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1679{
1680	struct g_consumer *cp;
1681
1682	g_topology_assert();
1683	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1684	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1685	    g_mirror_disk_state2str(disk->d_state)));
1686	if (disk->d_sync.ds_consumer == NULL)
1687		return;
1688
1689	if (type == 0) {
1690		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1691		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1692	} else /* if (type == 1) */ {
1693		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1694		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1695	}
1696	cp = disk->d_sync.ds_consumer;
1697	g_mirror_kill_consumer(disk->d_softc, cp);
1698	free(disk->d_sync.ds_data, M_MIRROR);
1699	disk->d_sync.ds_consumer = NULL;
1700	disk->d_softc->sc_sync.ds_ndisks--;
1701	cp = disk->d_consumer;
1702	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1703}
1704
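/*
 * Create and announce the mirror/<name> provider and start synchronization
 * for any disks that need it.
 */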
1705static void
1706g_mirror_launch_provider(struct g_mirror_softc *sc)
1707{
1708	struct g_mirror_disk *disk;
1709	struct g_provider *pp;
1710
1711	g_topology_assert();
1712
1713	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1714	pp->mediasize = sc->sc_mediasize;
1715	pp->sectorsize = sc->sc_sectorsize;
1716	sc->sc_provider = pp;
1717	g_error_provider(pp, 0);
1718	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1719	    pp->name);
1720	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1721		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1722			g_mirror_sync_start(disk);
1723	}
1724}
1725
1726static void
1727g_mirror_destroy_provider(struct g_mirror_softc *sc)
1728{
1729	struct g_mirror_disk *disk;
1730	struct bio *bp;
1731
1732	g_topology_assert();
1733	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1734	    sc->sc_name));
1735
1736	g_error_provider(sc->sc_provider, ENXIO);
1737	mtx_lock(&sc->sc_queue_mtx);
1738	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1739		bioq_remove(&sc->sc_queue, bp);
1740		g_io_deliver(bp, ENXIO);
1741	}
1742	mtx_unlock(&sc->sc_queue_mtx);
1743	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1744	    sc->sc_provider->name);
1745	sc->sc_provider->flags |= G_PF_WITHER;
1746	g_orphan_provider(sc->sc_provider, ENXIO);
1747	sc->sc_provider = NULL;
1748	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1749		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1750			g_mirror_sync_stop(disk, 1);
1751	}
1752}
1753
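/*
 * Force the device to start, after the startup timeout has expired, with
 * whatever disks have shown up so far.
 */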
1754static void
1755g_mirror_go(void *arg)
1756{
1757	struct g_mirror_softc *sc;
1758
1759	sc = arg;
1760	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1761	g_mirror_event_send(sc, 0,
1762	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1763}
1764
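/*
 * Decide the target state for a disk by comparing its syncid with the
 * device syncid: up-to-date disks become ACTIVE, outdated disks become
 * SYNCHRONIZING or STALE, and a disk that is fresher than the running
 * device is refused and destroyed.
 */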
1765static u_int
1766g_mirror_determine_state(struct g_mirror_disk *disk)
1767{
1768	struct g_mirror_softc *sc;
1769	u_int state;
1770
1771	sc = disk->d_softc;
1772	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1773		if ((disk->d_flags &
1774		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1775			/* Disk does not need synchronization. */
1776			state = G_MIRROR_DISK_STATE_ACTIVE;
1777		} else {
1778			if ((sc->sc_flags &
1779			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1780			    (disk->d_flags &
1781			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1782				/*
1783				 * We can start synchronization from
1784				 * the stored offset.
1785				 */
1786				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1787			} else {
1788				state = G_MIRROR_DISK_STATE_STALE;
1789			}
1790		}
1791	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1792		/*
1793		 * Reset all synchronization data for this disk,
1794		 * because if it even was synchronized, it was
1795		 * synchronized to disks with different syncid.
1796		 */
1797		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1798		disk->d_sync.ds_offset = 0;
1799		disk->d_sync.ds_offset_done = 0;
1800		disk->d_sync.ds_syncid = sc->sc_syncid;
1801		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1802		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1803			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1804		} else {
1805			state = G_MIRROR_DISK_STATE_STALE;
1806		}
1807	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1808		/*
		 * Not good: the mirror was started on stale disks and a
		 * fresher disk has just arrived.  If there were writes in
		 * the meantime, the mirror is now inconsistent.  The safest
		 * choice is to leave this disk alone and inform the user
		 * loudly.
1815		 */
1816		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrived! It will not be connected to the "
1818		    "running device.", sc->sc_name,
1819		    g_mirror_get_diskname(disk));
1820		g_mirror_destroy_disk(disk);
1821		state = G_MIRROR_DISK_STATE_NONE;
1822		/* Return immediately, because disk was destroyed. */
1823		return (state);
1824	}
1825	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1826	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1827	return (state);
1828}
1829
1830/*
1831 * Update device state.
1832 */
1833static void
1834g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1835{
1836	struct g_mirror_disk *disk;
1837	u_int state;
1838
1839	g_topology_assert();
1840
1841	switch (sc->sc_state) {
1842	case G_MIRROR_DEVICE_STATE_STARTING:
1843	    {
1844		struct g_mirror_disk *pdisk;
1845		u_int dirty, ndisks, syncid;
1846
1847		KASSERT(sc->sc_provider == NULL,
1848		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1849		/*
1850		 * Are we ready? We are, if all disks are connected or
1851		 * if we have any disks and 'force' is true.
1852		 */
1853		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1854		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1855			;
1856		} else if (g_mirror_ndisks(sc, -1) == 0) {
1857			/*
1858			 * Disks went down in starting phase, so destroy
1859			 * device.
1860			 */
1861			callout_drain(&sc->sc_callout);
1862			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1863			return;
1864		} else {
1865			return;
1866		}
1867
1868		/*
1869		 * Activate all disks with the biggest syncid.
1870		 */
1871		if (force) {
1872			/*
1873			 * If 'force' is true, we have been called due to
1874			 * timeout, so don't bother canceling timeout.
1875			 */
1876			ndisks = 0;
1877			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1878				if ((disk->d_flags &
1879				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1880					ndisks++;
1881				}
1882			}
1883			if (ndisks == 0) {
1884				/* No valid disks found, destroy device. */
1885				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1886				return;
1887			}
1888		} else {
1889			/* Cancel timeout. */
1890			callout_drain(&sc->sc_callout);
1891		}
1892
1893		/*
1894		 * Find disk with the biggest syncid.
1895		 */
1896		syncid = 0;
1897		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1898			if (disk->d_sync.ds_syncid > syncid)
1899				syncid = disk->d_sync.ds_syncid;
1900		}
1901
		/*
		 * Now look for dirty disks: if all disks with the biggest
		 * syncid are dirty, we have to choose the one with the
		 * biggest priority and rebuild the rest.
		 *
		 * Count the dirty disks and all disks with the biggest
		 * syncid, and while here remember the disk with the biggest
		 * priority.
		 */
1912		dirty = ndisks = 0;
1913		pdisk = NULL;
1914		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1915			if (disk->d_sync.ds_syncid != syncid)
1916				continue;
1917			if ((disk->d_flags &
1918			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1919				continue;
1920			}
1921			ndisks++;
1922			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1923				dirty++;
1924				if (pdisk == NULL ||
1925				    pdisk->d_priority < disk->d_priority) {
1926					pdisk = disk;
1927				}
1928			}
1929		}
1930		if (dirty == 0) {
1931			/* No dirty disks at all, great. */
1932		} else if (dirty == ndisks) {
1933			/*
1934			 * Force synchronization for all dirty disks except one
1935			 * with the biggest priority.
1936			 */
1937			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1938			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1939			    "master disk for synchronization.",
1940			    g_mirror_get_diskname(pdisk), sc->sc_name);
1941			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1942				if (disk->d_sync.ds_syncid != syncid)
1943					continue;
1944				if ((disk->d_flags &
1945				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1946					continue;
1947				}
1948				KASSERT((disk->d_flags &
1949				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1950				    ("Disk %s isn't marked as dirty.",
1951				    g_mirror_get_diskname(disk)));
1952				/* Skip the disk with the biggest priority. */
1953				if (disk == pdisk)
1954					continue;
1955				disk->d_sync.ds_syncid = 0;
1956			}
1957		} else if (dirty < ndisks) {
1958			/*
1959			 * Force synchronization for all dirty disks.
1960			 * We have some non-dirty disks.
1961			 */
1962			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1963				if (disk->d_sync.ds_syncid != syncid)
1964					continue;
1965				if ((disk->d_flags &
1966				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1967					continue;
1968				}
1969				if ((disk->d_flags &
1970				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1971					continue;
1972				}
1973				disk->d_sync.ds_syncid = 0;
1974			}
1975		}
1976
1977		/* Reset hint. */
1978		sc->sc_hint = NULL;
1979		sc->sc_syncid = syncid;
1980		if (force) {
1981			/* Remember to bump syncid on first write. */
1982			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1983		}
1984		state = G_MIRROR_DEVICE_STATE_RUNNING;
1985		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1986		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1987		    g_mirror_device_state2str(state));
1988		sc->sc_state = state;
1989		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1990			state = g_mirror_determine_state(disk);
1991			g_mirror_event_send(disk, state,
1992			    G_MIRROR_EVENT_DONTWAIT);
1993			if (state == G_MIRROR_DISK_STATE_STALE) {
1994				sc->sc_bump_syncid =
1995				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1996			}
1997		}
1998		wakeup(&g_mirror_class);
1999		break;
2000	    }
2001	case G_MIRROR_DEVICE_STATE_RUNNING:
2002		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2003		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2004			/*
2005			 * No active disks or no disks at all,
2006			 * so destroy device.
2007			 */
2008			if (sc->sc_provider != NULL)
2009				g_mirror_destroy_provider(sc);
2010			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2011			break;
2012		} else if (g_mirror_ndisks(sc,
2013		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2014		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2015			/*
2016			 * We have active disks, launch provider if it doesn't
2017			 * exist.
2018			 */
2019			if (sc->sc_provider == NULL)
2020				g_mirror_launch_provider(sc);
2021		}
2022		/*
2023		 * Bump syncid here, if we need to do it immediately.
2024		 */
2025		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
2026			sc->sc_bump_syncid = 0;
2027			g_mirror_bump_syncid(sc);
2028		}
2029		break;
2030	default:
2031		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2032		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2033		break;
2034	}
2035}
2036
2037/*
2038 * Update disk state and device state if needed.
2039 */
2040#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2041	"Disk %s state changed from %s to %s (device %s).",		\
2042	g_mirror_get_diskname(disk),					\
2043	g_mirror_disk_state2str(disk->d_state),				\
2044	g_mirror_disk_state2str(state), sc->sc_name)
2045static int
2046g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2047{
2048	struct g_mirror_softc *sc;
2049
2050	g_topology_assert();
2051
2052	sc = disk->d_softc;
2053again:
2054	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2055	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2056	    g_mirror_disk_state2str(state));
2057	switch (state) {
2058	case G_MIRROR_DISK_STATE_NEW:
2059		/*
2060		 * Possible scenarios:
2061		 * 1. A new disk arrives.
2062		 */
2063		/* Previous state should be NONE. */
2064		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2065		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2066		    g_mirror_disk_state2str(disk->d_state)));
2067		DISK_STATE_CHANGED();
2068
2069		disk->d_state = state;
2070		if (LIST_EMPTY(&sc->sc_disks))
2071			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2072		else {
2073			struct g_mirror_disk *dp;
2074
2075			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2076				if (disk->d_priority >= dp->d_priority) {
2077					LIST_INSERT_BEFORE(dp, disk, d_next);
2078					dp = NULL;
2079					break;
2080				}
2081				if (LIST_NEXT(dp, d_next) == NULL)
2082					break;
2083			}
2084			if (dp != NULL)
2085				LIST_INSERT_AFTER(dp, disk, d_next);
2086		}
2087		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2088		    sc->sc_name, g_mirror_get_diskname(disk));
2089		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2090			break;
2091		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2092		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2093		    g_mirror_device_state2str(sc->sc_state),
2094		    g_mirror_get_diskname(disk),
2095		    g_mirror_disk_state2str(disk->d_state)));
2096		state = g_mirror_determine_state(disk);
2097		if (state != G_MIRROR_DISK_STATE_NONE)
2098			goto again;
2099		break;
2100	case G_MIRROR_DISK_STATE_ACTIVE:
2101		/*
2102		 * Possible scenarios:
2103		 * 1. New disk does not need synchronization.
2104		 * 2. Synchronization process finished successfully.
2105		 */
2106		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2107		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2108		    g_mirror_device_state2str(sc->sc_state),
2109		    g_mirror_get_diskname(disk),
2110		    g_mirror_disk_state2str(disk->d_state)));
2111		/* Previous state should be NEW or SYNCHRONIZING. */
2112		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2113		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2114		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2115		    g_mirror_disk_state2str(disk->d_state)));
2116		DISK_STATE_CHANGED();
2117
2118		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2119			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2120		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2121			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2122			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2123			g_mirror_sync_stop(disk, 0);
2124		}
2125		disk->d_state = state;
2126		disk->d_sync.ds_offset = 0;
2127		disk->d_sync.ds_offset_done = 0;
2128		g_mirror_update_access(disk);
2129		g_mirror_update_metadata(disk);
2130		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2131		    sc->sc_name, g_mirror_get_diskname(disk));
2132		break;
2133	case G_MIRROR_DISK_STATE_STALE:
2134		/*
2135		 * Possible scenarios:
2136		 * 1. Stale disk was connected.
2137		 */
2138		/* Previous state should be NEW. */
2139		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2140		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2141		    g_mirror_disk_state2str(disk->d_state)));
2142		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2143		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2144		    g_mirror_device_state2str(sc->sc_state),
2145		    g_mirror_get_diskname(disk),
2146		    g_mirror_disk_state2str(disk->d_state)));
2147		/*
2148		 * STALE state is only possible if the device is marked
2149		 * NOAUTOSYNC.
2150		 */
2151		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2152		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2153		    g_mirror_device_state2str(sc->sc_state),
2154		    g_mirror_get_diskname(disk),
2155		    g_mirror_disk_state2str(disk->d_state)));
2156		DISK_STATE_CHANGED();
2157
2158		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2159		disk->d_state = state;
2160		g_mirror_update_metadata(disk);
2161		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2162		    sc->sc_name, g_mirror_get_diskname(disk));
2163		break;
2164	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2165		/*
2166		 * Possible scenarios:
2167		 * 1. Disk which needs synchronization was connected.
2168		 */
2169		/* Previous state should be NEW. */
2170		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2171		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2172		    g_mirror_disk_state2str(disk->d_state)));
2173		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2174		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2175		    g_mirror_device_state2str(sc->sc_state),
2176		    g_mirror_get_diskname(disk),
2177		    g_mirror_disk_state2str(disk->d_state)));
2178		DISK_STATE_CHANGED();
2179
2180		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2181			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2182		disk->d_state = state;
2183		if (sc->sc_provider != NULL) {
2184			g_mirror_sync_start(disk);
2185			g_mirror_update_metadata(disk);
2186		}
2187		break;
2188	case G_MIRROR_DISK_STATE_DISCONNECTED:
2189		/*
2190		 * Possible scenarios:
2191		 * 1. Device wasn't running yet, but a disk disappeared.
2192		 * 2. Disk was active and disappeared.
2193		 * 3. Disk disappeared during the synchronization process.
2194		 */
2195		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2196			/*
2197			 * Previous state should be ACTIVE, STALE or
2198			 * SYNCHRONIZING.
2199			 */
2200			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2201			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2202			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2203			    ("Wrong disk state (%s, %s).",
2204			    g_mirror_get_diskname(disk),
2205			    g_mirror_disk_state2str(disk->d_state)));
2206		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2207			/* Previous state should be NEW. */
2208			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2209			    ("Wrong disk state (%s, %s).",
2210			    g_mirror_get_diskname(disk),
2211			    g_mirror_disk_state2str(disk->d_state)));
2212			/*
2213			 * Reset bumping syncid if disk disappeared in STARTING
2214			 * state.
2215			 */
2216			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2217				sc->sc_bump_syncid = 0;
2218#ifdef	INVARIANTS
2219		} else {
2220			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2221			    sc->sc_name,
2222			    g_mirror_device_state2str(sc->sc_state),
2223			    g_mirror_get_diskname(disk),
2224			    g_mirror_disk_state2str(disk->d_state)));
2225#endif
2226		}
2227		DISK_STATE_CHANGED();
2228		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2229		    sc->sc_name, g_mirror_get_diskname(disk));
2230
2231		g_mirror_destroy_disk(disk);
2232		break;
2233	case G_MIRROR_DISK_STATE_DESTROY:
2234	    {
2235		int error;
2236
2237		error = g_mirror_clear_metadata(disk);
2238		if (error != 0)
2239			return (error);
2240		DISK_STATE_CHANGED();
2241		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2242		    sc->sc_name, g_mirror_get_diskname(disk));
2243
2244		g_mirror_destroy_disk(disk);
2245		sc->sc_ndisks--;
2246		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2247			g_mirror_update_metadata(disk);
2248		}
2249		break;
2250	    }
2251	default:
2252		KASSERT(1 == 0, ("Unknown state (%u).", state));
2253		break;
2254	}
2255	return (0);
2256}
2257#undef	DISK_STATE_CHANGED
2258
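/*
 * Read and decode mirror metadata from the last sector of the consumer's
 * provider.
 */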
2259static int
2260g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2261{
2262	struct g_provider *pp;
2263	u_char *buf;
2264	int error;
2265
2266	g_topology_assert();
2267
2268	error = g_access(cp, 1, 0, 0);
2269	if (error != 0)
2270		return (error);
2271	pp = cp->provider;
2272	g_topology_unlock();
2273	/* Metadata are stored on the last sector. */
2274	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2275	    &error);
2276	g_topology_lock();
2277	g_access(cp, -1, 0, 0);
2278	if (error != 0) {
2279		if (buf != NULL)
2280			g_free(buf);
2281		return (error);
2282	}
2283
2284	/* Decode metadata. */
2285	error = mirror_metadata_decode(buf, md);
2286	g_free(buf);
2287	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2288		return (EINVAL);
2289	if (error != 0) {
2290		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2291		    cp->provider->name);
2292		return (error);
2293	}
2294
2295	return (0);
2296}
2297
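/*
 * Check that the metadata read from the given provider is consistent
 * with the configuration of the existing device.
 */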
2298static int
2299g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2300    struct g_mirror_metadata *md)
2301{
2302
2303	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2304		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2305		    pp->name, md->md_did);
2306		return (EEXIST);
2307	}
2308	if (md->md_all != sc->sc_ndisks) {
2309		G_MIRROR_DEBUG(1,
2310		    "Invalid '%s' field on disk %s (device %s), skipping.",
2311		    "md_all", pp->name, sc->sc_name);
2312		return (EINVAL);
2313	}
2314	if (md->md_slice != sc->sc_slice) {
2315		G_MIRROR_DEBUG(1,
2316		    "Invalid '%s' field on disk %s (device %s), skipping.",
2317		    "md_slice", pp->name, sc->sc_name);
2318		return (EINVAL);
2319	}
2320	if (md->md_balance != sc->sc_balance) {
2321		G_MIRROR_DEBUG(1,
2322		    "Invalid '%s' field on disk %s (device %s), skipping.",
2323		    "md_balance", pp->name, sc->sc_name);
2324		return (EINVAL);
2325	}
2326	if (md->md_mediasize != sc->sc_mediasize) {
2327		G_MIRROR_DEBUG(1,
2328		    "Invalid '%s' field on disk %s (device %s), skipping.",
2329		    "md_mediasize", pp->name, sc->sc_name);
2330		return (EINVAL);
2331	}
2332	if (sc->sc_mediasize > pp->mediasize) {
2333		G_MIRROR_DEBUG(1,
2334		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2335		    sc->sc_name);
2336		return (EINVAL);
2337	}
2338	if (md->md_sectorsize != sc->sc_sectorsize) {
2339		G_MIRROR_DEBUG(1,
2340		    "Invalid '%s' field on disk %s (device %s), skipping.",
2341		    "md_sectorsize", pp->name, sc->sc_name);
2342		return (EINVAL);
2343	}
2344	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2345		G_MIRROR_DEBUG(1,
2346		    "Invalid sector size of disk %s (device %s), skipping.",
2347		    pp->name, sc->sc_name);
2348		return (EINVAL);
2349	}
2350	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2351		G_MIRROR_DEBUG(1,
2352		    "Invalid device flags on disk %s (device %s), skipping.",
2353		    pp->name, sc->sc_name);
2354		return (EINVAL);
2355	}
2356	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2357		G_MIRROR_DEBUG(1,
2358		    "Invalid disk flags on disk %s (device %s), skipping.",
2359		    pp->name, sc->sc_name);
2360		return (EINVAL);
2361	}
2362	return (0);
2363}
2364
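/*
 * Add a new component, described by its on-disk metadata, to the device.
 */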
2365static int
2366g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2367    struct g_mirror_metadata *md)
2368{
2369	struct g_mirror_disk *disk;
2370	int error;
2371
2372	g_topology_assert();
2373	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2374
2375	error = g_mirror_check_metadata(sc, pp, md);
2376	if (error != 0)
2377		return (error);
2378	disk = g_mirror_init_disk(sc, pp, md, &error);
2379	if (disk == NULL)
2380		return (error);
2381	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2382	    G_MIRROR_EVENT_WAIT);
2383	return (error);
2384}
2385
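/*
 * Access method for the mirror provider.  Active components are marked
 * dirty when the provider is first opened for writing and clean again
 * when the last writer closes it.
 */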
2386static int
2387g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2388{
2389	struct g_mirror_softc *sc;
2390	struct g_mirror_disk *disk;
2391	int dcr, dcw, dce;
2392
2393	g_topology_assert();
2394	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2395	    acw, ace);
2396
2397	dcr = pp->acr + acr;
2398	dcw = pp->acw + acw;
2399	dce = pp->ace + ace;
2400
2401	sc = pp->geom->softc;
2402	if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
2403	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
2404		if (acr <= 0 && acw <= 0 && ace <= 0)
2405			return (0);
2406		else
2407			return (ENXIO);
2408	}
2409	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2410		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2411			continue;
2412		/*
2413		 * Mark disk as dirty on first write open, clean on last close.
2414		 */
2415		if (pp->acw == 0 && dcw > 0) {
2416			G_MIRROR_DEBUG(1,
2417			    "Disk %s (device %s) marked as dirty.",
2418			    g_mirror_get_diskname(disk), sc->sc_name);
2419			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2420			g_mirror_update_metadata(disk);
2421		} else if (pp->acw > 0 && dcw == 0) {
2422			G_MIRROR_DEBUG(1,
2423			    "Disk %s (device %s) marked as clean.",
2424			    g_mirror_get_diskname(disk), sc->sc_name);
2425			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2426			g_mirror_update_metadata(disk);
2427		}
2428	}
2429	return (0);
2430}
2431
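/*
 * Create a mirror device from the given metadata: set up the main and
 * synchronization geoms, start the worker thread and arm the startup
 * timeout.
 */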
2432static struct g_geom *
2433g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2434{
2435	struct g_mirror_softc *sc;
2436	struct g_geom *gp;
2437	int error, timeout;
2438
2439	g_topology_assert();
2440	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2441	    md->md_mid);
2442
2443	/* One disk is the minimum. */
2444	if (md->md_all < 1)
2445		return (NULL);
2446	/*
2447	 * Action geom.
2448	 */
2449	gp = g_new_geomf(mp, "%s", md->md_name);
2450	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2451	gp->start = g_mirror_start;
2452	gp->spoiled = g_mirror_spoiled;
2453	gp->orphan = g_mirror_orphan;
2454	gp->access = g_mirror_access;
2455	gp->dumpconf = g_mirror_dumpconf;
2456
2457	sc->sc_id = md->md_mid;
2458	sc->sc_slice = md->md_slice;
2459	sc->sc_balance = md->md_balance;
2460	sc->sc_mediasize = md->md_mediasize;
2461	sc->sc_sectorsize = md->md_sectorsize;
2462	sc->sc_ndisks = md->md_all;
2463	sc->sc_flags = md->md_mflags;
2464	sc->sc_bump_syncid = 0;
2465	sc->sc_idle = 0;
2466	bioq_init(&sc->sc_queue);
2467	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2468	LIST_INIT(&sc->sc_disks);
2469	TAILQ_INIT(&sc->sc_events);
2470	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2471	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2472	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2473	gp->softc = sc;
2474	sc->sc_geom = gp;
2475	sc->sc_provider = NULL;
2476	/*
2477	 * Synchronization geom.
2478	 */
2479	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2480	gp->softc = sc;
2481	gp->orphan = g_mirror_orphan;
2482	sc->sc_sync.ds_geom = gp;
2483	sc->sc_sync.ds_ndisks = 0;
2484	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2485	    "g_mirror %s", md->md_name);
2486	if (error != 0) {
2487		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2488		    sc->sc_name);
2489		g_destroy_geom(sc->sc_sync.ds_geom);
2490		mtx_destroy(&sc->sc_events_mtx);
2491		mtx_destroy(&sc->sc_queue_mtx);
2492		g_destroy_geom(sc->sc_geom);
2493		free(sc, M_MIRROR);
2494		return (NULL);
2495	}
2496
2497	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2498
2499	/*
2500	 * Run timeout.
2501	 */
2502	timeout = g_mirror_timeout * hz;
2503	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2504	return (sc->sc_geom);
2505}
2506
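/*
 * Mark the device for destruction, wake up the worker thread and wait
 * until it exits, then tear the device down.  Without 'force' this
 * fails with EBUSY if the provider is still open.
 */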
2507int
2508g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2509{
2510	struct g_provider *pp;
2511
2512	g_topology_assert();
2513
2514	if (sc == NULL)
2515		return (ENXIO);
2516	pp = sc->sc_provider;
2517	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2518		if (force) {
2519			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2520			    "can't be definitively removed.", pp->name);
2521		} else {
2522			G_MIRROR_DEBUG(1,
2523			    "Device %s is still open (r%dw%de%d).", pp->name,
2524			    pp->acr, pp->acw, pp->ace);
2525			return (EBUSY);
2526		}
2527	}
2528
2529	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2530	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2531	g_topology_unlock();
2532	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2533	mtx_lock(&sc->sc_queue_mtx);
2534	wakeup(sc);
2535	mtx_unlock(&sc->sc_queue_mtx);
2536	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2537	while (sc->sc_worker != NULL)
2538		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2539	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2540	g_topology_lock();
2541	g_mirror_destroy_device(sc);
2542	free(sc, M_MIRROR);
2543	return (0);
2544}
2545
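/* Orphan method for the transient tasting consumer; it must never fire. */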
2546static void
2547g_mirror_taste_orphan(struct g_consumer *cp)
2548{
2549
2550	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2551	    cp->provider->name));
2552}
2553
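/*
 * Taste method: read metadata from the given provider and, if it is
 * valid, attach the provider to an existing mirror device or create a
 * new one for it.
 */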
2554static struct g_geom *
2555g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2556{
2557	struct g_mirror_metadata md;
2558	struct g_mirror_softc *sc;
2559	struct g_consumer *cp;
2560	struct g_geom *gp;
2561	int error;
2562
2563	g_topology_assert();
2564	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2565	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2566
2567	gp = g_new_geomf(mp, "mirror:taste");
2568	/*
2569	 * This orphan function should never be called.
2570	 */
2571	gp->orphan = g_mirror_taste_orphan;
2572	cp = g_new_consumer(gp);
2573	g_attach(cp, pp);
2574	error = g_mirror_read_metadata(cp, &md);
2575	g_detach(cp);
2576	g_destroy_consumer(cp);
2577	g_destroy_geom(gp);
2578	if (error != 0)
2579		return (NULL);
2580	gp = NULL;
2581
2582	if (md.md_version > G_MIRROR_VERSION) {
2583		printf("geom_mirror.ko module is too old to handle %s.\n",
2584		    pp->name);
2585		return (NULL);
2586	}
2587	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2588		return (NULL);
2589	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2590		G_MIRROR_DEBUG(0,
2591		    "Device %s: provider %s marked as inactive, skipping.",
2592		    md.md_name, pp->name);
2593		return (NULL);
2594	}
2595	if (g_mirror_debug >= 2)
2596		mirror_metadata_dump(&md);
2597
2598	/*
2599	 * Let's check if the device already exists.
2600	 */
2601	sc = NULL;
2602	LIST_FOREACH(gp, &mp->geom, geom) {
2603		sc = gp->softc;
2604		if (sc == NULL)
2605			continue;
2606		if (sc->sc_sync.ds_geom == gp)
2607			continue;
2608		if (strcmp(md.md_name, sc->sc_name) != 0)
2609			continue;
2610		if (md.md_mid != sc->sc_id) {
2611			G_MIRROR_DEBUG(0, "Device %s already configured.",
2612			    sc->sc_name);
2613			return (NULL);
2614		}
2615		break;
2616	}
2617	if (gp == NULL) {
2618		gp = g_mirror_create(mp, &md);
2619		if (gp == NULL) {
2620			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2621			    md.md_name);
2622			return (NULL);
2623		}
2624		sc = gp->softc;
2625	}
2626	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2627	error = g_mirror_add_disk(sc, pp, &md);
2628	if (error != 0) {
2629		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2630		    pp->name, gp->name, error);
2631		if (LIST_EMPTY(&sc->sc_disks))
2632			g_mirror_destroy(sc, 1);
2633		return (NULL);
2634	}
2635	return (gp);
2636}
2637
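/* The destroy_geom method; forwards to g_mirror_destroy() without force. */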
2638static int
2639g_mirror_destroy_geom(struct gctl_req *req __unused,
2640    struct g_class *mp __unused, struct g_geom *gp)
2641{
2642
2643	return (g_mirror_destroy(gp->softc, 0));
2644}
2645
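/*
 * Dump device and component state in XML form for the GEOM
 * configuration output.
 */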
2646static void
2647g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2648    struct g_consumer *cp, struct g_provider *pp)
2649{
2650	struct g_mirror_softc *sc;
2651
2652	g_topology_assert();
2653
2654	sc = gp->softc;
2655	if (sc == NULL)
2656		return;
2657	/* Skip synchronization geom. */
2658	if (gp == sc->sc_sync.ds_geom)
2659		return;
2660	if (pp != NULL) {
2661		/* Nothing here. */
2662	} else if (cp != NULL) {
2663		struct g_mirror_disk *disk;
2664
2665		disk = cp->private;
2666		if (disk == NULL)
2667			return;
2668		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2669		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2670			sbuf_printf(sb, "%s<Synchronized>", indent);
2671			if (disk->d_sync.ds_offset_done == 0)
2672				sbuf_printf(sb, "0%%");
2673			else {
2674				sbuf_printf(sb, "%u%%",
2675				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2676				    sc->sc_provider->mediasize));
2677			}
2678			sbuf_printf(sb, "</Synchronized>\n");
2679		}
2680		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2681		    disk->d_sync.ds_syncid);
2682		sbuf_printf(sb, "%s<Flags>", indent);
2683		if (disk->d_flags == 0)
2684			sbuf_printf(sb, "NONE");
2685		else {
2686			int first = 1;
2687
2688#define	ADD_FLAG(flag, name)	do {					\
2689	if ((disk->d_flags & (flag)) != 0) {				\
2690		if (!first)						\
2691			sbuf_printf(sb, ", ");				\
2692		else							\
2693			first = 0;					\
2694		sbuf_printf(sb, name);					\
2695	}								\
2696} while (0)
2697			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2698			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2699			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2700			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2701			    "SYNCHRONIZING");
2702			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2703#undef	ADD_FLAG
2704		}
2705		sbuf_printf(sb, "</Flags>\n");
2706		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2707		    disk->d_priority);
2708		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2709		    g_mirror_disk_state2str(disk->d_state));
2710	} else {
2711		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2712		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2713		sbuf_printf(sb, "%s<Flags>", indent);
2714		if (sc->sc_flags == 0)
2715			sbuf_printf(sb, "NONE");
2716		else {
2717			int first = 1;
2718
2719#define	ADD_FLAG(flag, name)	do {					\
2720	if ((sc->sc_flags & (flag)) != 0) {				\
2721		if (!first)						\
2722			sbuf_printf(sb, ", ");				\
2723		else							\
2724			first = 0;					\
2725		sbuf_printf(sb, name);					\
2726	}								\
2727} while (0)
2728			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2729#undef	ADD_FLAG
2730		}
2731		sbuf_printf(sb, "</Flags>\n");
2732		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2733		    (u_int)sc->sc_slice);
2734		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2735		    balance_name(sc->sc_balance));
2736		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2737		    sc->sc_ndisks);
2738		sbuf_printf(sb, "%s<State>", indent);
2739		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2740			sbuf_printf(sb, "%s", "STARTING");
2741		else if (sc->sc_ndisks ==
2742		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2743			sbuf_printf(sb, "%s", "COMPLETE");
2744		else
2745			sbuf_printf(sb, "%s", "DEGRADED");
2746		sbuf_printf(sb, "</State>\n");
2747	}
2748}
2749
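/* Shutdown event handler: destroy all mirror devices on system shutdown. */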
2750static void
2751g_mirror_shutdown(void *arg, int howto)
2752{
2753	struct g_class *mp;
2754	struct g_geom *gp, *gp2;
2755
2756	mp = arg;
2757	DROP_GIANT();
2758	g_topology_lock();
2759	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
2760		if (gp->softc == NULL)
2761			continue;
2762		g_mirror_destroy(gp->softc, 1);
2763	}
2764	g_topology_unlock();
2765	PICKUP_GIANT();
2766#if 0
2767	tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
2768#endif
2769}
2770
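/* Class initialization: register the shutdown event handler. */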
2771static void
2772g_mirror_init(struct g_class *mp)
2773{
2774
2775	g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
2776	    g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST);
2777	if (g_mirror_ehtag == NULL)
2778		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
2779}
2780
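/* Class cleanup: deregister the shutdown event handler. */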
2781static void
2782g_mirror_fini(struct g_class *mp)
2783{
2784
2785	if (g_mirror_ehtag == NULL)
2786		return;
2787	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag);
2788}
2789
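/*
 * Return non-zero once every mirror device has a usable provider, so
 * that waiting for the root device can finish.
 */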
2790static int
2791g_mirror_can_go(void)
2792{
2793	struct g_mirror_softc *sc;
2794	struct g_geom *gp;
2795	struct g_provider *pp;
2796	int can_go;
2797
2798	DROP_GIANT();
2799	can_go = 1;
2800	g_topology_lock();
2801	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2802		sc = gp->softc;
2803		if (sc == NULL) {
2804			can_go = 0;
2805			break;
2806		}
2807		pp = sc->sc_provider;
2808		if (pp == NULL || pp->error != 0) {
2809			can_go = 0;
2810			break;
2811		}
2812	}
2813	g_topology_unlock();
2814	PICKUP_GIANT();
2815	return (can_go);
2816}
2817
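/*
 * Wait at boot until all mirror devices are usable, so that the root
 * file system can live on a mirror.
 */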
2818static void
2819g_mirror_rootwait(void)
2820{
2821
2822	/*
2823	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called
2824	 * HACK: before we get providers for tasting.
2825	 */
2826	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2827	/*
2828	 * Wait for mirrors in degraded state.
2829	 */
2830	for (;;) {
2831		if (g_mirror_can_go())
2832			break;
2833		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2834	}
2835}
2836
2837SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)
2838
2839DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2840