g_mirror.c revision 137487
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 137487 2004-11-09 23:15:40Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/eventhandler.h>
41#include <vm/uma.h>
42#include <geom/geom.h>
43#include <sys/proc.h>
44#include <sys/kthread.h>
45#include <geom/mirror/g_mirror.h>
46
47
48static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
49
50SYSCTL_DECL(_kern_geom);
51SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
52u_int g_mirror_debug = 0;
53TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
54SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
55    "Debug level");
56static u_int g_mirror_timeout = 4;
57TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
58SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
59    0, "Time to wait on all mirror components");
60static u_int g_mirror_idletime = 5;
61TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
62SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
63    &g_mirror_idletime, 0, "Mark components as clean when idling");
64static u_int g_mirror_reqs_per_sync = 5;
65SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
66    &g_mirror_reqs_per_sync, 0,
67    "Number of regular I/O requests per synchronization request");
68static u_int g_mirror_syncs_per_sec = 100;
69SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
70    &g_mirror_syncs_per_sec, 0,
71    "Number of synchronizations requests per second");
72
73#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
74	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
75	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
76	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
77} while (0)
78
79static eventhandler_tag g_mirror_ehtag = NULL;
80
81static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
82    struct g_geom *gp);
83static g_taste_t g_mirror_taste;
84static void g_mirror_init(struct g_class *mp);
85static void g_mirror_fini(struct g_class *mp);
86
87struct g_class g_mirror_class = {
88	.name = G_MIRROR_CLASS_NAME,
89	.version = G_VERSION,
90	.ctlreq = g_mirror_config,
91	.taste = g_mirror_taste,
92	.destroy_geom = g_mirror_destroy_geom,
93	.init = g_mirror_init,
94	.fini = g_mirror_fini
95};
96
97
98static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
99static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
100static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
101static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
102    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
103static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
104
105
106static const char *
107g_mirror_disk_state2str(int state)
108{
109
110	switch (state) {
111	case G_MIRROR_DISK_STATE_NONE:
112		return ("NONE");
113	case G_MIRROR_DISK_STATE_NEW:
114		return ("NEW");
115	case G_MIRROR_DISK_STATE_ACTIVE:
116		return ("ACTIVE");
117	case G_MIRROR_DISK_STATE_STALE:
118		return ("STALE");
119	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
120		return ("SYNCHRONIZING");
121	case G_MIRROR_DISK_STATE_DISCONNECTED:
122		return ("DISCONNECTED");
123	case G_MIRROR_DISK_STATE_DESTROY:
124		return ("DESTROY");
125	default:
126		return ("INVALID");
127	}
128}
129
130static const char *
131g_mirror_device_state2str(int state)
132{
133
134	switch (state) {
135	case G_MIRROR_DEVICE_STATE_STARTING:
136		return ("STARTING");
137	case G_MIRROR_DEVICE_STATE_RUNNING:
138		return ("RUNNING");
139	default:
140		return ("INVALID");
141	}
142}
143
144static const char *
145g_mirror_get_diskname(struct g_mirror_disk *disk)
146{
147
148	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
149		return ("[unknown]");
150	return (disk->d_name);
151}
152
153/*
154 * --- Events handling functions ---
155 * Events in geom_mirror are used to maintain disks and device status
156 * from one thread to simplify locking.
157 */
158static void
159g_mirror_event_free(struct g_mirror_event *ep)
160{
161
162	free(ep, M_MIRROR);
163}
164
165int
166g_mirror_event_send(void *arg, int state, int flags)
167{
168	struct g_mirror_softc *sc;
169	struct g_mirror_disk *disk;
170	struct g_mirror_event *ep;
171	int error;
172
173	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
174	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
175	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
176		disk = NULL;
177		sc = arg;
178	} else {
179		disk = arg;
180		sc = disk->d_softc;
181	}
182	ep->e_disk = disk;
183	ep->e_state = state;
184	ep->e_flags = flags;
185	ep->e_error = 0;
186	mtx_lock(&sc->sc_events_mtx);
187	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
188	mtx_unlock(&sc->sc_events_mtx);
189	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
190	mtx_lock(&sc->sc_queue_mtx);
191	wakeup(sc);
192	mtx_unlock(&sc->sc_queue_mtx);
193	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
194		return (0);
195	g_topology_assert();
196	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
197	g_topology_unlock();
198	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
199		mtx_lock(&sc->sc_events_mtx);
200		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
201		    hz * 5);
202	}
203	/* Don't even try to use 'sc' here, because it could be already dead. */
204	g_topology_lock();
205	error = ep->e_error;
206	g_mirror_event_free(ep);
207	return (error);
208}
209
210static struct g_mirror_event *
211g_mirror_event_get(struct g_mirror_softc *sc)
212{
213	struct g_mirror_event *ep;
214
215	mtx_lock(&sc->sc_events_mtx);
216	ep = TAILQ_FIRST(&sc->sc_events);
217	if (ep != NULL)
218		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
219	mtx_unlock(&sc->sc_events_mtx);
220	return (ep);
221}
222
223static void
224g_mirror_event_cancel(struct g_mirror_disk *disk)
225{
226	struct g_mirror_softc *sc;
227	struct g_mirror_event *ep, *tmpep;
228
229	g_topology_assert();
230
231	sc = disk->d_softc;
232	mtx_lock(&sc->sc_events_mtx);
233	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
234		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
235			continue;
236		if (ep->e_disk != disk)
237			continue;
238		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
239		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
240			g_mirror_event_free(ep);
241		else {
242			ep->e_error = ECANCELED;
243			wakeup(ep);
244		}
245	}
246	mtx_unlock(&sc->sc_events_mtx);
247}
248
249/*
250 * Return the number of disks in given state.
251 * If state is equal to -1, count all connected disks.
252 */
253u_int
254g_mirror_ndisks(struct g_mirror_softc *sc, int state)
255{
256	struct g_mirror_disk *disk;
257	u_int n = 0;
258
259	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
260		if (state == -1 || disk->d_state == state)
261			n++;
262	}
263	return (n);
264}
265
266/*
267 * Find a disk in mirror by its disk ID.
268 */
269static struct g_mirror_disk *
270g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
271{
272	struct g_mirror_disk *disk;
273
274	g_topology_assert();
275
276	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
277		if (disk->d_id == id)
278			return (disk);
279	}
280	return (NULL);
281}
282
283static u_int
284g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
285{
286	struct bio *bp;
287	u_int nreqs = 0;
288
289	mtx_lock(&sc->sc_queue_mtx);
290	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
291		if (bp->bio_from == cp)
292			nreqs++;
293	}
294	mtx_unlock(&sc->sc_queue_mtx);
295	return (nreqs);
296}
297
298static int
299g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
300{
301
302	if (cp->index > 0) {
303		G_MIRROR_DEBUG(2,
304		    "I/O requests for %s exist, can't destroy it now.",
305		    cp->provider->name);
306		return (1);
307	}
308	if (g_mirror_nrequests(sc, cp) > 0) {
309		G_MIRROR_DEBUG(2,
310		    "I/O requests for %s in queue, can't destroy it now.",
311		    cp->provider->name);
312		return (1);
313	}
314	return (0);
315}
316
317static void
318g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
319{
320
321	g_topology_assert();
322
323	cp->private = NULL;
324	if (g_mirror_is_busy(sc, cp))
325		return;
326	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
327	g_detach(cp);
328	g_destroy_consumer(cp);
329}
330
331static int
332g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
333{
334	int error;
335
336	g_topology_assert();
337	KASSERT(disk->d_consumer == NULL,
338	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
339
340	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
341	disk->d_consumer->private = disk;
342	disk->d_consumer->index = 0;
343	error = g_attach(disk->d_consumer, pp);
344	if (error != 0)
345		return (error);
346	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
347	return (0);
348}
349
350static void
351g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
352{
353
354	g_topology_assert();
355
356	if (cp == NULL)
357		return;
358	if (cp->provider != NULL) {
359		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
360		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
361			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
362			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
363			    0);
364			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
365		}
366		g_mirror_kill_consumer(sc, cp);
367	} else {
368		g_destroy_consumer(cp);
369	}
370}
371
372/*
373 * Initialize disk. This means allocate memory, create consumer, attach it
374 * to the provider and open access (r1w1e1) to it.
375 */
376static struct g_mirror_disk *
377g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
378    struct g_mirror_metadata *md, int *errorp)
379{
380	struct g_mirror_disk *disk;
381	int error;
382
383	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
384	if (disk == NULL) {
385		error = ENOMEM;
386		goto fail;
387	}
388	disk->d_softc = sc;
389	error = g_mirror_connect_disk(disk, pp);
390	if (error != 0)
391		goto fail;
392	disk->d_id = md->md_did;
393	disk->d_state = G_MIRROR_DISK_STATE_NONE;
394	disk->d_priority = md->md_priority;
395	disk->d_delay.sec = 0;
396	disk->d_delay.frac = 0;
397	binuptime(&disk->d_last_used);
398	disk->d_flags = md->md_dflags;
399	if (md->md_provider[0] != '\0')
400		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
401	disk->d_sync.ds_consumer = NULL;
402	disk->d_sync.ds_offset = md->md_sync_offset;
403	disk->d_sync.ds_offset_done = md->md_sync_offset;
404	disk->d_sync.ds_resync = -1;
405	disk->d_sync.ds_syncid = md->md_syncid;
406	if (errorp != NULL)
407		*errorp = 0;
408	return (disk);
409fail:
410	if (errorp != NULL)
411		*errorp = error;
412	if (disk != NULL) {
413		g_mirror_disconnect_consumer(sc, disk->d_consumer);
414		free(disk, M_MIRROR);
415	}
416	return (NULL);
417}
418
419static void
420g_mirror_destroy_disk(struct g_mirror_disk *disk)
421{
422	struct g_mirror_softc *sc;
423
424	g_topology_assert();
425
426	LIST_REMOVE(disk, d_next);
427	g_mirror_event_cancel(disk);
428	sc = disk->d_softc;
429	if (sc->sc_hint == disk)
430		sc->sc_hint = NULL;
431	switch (disk->d_state) {
432	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
433		g_mirror_sync_stop(disk, 1);
434		/* FALLTHROUGH */
435	case G_MIRROR_DISK_STATE_NEW:
436	case G_MIRROR_DISK_STATE_STALE:
437	case G_MIRROR_DISK_STATE_ACTIVE:
438		g_mirror_disconnect_consumer(sc, disk->d_consumer);
439		free(disk, M_MIRROR);
440		break;
441	default:
442		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
443		    g_mirror_get_diskname(disk),
444		    g_mirror_disk_state2str(disk->d_state)));
445	}
446}
447
448static void
449g_mirror_destroy_device(struct g_mirror_softc *sc)
450{
451	struct g_mirror_disk *disk;
452	struct g_mirror_event *ep;
453	struct g_geom *gp;
454	struct g_consumer *cp, *tmpcp;
455
456	g_topology_assert();
457
458	gp = sc->sc_geom;
459	if (sc->sc_provider != NULL)
460		g_mirror_destroy_provider(sc);
461	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
462	    disk = LIST_FIRST(&sc->sc_disks)) {
463		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
464		g_mirror_update_metadata(disk);
465		g_mirror_destroy_disk(disk);
466	}
467	while ((ep = g_mirror_event_get(sc)) != NULL) {
468		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
469			g_mirror_event_free(ep);
470		else {
471			ep->e_error = ECANCELED;
472			ep->e_flags |= G_MIRROR_EVENT_DONE;
473			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
474			mtx_lock(&sc->sc_events_mtx);
475			wakeup(ep);
476			mtx_unlock(&sc->sc_events_mtx);
477		}
478	}
479	callout_drain(&sc->sc_callout);
480	gp->softc = NULL;
481
482	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
483		g_mirror_disconnect_consumer(sc, cp);
484	}
485	sc->sc_sync.ds_geom->softc = NULL;
486	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
487	mtx_destroy(&sc->sc_queue_mtx);
488	mtx_destroy(&sc->sc_events_mtx);
489	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
490	g_wither_geom(gp, ENXIO);
491}
492
493static void
494g_mirror_orphan(struct g_consumer *cp)
495{
496	struct g_mirror_disk *disk;
497
498	g_topology_assert();
499
500	disk = cp->private;
501	if (disk == NULL)
502		return;
503	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
504	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
505	    G_MIRROR_EVENT_DONTWAIT);
506}
507
508static void
509g_mirror_spoiled(struct g_consumer *cp)
510{
511	struct g_mirror_disk *disk;
512
513	g_topology_assert();
514
515	disk = cp->private;
516	if (disk == NULL)
517		return;
518	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
519	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
520	    G_MIRROR_EVENT_DONTWAIT);
521}
522
523/*
524 * Function should return the next active disk on the list.
525 * It is possible that it will be the same disk as given.
526 * If there are no active disks on list, NULL is returned.
527 */
528static __inline struct g_mirror_disk *
529g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
530{
531	struct g_mirror_disk *dp;
532
533	for (dp = LIST_NEXT(disk, d_next); dp != disk;
534	    dp = LIST_NEXT(dp, d_next)) {
535		if (dp == NULL)
536			dp = LIST_FIRST(&sc->sc_disks);
537		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
538			break;
539	}
540	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
541		return (NULL);
542	return (dp);
543}
544
545static struct g_mirror_disk *
546g_mirror_get_disk(struct g_mirror_softc *sc)
547{
548	struct g_mirror_disk *disk;
549
550	if (sc->sc_hint == NULL) {
551		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
552		if (sc->sc_hint == NULL)
553			return (NULL);
554	}
555	disk = sc->sc_hint;
556	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
557		disk = g_mirror_find_next(sc, disk);
558		if (disk == NULL)
559			return (NULL);
560	}
561	sc->sc_hint = g_mirror_find_next(sc, disk);
562	return (disk);
563}
564
565static int
566g_mirror_write_metadata(struct g_mirror_disk *disk,
567    struct g_mirror_metadata *md)
568{
569	struct g_mirror_softc *sc;
570	struct g_consumer *cp;
571	off_t offset, length;
572	u_char *sector;
573	int close = 0, error = 0;
574
575	g_topology_assert();
576
577	sc = disk->d_softc;
578	cp = disk->d_consumer;
579	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
580	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
581	length = cp->provider->sectorsize;
582	offset = cp->provider->mediasize - length;
583	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
584	/*
585	 * Open consumer if it wasn't opened and remember to close it.
586	 */
587	if (cp->acw == 0) {
588		error = g_access(cp, 0, 1, 1);
589		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
590		    cp->provider->name, 0, 1, 1, error);
591		if (error == 0)
592			close = 1;
593#ifdef	INVARIANTS
594	} else {
595		KASSERT(cp->acw > 0 && cp->ace > 0,
596		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
597		    cp->acr, cp->acw, cp->ace));
598#endif
599	}
600	if (error == 0) {
601		if (md != NULL)
602			mirror_metadata_encode(md, sector);
603		g_topology_unlock();
604		error = g_write_data(cp, offset, sector, length);
605		g_topology_lock();
606	}
607	free(sector, M_MIRROR);
608	if (close) {
609		g_access(cp, 0, -1, -1);
610		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
611		    cp->provider->name, 0, -1, -1, 0);
612	}
613	if (error != 0) {
614		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
615		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
616		    G_MIRROR_EVENT_DONTWAIT);
617	}
618	return (error);
619}
620
621static int
622g_mirror_clear_metadata(struct g_mirror_disk *disk)
623{
624	int error;
625
626	g_topology_assert();
627	error = g_mirror_write_metadata(disk, NULL);
628	if (error == 0) {
629		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
630		    g_mirror_get_diskname(disk));
631	} else {
632		G_MIRROR_DEBUG(0,
633		    "Cannot clear metadata on disk %s (error=%d).",
634		    g_mirror_get_diskname(disk), error);
635	}
636	return (error);
637}
638
639void
640g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
641    struct g_mirror_metadata *md)
642{
643
644	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
645	md->md_version = G_MIRROR_VERSION;
646	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
647	md->md_mid = sc->sc_id;
648	md->md_all = sc->sc_ndisks;
649	md->md_slice = sc->sc_slice;
650	md->md_balance = sc->sc_balance;
651	md->md_mediasize = sc->sc_mediasize;
652	md->md_sectorsize = sc->sc_sectorsize;
653	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
654	bzero(md->md_provider, sizeof(md->md_provider));
655	if (disk == NULL) {
656		md->md_did = arc4random();
657		md->md_priority = 0;
658		md->md_syncid = 0;
659		md->md_dflags = 0;
660		md->md_sync_offset = 0;
661	} else {
662		md->md_did = disk->d_id;
663		md->md_priority = disk->d_priority;
664		md->md_syncid = disk->d_sync.ds_syncid;
665		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
666		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
667			md->md_sync_offset = disk->d_sync.ds_offset_done;
668		else
669			md->md_sync_offset = 0;
670		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
671			strlcpy(md->md_provider,
672			    disk->d_consumer->provider->name,
673			    sizeof(md->md_provider));
674		}
675	}
676}
677
678void
679g_mirror_update_metadata(struct g_mirror_disk *disk)
680{
681	struct g_mirror_metadata md;
682	int error;
683
684	g_topology_assert();
685	g_mirror_fill_metadata(disk->d_softc, disk, &md);
686	error = g_mirror_write_metadata(disk, &md);
687	if (error == 0) {
688		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
689		    g_mirror_get_diskname(disk));
690	} else {
691		G_MIRROR_DEBUG(0,
692		    "Cannot update metadata on disk %s (error=%d).",
693		    g_mirror_get_diskname(disk), error);
694	}
695}
696
697static void
698g_mirror_bump_syncid(struct g_mirror_softc *sc)
699{
700	struct g_mirror_disk *disk;
701
702	g_topology_assert();
703	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
704	    ("%s called with no active disks (device=%s).", __func__,
705	    sc->sc_name));
706
707	sc->sc_syncid++;
708	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
709	    sc->sc_syncid);
710	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
711		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
712		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
713			disk->d_sync.ds_syncid = sc->sc_syncid;
714			g_mirror_update_metadata(disk);
715		}
716	}
717}
718
719static void
720g_mirror_idle(struct g_mirror_softc *sc)
721{
722	struct g_mirror_disk *disk;
723
724	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
725		return;
726	sc->sc_idle = 1;
727	g_topology_lock();
728	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
729		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
730			continue;
731		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
732		    g_mirror_get_diskname(disk), sc->sc_name);
733		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
734		g_mirror_update_metadata(disk);
735	}
736	g_topology_unlock();
737}
738
739static void
740g_mirror_unidle(struct g_mirror_softc *sc)
741{
742	struct g_mirror_disk *disk;
743
744	sc->sc_idle = 0;
745	g_topology_lock();
746	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
747		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
748			continue;
749		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
750		    g_mirror_get_diskname(disk), sc->sc_name);
751		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
752		g_mirror_update_metadata(disk);
753	}
754	g_topology_unlock();
755}
756
757/*
758 * Return 1 if we should check if mirror is idling.
759 */
760static int
761g_mirror_check_idle(struct g_mirror_softc *sc)
762{
763	struct g_mirror_disk *disk;
764
765	if (sc->sc_idle)
766		return (0);
767	if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
768		return (0);
769	/*
770	 * Check if there are no in-flight requests.
771	 */
772	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
773		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
774			continue;
775		if (disk->d_consumer->index > 0)
776			return (0);
777	}
778	return (1);
779}
780
781static __inline int
782bintime_cmp(struct bintime *bt1, struct bintime *bt2)
783{
784
785	if (bt1->sec < bt2->sec)
786		return (-1);
787	else if (bt1->sec > bt2->sec)
788		return (1);
789	if (bt1->frac < bt2->frac)
790		return (-1);
791	else if (bt1->frac > bt2->frac)
792		return (1);
793	return (0);
794}
795
796static void
797g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
798{
799
800	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
801		return;
802	binuptime(&disk->d_delay);
803	bintime_sub(&disk->d_delay, &bp->bio_t0);
804}
805
806static void
807g_mirror_done(struct bio *bp)
808{
809	struct g_mirror_softc *sc;
810
811	sc = bp->bio_from->geom->softc;
812	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
813	mtx_lock(&sc->sc_queue_mtx);
814	bioq_disksort(&sc->sc_queue, bp);
815	wakeup(sc);
816	mtx_unlock(&sc->sc_queue_mtx);
817}
818
819static void
820g_mirror_regular_request(struct bio *bp)
821{
822	struct g_mirror_softc *sc;
823	struct g_mirror_disk *disk;
824	struct bio *pbp;
825
826	g_topology_assert_not();
827
828	bp->bio_from->index--;
829	pbp = bp->bio_parent;
830	sc = pbp->bio_to->geom->softc;
831	disk = bp->bio_from->private;
832	if (disk == NULL) {
833		g_topology_lock();
834		g_mirror_kill_consumer(sc, bp->bio_from);
835		g_topology_unlock();
836	} else {
837		g_mirror_update_delay(disk, bp);
838	}
839
840	pbp->bio_inbed++;
841	KASSERT(pbp->bio_inbed <= pbp->bio_children,
842	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
843	    pbp->bio_children));
844	if (bp->bio_error == 0 && pbp->bio_error == 0) {
845		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
846		g_destroy_bio(bp);
847		if (pbp->bio_children == pbp->bio_inbed) {
848			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
849			pbp->bio_completed = pbp->bio_length;
850			g_io_deliver(pbp, pbp->bio_error);
851		}
852		return;
853	} else if (bp->bio_error != 0) {
854		if (pbp->bio_error == 0)
855			pbp->bio_error = bp->bio_error;
856		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
857		    bp->bio_error);
858		if (disk != NULL) {
859			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
860			g_mirror_event_send(disk,
861			    G_MIRROR_DISK_STATE_DISCONNECTED,
862			    G_MIRROR_EVENT_DONTWAIT);
863		}
864		switch (pbp->bio_cmd) {
865		case BIO_DELETE:
866		case BIO_WRITE:
867			pbp->bio_inbed--;
868			pbp->bio_children--;
869			break;
870		}
871	}
872	g_destroy_bio(bp);
873
874	switch (pbp->bio_cmd) {
875	case BIO_READ:
876		if (pbp->bio_children == pbp->bio_inbed) {
877			pbp->bio_error = 0;
878			mtx_lock(&sc->sc_queue_mtx);
879			bioq_disksort(&sc->sc_queue, pbp);
880			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
881			wakeup(sc);
882			mtx_unlock(&sc->sc_queue_mtx);
883		}
884		break;
885	case BIO_DELETE:
886	case BIO_WRITE:
887		if (pbp->bio_children == 0) {
888			/*
889			 * All requests failed.
890			 */
891		} else if (pbp->bio_inbed < pbp->bio_children) {
892			/* Do nothing. */
893			break;
894		} else if (pbp->bio_children == pbp->bio_inbed) {
895			/* Some requests succeeded. */
896			pbp->bio_error = 0;
897			pbp->bio_completed = pbp->bio_length;
898		}
899		g_io_deliver(pbp, pbp->bio_error);
900		break;
901	default:
902		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
903		break;
904	}
905}
906
907static void
908g_mirror_sync_done(struct bio *bp)
909{
910	struct g_mirror_softc *sc;
911
912	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
913	sc = bp->bio_from->geom->softc;
914	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
915	mtx_lock(&sc->sc_queue_mtx);
916	bioq_disksort(&sc->sc_queue, bp);
917	wakeup(sc);
918	mtx_unlock(&sc->sc_queue_mtx);
919}
920
921static void
922g_mirror_start(struct bio *bp)
923{
924	struct g_mirror_softc *sc;
925
926	sc = bp->bio_to->geom->softc;
927	/*
928	 * If sc == NULL or there are no valid disks, provider's error
929	 * should be set and g_mirror_start() should not be called at all.
930	 */
931	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
932	    ("Provider's error should be set (error=%d)(mirror=%s).",
933	    bp->bio_to->error, bp->bio_to->name));
934	G_MIRROR_LOGREQ(3, bp, "Request received.");
935
936	switch (bp->bio_cmd) {
937	case BIO_READ:
938	case BIO_WRITE:
939	case BIO_DELETE:
940		break;
941	case BIO_GETATTR:
942	default:
943		g_io_deliver(bp, EOPNOTSUPP);
944		return;
945	}
946	mtx_lock(&sc->sc_queue_mtx);
947	bioq_disksort(&sc->sc_queue, bp);
948	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
949	wakeup(sc);
950	mtx_unlock(&sc->sc_queue_mtx);
951}
952
953/*
954 * Send one synchronization request.
955 */
956static void
957g_mirror_sync_one(struct g_mirror_disk *disk)
958{
959	struct g_mirror_softc *sc;
960	struct bio *bp;
961
962	sc = disk->d_softc;
963	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
964	    ("Disk %s is not marked for synchronization.",
965	    g_mirror_get_diskname(disk)));
966
967	bp = g_new_bio();
968	if (bp == NULL)
969		return;
970	bp->bio_parent = NULL;
971	bp->bio_cmd = BIO_READ;
972	bp->bio_offset = disk->d_sync.ds_offset;
973	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
974	bp->bio_cflags = 0;
975	bp->bio_done = g_mirror_sync_done;
976	bp->bio_data = disk->d_sync.ds_data;
977	if (bp->bio_data == NULL) {
978		g_destroy_bio(bp);
979		return;
980	}
981	disk->d_sync.ds_offset += bp->bio_length;
982	bp->bio_to = sc->sc_provider;
983	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
984	disk->d_sync.ds_consumer->index++;
985	g_io_request(bp, disk->d_sync.ds_consumer);
986}
987
988static void
989g_mirror_sync_request(struct bio *bp)
990{
991	struct g_mirror_softc *sc;
992	struct g_mirror_disk *disk;
993
994	bp->bio_from->index--;
995	sc = bp->bio_from->geom->softc;
996	disk = bp->bio_from->private;
997	if (disk == NULL) {
998		g_topology_lock();
999		g_mirror_kill_consumer(sc, bp->bio_from);
1000		g_topology_unlock();
1001		g_destroy_bio(bp);
1002		return;
1003	}
1004
1005	/*
1006	 * Synchronization request.
1007	 */
1008	switch (bp->bio_cmd) {
1009	case BIO_READ:
1010	    {
1011		struct g_consumer *cp;
1012
1013		if (bp->bio_error != 0) {
1014			G_MIRROR_LOGREQ(0, bp,
1015			    "Synchronization request failed (error=%d).",
1016			    bp->bio_error);
1017			g_destroy_bio(bp);
1018			return;
1019		}
1020		G_MIRROR_LOGREQ(3, bp,
1021		    "Synchronization request half-finished.");
1022		bp->bio_cmd = BIO_WRITE;
1023		bp->bio_cflags = 0;
1024		cp = disk->d_consumer;
1025		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1026		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1027		    cp->acr, cp->acw, cp->ace));
1028		cp->index++;
1029		g_io_request(bp, cp);
1030		return;
1031	    }
1032	case BIO_WRITE:
1033	    {
1034		struct g_mirror_disk_sync *sync;
1035
1036		if (bp->bio_error != 0) {
1037			G_MIRROR_LOGREQ(0, bp,
1038			    "Synchronization request failed (error=%d).",
1039			    bp->bio_error);
1040			g_destroy_bio(bp);
1041			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
1042			g_mirror_event_send(disk,
1043			    G_MIRROR_DISK_STATE_DISCONNECTED,
1044			    G_MIRROR_EVENT_DONTWAIT);
1045			return;
1046		}
1047		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1048		sync = &disk->d_sync;
1049		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
1050		g_destroy_bio(bp);
1051		if (sync->ds_resync != -1)
1052			break;
1053		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
1054			/*
1055			 * Disk up-to-date, activate it.
1056			 */
1057			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1058			    G_MIRROR_EVENT_DONTWAIT);
1059			return;
1060		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
1061			/*
1062			 * Update offset_done on every 100 blocks.
1063			 * XXX: This should be configurable.
1064			 */
1065			g_topology_lock();
1066			g_mirror_update_metadata(disk);
1067			g_topology_unlock();
1068		}
1069		return;
1070	    }
1071	default:
1072		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1073		    bp->bio_cmd, sc->sc_name));
1074		break;
1075	}
1076}
1077
1078static void
1079g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1080{
1081	struct g_mirror_disk *disk;
1082	struct g_consumer *cp;
1083	struct bio *cbp;
1084
1085	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1086		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1087			break;
1088	}
1089	if (disk == NULL) {
1090		if (bp->bio_error == 0)
1091			bp->bio_error = ENXIO;
1092		g_io_deliver(bp, bp->bio_error);
1093		return;
1094	}
1095	cbp = g_clone_bio(bp);
1096	if (cbp == NULL) {
1097		if (bp->bio_error == 0)
1098			bp->bio_error = ENOMEM;
1099		g_io_deliver(bp, bp->bio_error);
1100		return;
1101	}
1102	/*
1103	 * Fill in the component buf structure.
1104	 */
1105	cp = disk->d_consumer;
1106	cbp->bio_done = g_mirror_done;
1107	cbp->bio_to = cp->provider;
1108	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1109	KASSERT(cp->acr > 0 && cp->ace > 0,
1110	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1111	    cp->acw, cp->ace));
1112	cp->index++;
1113	g_io_request(cbp, cp);
1114}
1115
1116static void
1117g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1118{
1119	struct g_mirror_disk *disk;
1120	struct g_consumer *cp;
1121	struct bio *cbp;
1122
1123	disk = g_mirror_get_disk(sc);
1124	if (disk == NULL) {
1125		if (bp->bio_error == 0)
1126			bp->bio_error = ENXIO;
1127		g_io_deliver(bp, bp->bio_error);
1128		return;
1129	}
1130	cbp = g_clone_bio(bp);
1131	if (cbp == NULL) {
1132		if (bp->bio_error == 0)
1133			bp->bio_error = ENOMEM;
1134		g_io_deliver(bp, bp->bio_error);
1135		return;
1136	}
1137	/*
1138	 * Fill in the component buf structure.
1139	 */
1140	cp = disk->d_consumer;
1141	cbp->bio_done = g_mirror_done;
1142	cbp->bio_to = cp->provider;
1143	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1144	KASSERT(cp->acr > 0 && cp->ace > 0,
1145	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1146	    cp->acw, cp->ace));
1147	cp->index++;
1148	g_io_request(cbp, cp);
1149}
1150
1151static void
1152g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1153{
1154	struct g_mirror_disk *disk, *dp;
1155	struct g_consumer *cp;
1156	struct bio *cbp;
1157	struct bintime curtime;
1158
1159	binuptime(&curtime);
1160	/*
1161	 * Find a disk which the smallest load.
1162	 */
1163	disk = NULL;
1164	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1165		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1166			continue;
1167		/* If disk wasn't used for more than 2 sec, use it. */
1168		if (curtime.sec - dp->d_last_used.sec >= 2) {
1169			disk = dp;
1170			break;
1171		}
1172		if (disk == NULL ||
1173		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1174			disk = dp;
1175		}
1176	}
1177	cbp = g_clone_bio(bp);
1178	if (cbp == NULL) {
1179		if (bp->bio_error == 0)
1180			bp->bio_error = ENOMEM;
1181		g_io_deliver(bp, bp->bio_error);
1182		return;
1183	}
1184	/*
1185	 * Fill in the component buf structure.
1186	 */
1187	cp = disk->d_consumer;
1188	cbp->bio_done = g_mirror_done;
1189	cbp->bio_to = cp->provider;
1190	binuptime(&disk->d_last_used);
1191	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1192	KASSERT(cp->acr > 0 && cp->ace > 0,
1193	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1194	    cp->acw, cp->ace));
1195	cp->index++;
1196	g_io_request(cbp, cp);
1197}
1198
1199static void
1200g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1201{
1202	struct bio_queue_head queue;
1203	struct g_mirror_disk *disk;
1204	struct g_consumer *cp;
1205	struct bio *cbp;
1206	off_t left, mod, offset, slice;
1207	u_char *data;
1208	u_int ndisks;
1209
1210	if (bp->bio_length <= sc->sc_slice) {
1211		g_mirror_request_round_robin(sc, bp);
1212		return;
1213	}
1214	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1215	slice = bp->bio_length / ndisks;
1216	mod = slice % sc->sc_provider->sectorsize;
1217	if (mod != 0)
1218		slice += sc->sc_provider->sectorsize - mod;
1219	/*
1220	 * Allocate all bios before sending any request, so we can
1221	 * return ENOMEM in nice and clean way.
1222	 */
1223	left = bp->bio_length;
1224	offset = bp->bio_offset;
1225	data = bp->bio_data;
1226	bioq_init(&queue);
1227	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1228		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1229			continue;
1230		cbp = g_clone_bio(bp);
1231		if (cbp == NULL) {
1232			for (cbp = bioq_first(&queue); cbp != NULL;
1233			    cbp = bioq_first(&queue)) {
1234				bioq_remove(&queue, cbp);
1235				g_destroy_bio(cbp);
1236			}
1237			if (bp->bio_error == 0)
1238				bp->bio_error = ENOMEM;
1239			g_io_deliver(bp, bp->bio_error);
1240			return;
1241		}
1242		bioq_insert_tail(&queue, cbp);
1243		cbp->bio_done = g_mirror_done;
1244		cbp->bio_caller1 = disk;
1245		cbp->bio_to = disk->d_consumer->provider;
1246		cbp->bio_offset = offset;
1247		cbp->bio_data = data;
1248		cbp->bio_length = MIN(left, slice);
1249		left -= cbp->bio_length;
1250		if (left == 0)
1251			break;
1252		offset += cbp->bio_length;
1253		data += cbp->bio_length;
1254	}
1255	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1256		bioq_remove(&queue, cbp);
1257		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1258		disk = cbp->bio_caller1;
1259		cbp->bio_caller1 = NULL;
1260		cp = disk->d_consumer;
1261		KASSERT(cp->acr > 0 && cp->ace > 0,
1262		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1263		    cp->acr, cp->acw, cp->ace));
1264		disk->d_consumer->index++;
1265		g_io_request(cbp, disk->d_consumer);
1266	}
1267}
1268
1269static void
1270g_mirror_register_request(struct bio *bp)
1271{
1272	struct g_mirror_softc *sc;
1273
1274	sc = bp->bio_to->geom->softc;
1275	switch (bp->bio_cmd) {
1276	case BIO_READ:
1277		switch (sc->sc_balance) {
1278		case G_MIRROR_BALANCE_LOAD:
1279			g_mirror_request_load(sc, bp);
1280			break;
1281		case G_MIRROR_BALANCE_PREFER:
1282			g_mirror_request_prefer(sc, bp);
1283			break;
1284		case G_MIRROR_BALANCE_ROUND_ROBIN:
1285			g_mirror_request_round_robin(sc, bp);
1286			break;
1287		case G_MIRROR_BALANCE_SPLIT:
1288			g_mirror_request_split(sc, bp);
1289			break;
1290		}
1291		return;
1292	case BIO_WRITE:
1293	case BIO_DELETE:
1294	    {
1295		struct g_mirror_disk *disk;
1296		struct g_mirror_disk_sync *sync;
1297		struct bio_queue_head queue;
1298		struct g_consumer *cp;
1299		struct bio *cbp;
1300
1301		if (sc->sc_idle)
1302			g_mirror_unidle(sc);
1303		/*
1304		 * Allocate all bios before sending any request, so we can
1305		 * return ENOMEM in nice and clean way.
1306		 */
1307		bioq_init(&queue);
1308		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1309			sync = &disk->d_sync;
1310			switch (disk->d_state) {
1311			case G_MIRROR_DISK_STATE_ACTIVE:
1312				break;
1313			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1314				if (bp->bio_offset >= sync->ds_offset)
1315					continue;
1316				else if (bp->bio_offset + bp->bio_length >
1317				    sync->ds_offset_done &&
1318				    (bp->bio_offset < sync->ds_resync ||
1319				     sync->ds_resync == -1)) {
1320					sync->ds_resync = bp->bio_offset -
1321					    (bp->bio_offset % MAXPHYS);
1322				}
1323				break;
1324			default:
1325				continue;
1326			}
1327			cbp = g_clone_bio(bp);
1328			if (cbp == NULL) {
1329				for (cbp = bioq_first(&queue); cbp != NULL;
1330				    cbp = bioq_first(&queue)) {
1331					bioq_remove(&queue, cbp);
1332					g_destroy_bio(cbp);
1333				}
1334				if (bp->bio_error == 0)
1335					bp->bio_error = ENOMEM;
1336				g_io_deliver(bp, bp->bio_error);
1337				return;
1338			}
1339			bioq_insert_tail(&queue, cbp);
1340			cbp->bio_done = g_mirror_done;
1341			cp = disk->d_consumer;
1342			cbp->bio_caller1 = cp;
1343			cbp->bio_to = cp->provider;
1344			KASSERT(cp->acw > 0 && cp->ace > 0,
1345			    ("Consumer %s not opened (r%dw%de%d).",
1346			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1347		}
1348		for (cbp = bioq_first(&queue); cbp != NULL;
1349		    cbp = bioq_first(&queue)) {
1350			bioq_remove(&queue, cbp);
1351			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1352			cp = cbp->bio_caller1;
1353			cbp->bio_caller1 = NULL;
1354			cp->index++;
1355			g_io_request(cbp, cp);
1356		}
1357		/*
1358		 * Bump syncid on first write.
1359		 */
1360		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1361			sc->sc_bump_syncid = 0;
1362			g_topology_lock();
1363			g_mirror_bump_syncid(sc);
1364			g_topology_unlock();
1365		}
1366		return;
1367	    }
1368	default:
1369		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1370		    bp->bio_cmd, sc->sc_name));
1371		break;
1372	}
1373}
1374
1375static int
1376g_mirror_can_destroy(struct g_mirror_softc *sc)
1377{
1378	struct g_geom *gp;
1379	struct g_consumer *cp;
1380
1381	g_topology_assert();
1382	gp = sc->sc_geom;
1383	LIST_FOREACH(cp, &gp->consumer, consumer) {
1384		if (g_mirror_is_busy(sc, cp))
1385			return (0);
1386	}
1387	gp = sc->sc_sync.ds_geom;
1388	LIST_FOREACH(cp, &gp->consumer, consumer) {
1389		if (g_mirror_is_busy(sc, cp))
1390			return (0);
1391	}
1392	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1393	    sc->sc_name);
1394	return (1);
1395}
1396
1397static int
1398g_mirror_try_destroy(struct g_mirror_softc *sc)
1399{
1400
1401	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1402		g_topology_lock();
1403		if (!g_mirror_can_destroy(sc)) {
1404			g_topology_unlock();
1405			return (0);
1406		}
1407		g_topology_unlock();
1408		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1409		    &sc->sc_worker);
1410		wakeup(&sc->sc_worker);
1411		sc->sc_worker = NULL;
1412	} else {
1413		g_topology_lock();
1414		if (!g_mirror_can_destroy(sc)) {
1415			g_topology_unlock();
1416			return (0);
1417		}
1418		g_mirror_destroy_device(sc);
1419		g_topology_unlock();
1420		free(sc, M_MIRROR);
1421	}
1422	return (1);
1423}
1424
1425/*
1426 * Worker thread.
1427 */
1428static void
1429g_mirror_worker(void *arg)
1430{
1431	struct g_mirror_softc *sc;
1432	struct g_mirror_disk *disk;
1433	struct g_mirror_disk_sync *sync;
1434	struct g_mirror_event *ep;
1435	struct bio *bp;
1436	u_int nreqs;
1437
1438	sc = arg;
1439	curthread->td_base_pri = PRIBIO;
1440
1441	nreqs = 0;
1442	for (;;) {
1443		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1444		/*
1445		 * First take a look at events.
1446		 * This is important to handle events before any I/O requests.
1447		 */
1448		ep = g_mirror_event_get(sc);
1449		if (ep != NULL) {
1450			g_topology_lock();
1451			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1452				/* Update only device status. */
1453				G_MIRROR_DEBUG(3,
1454				    "Running event for device %s.",
1455				    sc->sc_name);
1456				ep->e_error = 0;
1457				g_mirror_update_device(sc, 1);
1458			} else {
1459				/* Update disk status. */
1460				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1461				     g_mirror_get_diskname(ep->e_disk));
1462				ep->e_error = g_mirror_update_disk(ep->e_disk,
1463				    ep->e_state);
1464				if (ep->e_error == 0)
1465					g_mirror_update_device(sc, 0);
1466			}
1467			g_topology_unlock();
1468			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1469				KASSERT(ep->e_error == 0,
1470				    ("Error cannot be handled."));
1471				g_mirror_event_free(ep);
1472			} else {
1473				ep->e_flags |= G_MIRROR_EVENT_DONE;
1474				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1475				    ep);
1476				mtx_lock(&sc->sc_events_mtx);
1477				wakeup(ep);
1478				mtx_unlock(&sc->sc_events_mtx);
1479			}
1480			if ((sc->sc_flags &
1481			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1482				if (g_mirror_try_destroy(sc))
1483					kthread_exit(0);
1484			}
1485			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1486			continue;
1487		}
1488		/*
1489		 * Now I/O requests.
1490		 */
1491		/* Get first request from the queue. */
1492		mtx_lock(&sc->sc_queue_mtx);
1493		bp = bioq_first(&sc->sc_queue);
1494		if (bp == NULL) {
1495			if ((sc->sc_flags &
1496			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1497				mtx_unlock(&sc->sc_queue_mtx);
1498				if (g_mirror_try_destroy(sc))
1499					kthread_exit(0);
1500				mtx_lock(&sc->sc_queue_mtx);
1501			}
1502		}
1503		if (sc->sc_sync.ds_ndisks > 0 &&
1504		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1505			mtx_unlock(&sc->sc_queue_mtx);
1506			/*
1507			 * It is time for synchronization...
1508			 */
1509			nreqs = 0;
1510			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1511				if (disk->d_state !=
1512				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1513					continue;
1514				}
1515				sync = &disk->d_sync;
1516				if (sync->ds_offset >=
1517				    sc->sc_provider->mediasize) {
1518					continue;
1519				}
1520				if (sync->ds_offset > sync->ds_offset_done)
1521					continue;
1522				if (sync->ds_resync != -1) {
1523					sync->ds_offset = sync->ds_resync;
1524					sync->ds_offset_done = sync->ds_resync;
1525					sync->ds_resync = -1;
1526				}
1527				g_mirror_sync_one(disk);
1528			}
1529			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1530			goto sleep;
1531		}
1532		if (bp == NULL) {
1533			if (g_mirror_check_idle(sc)) {
1534				u_int idletime;
1535
1536				idletime = g_mirror_idletime;
1537				if (idletime == 0)
1538					idletime = 1;
1539				idletime *= hz;
1540				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1541				    "m:w1", idletime) == EWOULDBLOCK) {
1542					G_MIRROR_DEBUG(5, "%s: I'm here 3.",
1543					    __func__);
1544					/*
1545					 * No I/O requests in 'idletime' seconds,
1546					 * so mark components as clean.
1547					 */
1548					g_mirror_idle(sc);
1549				}
1550				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1551			} else {
1552				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1553				    "m:w2", 0);
1554				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1555			}
1556			continue;
1557		}
1558		nreqs++;
1559		bioq_remove(&sc->sc_queue, bp);
1560		mtx_unlock(&sc->sc_queue_mtx);
1561
1562		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1563			g_mirror_regular_request(bp);
1564		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1565			u_int timeout, sps;
1566
1567			g_mirror_sync_request(bp);
1568sleep:
1569			sps = g_mirror_syncs_per_sec;
1570			if (sps == 0) {
1571				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1572				continue;
1573			}
1574			mtx_lock(&sc->sc_queue_mtx);
1575			if (bioq_first(&sc->sc_queue) != NULL) {
1576				mtx_unlock(&sc->sc_queue_mtx);
1577				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1578				continue;
1579			}
1580			timeout = hz / sps;
1581			if (timeout == 0)
1582				timeout = 1;
1583			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1584			    timeout);
1585		} else {
1586			g_mirror_register_request(bp);
1587		}
1588		G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1589	}
1590}
1591
1592/*
1593 * Open disk's consumer if needed.
1594 */
1595static void
1596g_mirror_update_access(struct g_mirror_disk *disk)
1597{
1598	struct g_provider *pp;
1599	struct g_consumer *cp;
1600	int acr, acw, ace, cpw, error;
1601
1602	g_topology_assert();
1603
1604	cp = disk->d_consumer;
1605	pp = disk->d_softc->sc_provider;
1606	if (pp == NULL) {
1607		acr = -cp->acr;
1608		acw = -cp->acw;
1609		ace = -cp->ace;
1610	} else {
1611		acr = pp->acr - cp->acr;
1612		acw = pp->acw - cp->acw;
1613		ace = pp->ace - cp->ace;
1614		/* Grab an extra "exclusive" bit. */
1615		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1616			ace++;
1617	}
1618	if (acr == 0 && acw == 0 && ace == 0)
1619		return;
1620	cpw = cp->acw;
1621	error = g_access(cp, acr, acw, ace);
1622	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1623	    acw, ace, error);
1624	if (error != 0) {
1625		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1626		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1627		    G_MIRROR_EVENT_DONTWAIT);
1628		return;
1629	}
1630	if (cpw == 0 && cp->acw > 0) {
1631		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1632		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1633		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1634	} else if (cpw > 0 && cp->acw == 0) {
1635		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1636		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1637		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1638	}
1639}
1640
1641static void
1642g_mirror_sync_start(struct g_mirror_disk *disk)
1643{
1644	struct g_mirror_softc *sc;
1645	struct g_consumer *cp;
1646	int error;
1647
1648	g_topology_assert();
1649
1650	sc = disk->d_softc;
1651	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1652	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1653	    sc->sc_state));
1654	cp = disk->d_consumer;
1655	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1656	    ("Consumer %s already opened.", cp->provider->name));
1657
1658	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1659	    g_mirror_get_diskname(disk));
1660	error = g_access(cp, 0, 1, 1);
1661	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1662	    1, error);
1663	if (error != 0) {
1664		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1665		    G_MIRROR_EVENT_DONTWAIT);
1666		return;
1667	}
1668	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1669	KASSERT(disk->d_sync.ds_consumer == NULL,
1670	    ("Sync consumer already exists (device=%s, disk=%s).",
1671	    sc->sc_name, g_mirror_get_diskname(disk)));
1672	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1673	disk->d_sync.ds_consumer->private = disk;
1674	disk->d_sync.ds_consumer->index = 0;
1675	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1676	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1677	    disk->d_softc->sc_name, error));
1678	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1679	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1680	    disk->d_softc->sc_name, error));
1681	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1682	sc->sc_sync.ds_ndisks++;
1683}
1684
1685/*
1686 * Stop synchronization process.
1687 * type: 0 - synchronization finished
1688 *       1 - synchronization stopped
1689 */
1690static void
1691g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1692{
1693	struct g_consumer *cp;
1694
1695	g_topology_assert();
1696	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1697	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1698	    g_mirror_disk_state2str(disk->d_state)));
1699	if (disk->d_sync.ds_consumer == NULL)
1700		return;
1701
1702	if (type == 0) {
1703		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1704		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1705	} else /* if (type == 1) */ {
1706		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1707		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1708	}
1709	cp = disk->d_sync.ds_consumer;
1710	g_access(cp, -1, 0, 0);
1711	g_mirror_kill_consumer(disk->d_softc, cp);
1712	free(disk->d_sync.ds_data, M_MIRROR);
1713	disk->d_sync.ds_consumer = NULL;
1714	disk->d_softc->sc_sync.ds_ndisks--;
1715	cp = disk->d_consumer;
1716	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1717	    ("Consumer %s not opened.", cp->provider->name));
1718	g_access(cp, 0, -1, -1);
1719	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1720	    -1, 0);
1721	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1722}
1723
1724static void
1725g_mirror_launch_provider(struct g_mirror_softc *sc)
1726{
1727	struct g_mirror_disk *disk;
1728	struct g_provider *pp;
1729
1730	g_topology_assert();
1731
1732	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1733	pp->mediasize = sc->sc_mediasize;
1734	pp->sectorsize = sc->sc_sectorsize;
1735	sc->sc_provider = pp;
1736	g_error_provider(pp, 0);
1737	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1738	    pp->name);
1739	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1740		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1741			g_mirror_sync_start(disk);
1742	}
1743}
1744
1745static void
1746g_mirror_destroy_provider(struct g_mirror_softc *sc)
1747{
1748	struct g_mirror_disk *disk;
1749	struct bio *bp;
1750
1751	g_topology_assert();
1752	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1753	    sc->sc_name));
1754
1755	g_error_provider(sc->sc_provider, ENXIO);
1756	mtx_lock(&sc->sc_queue_mtx);
1757	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1758		bioq_remove(&sc->sc_queue, bp);
1759		g_io_deliver(bp, ENXIO);
1760	}
1761	mtx_unlock(&sc->sc_queue_mtx);
1762	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1763	    sc->sc_provider->name);
1764	sc->sc_provider->flags |= G_PF_WITHER;
1765	g_orphan_provider(sc->sc_provider, ENXIO);
1766	sc->sc_provider = NULL;
1767	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1768		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1769			g_mirror_sync_stop(disk, 1);
1770	}
1771}
1772
1773static void
1774g_mirror_go(void *arg)
1775{
1776	struct g_mirror_softc *sc;
1777
1778	sc = arg;
1779	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1780	g_mirror_event_send(sc, 0,
1781	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1782}
1783
1784static u_int
1785g_mirror_determine_state(struct g_mirror_disk *disk)
1786{
1787	struct g_mirror_softc *sc;
1788	u_int state;
1789
1790	sc = disk->d_softc;
1791	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1792		if ((disk->d_flags &
1793		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1794			/* Disk does not need synchronization. */
1795			state = G_MIRROR_DISK_STATE_ACTIVE;
1796		} else {
1797			if ((sc->sc_flags &
1798			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1799			    (disk->d_flags &
1800			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1801				/*
1802				 * We can start synchronization from
1803				 * the stored offset.
1804				 */
1805				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1806			} else {
1807				state = G_MIRROR_DISK_STATE_STALE;
1808			}
1809		}
1810	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1811		/*
1812		 * Reset all synchronization data for this disk,
1813		 * because if it even was synchronized, it was
1814		 * synchronized to disks with different syncid.
1815		 */
1816		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1817		disk->d_sync.ds_offset = 0;
1818		disk->d_sync.ds_offset_done = 0;
1819		disk->d_sync.ds_syncid = sc->sc_syncid;
1820		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1821		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1822			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1823		} else {
1824			state = G_MIRROR_DISK_STATE_STALE;
1825		}
1826	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1827		/*
1828		 * Not good, NOT GOOD!
1829		 * It means that mirror was started on stale disks
1830		 * and more fresh disk just arrive.
1831		 * If there were writes, mirror is fucked up, sorry.
1832		 * I think the best choice here is don't touch
1833		 * this disk and inform the user laudly.
1834		 */
1835		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1836		    "disk (%s) arrives!! It will not be connected to the "
1837		    "running device.", sc->sc_name,
1838		    g_mirror_get_diskname(disk));
1839		g_mirror_destroy_disk(disk);
1840		state = G_MIRROR_DISK_STATE_NONE;
1841		/* Return immediately, because disk was destroyed. */
1842		return (state);
1843	}
1844	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1845	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1846	return (state);
1847}
1848
1849/*
1850 * Update device state.
1851 */
1852static void
1853g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1854{
1855	struct g_mirror_disk *disk;
1856	u_int state;
1857
1858	g_topology_assert();
1859
1860	switch (sc->sc_state) {
1861	case G_MIRROR_DEVICE_STATE_STARTING:
1862	    {
1863		struct g_mirror_disk *pdisk;
1864		u_int dirty, ndisks, syncid;
1865
1866		KASSERT(sc->sc_provider == NULL,
1867		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1868		/*
1869		 * Are we ready? We are, if all disks are connected or
1870		 * if we have any disks and 'force' is true.
1871		 */
1872		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1873		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1874			;
1875		} else if (g_mirror_ndisks(sc, -1) == 0) {
1876			/*
1877			 * Disks went down in starting phase, so destroy
1878			 * device.
1879			 */
1880			callout_drain(&sc->sc_callout);
1881			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1882			return;
1883		} else {
1884			return;
1885		}
1886
1887		/*
1888		 * Activate all disks with the biggest syncid.
1889		 */
1890		if (force) {
1891			/*
1892			 * If 'force' is true, we have been called due to
1893			 * timeout, so don't bother canceling timeout.
1894			 */
1895			ndisks = 0;
1896			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1897				if ((disk->d_flags &
1898				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1899					ndisks++;
1900				}
1901			}
1902			if (ndisks == 0) {
1903				/* No valid disks found, destroy device. */
1904				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1905				return;
1906			}
1907		} else {
1908			/* Cancel timeout. */
1909			callout_drain(&sc->sc_callout);
1910		}
1911
1912		/*
1913		 * Find disk with the biggest syncid.
1914		 */
1915		syncid = 0;
1916		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1917			if (disk->d_sync.ds_syncid > syncid)
1918				syncid = disk->d_sync.ds_syncid;
1919		}
1920
1921		/*
1922		 * Here we need to look for dirty disks and if all disks
1923		 * with the biggest syncid are dirty, we have to choose
1924		 * one with the biggest priority and rebuild the rest.
1925		 */
1926		/*
1927		 * Find the number of dirty disks with the biggest syncid.
1928		 * Find the number of disks with the biggest syncid.
1929		 * While here, find a disk with the biggest priority.
1930		 */
1931		dirty = ndisks = 0;
1932		pdisk = NULL;
1933		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1934			if (disk->d_sync.ds_syncid != syncid)
1935				continue;
1936			if ((disk->d_flags &
1937			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1938				continue;
1939			}
1940			ndisks++;
1941			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1942				dirty++;
1943				if (pdisk == NULL ||
1944				    pdisk->d_priority < disk->d_priority) {
1945					pdisk = disk;
1946				}
1947			}
1948		}
1949		if (dirty == 0) {
1950			/* No dirty disks at all, great. */
1951		} else if (dirty == ndisks) {
1952			/*
1953			 * Force synchronization for all dirty disks except one
1954			 * with the biggest priority.
1955			 */
1956			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1957			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1958			    "master disk for synchronization.",
1959			    g_mirror_get_diskname(pdisk), sc->sc_name);
1960			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1961				if (disk->d_sync.ds_syncid != syncid)
1962					continue;
1963				if ((disk->d_flags &
1964				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1965					continue;
1966				}
1967				KASSERT((disk->d_flags &
1968				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1969				    ("Disk %s isn't marked as dirty.",
1970				    g_mirror_get_diskname(disk)));
1971				/* Skip the disk with the biggest priority. */
1972				if (disk == pdisk)
1973					continue;
1974				disk->d_sync.ds_syncid = 0;
1975			}
1976		} else if (dirty < ndisks) {
1977			/*
1978			 * Force synchronization for all dirty disks.
1979			 * We have some non-dirty disks.
1980			 */
1981			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1982				if (disk->d_sync.ds_syncid != syncid)
1983					continue;
1984				if ((disk->d_flags &
1985				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1986					continue;
1987				}
1988				if ((disk->d_flags &
1989				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1990					continue;
1991				}
1992				disk->d_sync.ds_syncid = 0;
1993			}
1994		}
1995
1996		/* Reset hint. */
1997		sc->sc_hint = NULL;
1998		sc->sc_syncid = syncid;
1999		if (force) {
2000			/* Remember to bump syncid on first write. */
2001			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2002		}
2003		state = G_MIRROR_DEVICE_STATE_RUNNING;
2004		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2005		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2006		    g_mirror_device_state2str(state));
2007		sc->sc_state = state;
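		/*
		 * Determine the initial state of each connected disk and
		 * hand it to the worker as an event.  If any disk turns
		 * out STALE, remember to bump the syncid on first write.
		 */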
2008		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2009			state = g_mirror_determine_state(disk);
2010			g_mirror_event_send(disk, state,
2011			    G_MIRROR_EVENT_DONTWAIT);
2012			if (state == G_MIRROR_DISK_STATE_STALE) {
2013				sc->sc_bump_syncid =
2014				    G_MIRROR_BUMP_ON_FIRST_WRITE;
2015			}
2016		}
2017		wakeup(&g_mirror_class);
2018		break;
2019	    }
2020	case G_MIRROR_DEVICE_STATE_RUNNING:
2021		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2022		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2023			/*
2024			 * No active disks or no disks at all,
2025			 * so destroy device.
2026			 */
2027			if (sc->sc_provider != NULL)
2028				g_mirror_destroy_provider(sc);
2029			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2030			break;
2031		} else if (g_mirror_ndisks(sc,
2032		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2033		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2034			/*
2035			 * We have active disks, launch provider if it doesn't
2036			 * exist.
2037			 */
2038			if (sc->sc_provider == NULL)
2039				g_mirror_launch_provider(sc);
2040		}
2041		/*
2042		 * Bump syncid here, if we need to do it immediately.
2043		 */
2044		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
2045			sc->sc_bump_syncid = 0;
2046			g_mirror_bump_syncid(sc);
2047		}
2048		break;
2049	default:
2050		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2051		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2052		break;
2053	}
2054}
2055
2056/*
2057 * Update disk state and device state if needed.
2058 */
2059#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2060	"Disk %s state changed from %s to %s (device %s).",		\
2061	g_mirror_get_diskname(disk),					\
2062	g_mirror_disk_state2str(disk->d_state),				\
2063	g_mirror_disk_state2str(state), sc->sc_name)
2064static int
2065g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2066{
2067	struct g_mirror_softc *sc;
2068
2069	g_topology_assert();
2070
2071	sc = disk->d_softc;
2072again:
2073	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2074	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2075	    g_mirror_disk_state2str(state));
2076	switch (state) {
2077	case G_MIRROR_DISK_STATE_NEW:
2078		/*
2079		 * Possible scenarios:
2080		 * 1. A new disk arrives.
2081		 */
2082		/* Previous state should be NONE. */
2083		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2084		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2085		    g_mirror_disk_state2str(disk->d_state)));
2086		DISK_STATE_CHANGED();
2087
2088		disk->d_state = state;
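		/*
		 * Keep the disk list ordered by descending priority:
		 * insert the new disk before the first disk with an
		 * equal or lower priority, or at the tail if every
		 * existing disk has a higher priority.
		 */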
2089		if (LIST_EMPTY(&sc->sc_disks))
2090			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2091		else {
2092			struct g_mirror_disk *dp;
2093
2094			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2095				if (disk->d_priority >= dp->d_priority) {
2096					LIST_INSERT_BEFORE(dp, disk, d_next);
2097					dp = NULL;
2098					break;
2099				}
2100				if (LIST_NEXT(dp, d_next) == NULL)
2101					break;
2102			}
2103			if (dp != NULL)
2104				LIST_INSERT_AFTER(dp, disk, d_next);
2105		}
2106		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2107		    sc->sc_name, g_mirror_get_diskname(disk));
2108		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2109			break;
2110		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2111		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2112		    g_mirror_device_state2str(sc->sc_state),
2113		    g_mirror_get_diskname(disk),
2114		    g_mirror_disk_state2str(disk->d_state)));
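		/*
		 * The device is already running, so determine the disk's
		 * real state immediately and process it via the goto.
		 */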
2115		state = g_mirror_determine_state(disk);
2116		if (state != G_MIRROR_DISK_STATE_NONE)
2117			goto again;
2118		break;
2119	case G_MIRROR_DISK_STATE_ACTIVE:
2120		/*
2121		 * Possible scenarios:
2122		 * 1. New disk does not need synchronization.
2123		 * 2. Synchronization process finished successfully.
2124		 */
2125		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2126		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2127		    g_mirror_device_state2str(sc->sc_state),
2128		    g_mirror_get_diskname(disk),
2129		    g_mirror_disk_state2str(disk->d_state)));
2130		/* Previous state should be NEW or SYNCHRONIZING. */
2131		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2132		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2133		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2134		    g_mirror_disk_state2str(disk->d_state)));
2135		DISK_STATE_CHANGED();
2136
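		/*
		 * A disk coming from NEW needs no synchronization, so just
		 * clear its dirty flag.  A disk coming from SYNCHRONIZING
		 * has finished, so clear the sync flags and stop the
		 * synchronization process.
		 */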
2137		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2138			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2139		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2140			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2141			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2142			g_mirror_sync_stop(disk, 0);
2143		}
2144		disk->d_state = state;
2145		disk->d_sync.ds_offset = 0;
2146		disk->d_sync.ds_offset_done = 0;
2147		g_mirror_update_access(disk);
2148		g_mirror_update_metadata(disk);
2149		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2150		    sc->sc_name, g_mirror_get_diskname(disk));
2151		break;
2152	case G_MIRROR_DISK_STATE_STALE:
2153		/*
2154		 * Possible scenarios:
2155		 * 1. Stale disk was connected.
2156		 */
2157		/* Previous state should be NEW. */
2158		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2159		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2160		    g_mirror_disk_state2str(disk->d_state)));
2161		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2162		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2163		    g_mirror_device_state2str(sc->sc_state),
2164		    g_mirror_get_diskname(disk),
2165		    g_mirror_disk_state2str(disk->d_state)));
2166		/*
2167		 * STALE state is only possible if device is marked
2168		 * NOAUTOSYNC.
2169		 */
2170		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2171		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2172		    g_mirror_device_state2str(sc->sc_state),
2173		    g_mirror_get_diskname(disk),
2174		    g_mirror_disk_state2str(disk->d_state)));
2175		DISK_STATE_CHANGED();
2176
2177		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2178		disk->d_state = state;
2179		g_mirror_update_metadata(disk);
2180		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2181		    sc->sc_name, g_mirror_get_diskname(disk));
2182		break;
2183	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2184		/*
2185		 * Possible scenarios:
2186		 * 1. Disk which needs synchronization was connected.
2187		 */
2188		/* Previous state should be NEW. */
2189		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2190		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2191		    g_mirror_disk_state2str(disk->d_state)));
2192		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2193		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2194		    g_mirror_device_state2str(sc->sc_state),
2195		    g_mirror_get_diskname(disk),
2196		    g_mirror_disk_state2str(disk->d_state)));
2197		DISK_STATE_CHANGED();
2198
2199		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2200			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2201		disk->d_state = state;
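		/*
		 * Synchronization can be started only once the device
		 * provider exists.
		 */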
2202		if (sc->sc_provider != NULL) {
2203			g_mirror_sync_start(disk);
2204			g_mirror_update_metadata(disk);
2205		}
2206		break;
2207	case G_MIRROR_DISK_STATE_DISCONNECTED:
2208		/*
2209		 * Possible scenarios:
2210		 * 1. Device wasn't running yet, but a disk disappeared.
2211		 * 2. Disk was active and disappeared.
2212		 * 3. Disk disappeared during the synchronization process.
2213		 */
2214		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2215			/*
2216			 * Previous state should be ACTIVE, STALE or
2217			 * SYNCHRONIZING.
2218			 */
2219			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2220			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2221			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2222			    ("Wrong disk state (%s, %s).",
2223			    g_mirror_get_diskname(disk),
2224			    g_mirror_disk_state2str(disk->d_state)));
2225		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2226			/* Previous state should be NEW. */
2227			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2228			    ("Wrong disk state (%s, %s).",
2229			    g_mirror_get_diskname(disk),
2230			    g_mirror_disk_state2str(disk->d_state)));
2231			/*
2232			 * Reset bumping syncid if disk disappeared in STARTING
2233			 * state.
2234			 */
2235			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2236				sc->sc_bump_syncid = 0;
2237#ifdef	INVARIANTS
2238		} else {
2239			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2240			    sc->sc_name,
2241			    g_mirror_device_state2str(sc->sc_state),
2242			    g_mirror_get_diskname(disk),
2243			    g_mirror_disk_state2str(disk->d_state)));
2244#endif
2245		}
2246		DISK_STATE_CHANGED();
2247		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2248		    sc->sc_name, g_mirror_get_diskname(disk));
2249
2250		g_mirror_destroy_disk(disk);
2251		break;
2252	case G_MIRROR_DISK_STATE_DESTROY:
2253	    {
2254		int error;
2255
2256		error = g_mirror_clear_metadata(disk);
2257		if (error != 0)
2258			return (error);
2259		DISK_STATE_CHANGED();
2260		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2261		    sc->sc_name, g_mirror_get_diskname(disk));
2262
2263		g_mirror_destroy_disk(disk);
2264		sc->sc_ndisks--;
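		/*
		 * Update metadata on the remaining disks so they record
		 * the reduced number of components.
		 */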
2265		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2266			g_mirror_update_metadata(disk);
2267		}
2268		break;
2269	    }
2270	default:
2271		KASSERT(1 == 0, ("Unknown state (%u).", state));
2272		break;
2273	}
2274	return (0);
2275}
2276#undef	DISK_STATE_CHANGED
2277
2278static int
2279g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2280{
2281	struct g_provider *pp;
2282	u_char *buf;
2283	int error;
2284
2285	g_topology_assert();
2286
2287	error = g_access(cp, 1, 0, 0);
2288	if (error != 0)
2289		return (error);
2290	pp = cp->provider;
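	/*
	 * g_read_data() performs real I/O, so the topology lock is
	 * dropped around the call.
	 */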
2291	g_topology_unlock();
2292	/* Metadata are stored on last sector. */
2293	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2294	    &error);
2295	g_topology_lock();
2296	if (buf == NULL) {
2297		g_access(cp, -1, 0, 0);
2298		return (error);
2299	}
2300	if (error != 0) {
2301		g_access(cp, -1, 0, 0);
2302		g_free(buf);
2303		return (error);
2304	}
2305	error = g_access(cp, -1, 0, 0);
2306	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2307
2308	/* Decode metadata. */
2309	error = mirror_metadata_decode(buf, md);
2310	g_free(buf);
2311	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2312		return (EINVAL);
2313	if (error != 0) {
2314		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2315		    cp->provider->name);
2316		return (error);
2317	}
2318
2319	return (0);
2320}
2321
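/*
 * Check that metadata read from a component is consistent with the
 * already-configured device: same number of components, slice size,
 * balance algorithm, media size and sector size, and no unknown flags.
 */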
2322static int
2323g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2324    struct g_mirror_metadata *md)
2325{
2326
2327	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2328		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2329		    pp->name, md->md_did);
2330		return (EEXIST);
2331	}
2332	if (md->md_all != sc->sc_ndisks) {
2333		G_MIRROR_DEBUG(1,
2334		    "Invalid '%s' field on disk %s (device %s), skipping.",
2335		    "md_all", pp->name, sc->sc_name);
2336		return (EINVAL);
2337	}
2338	if (md->md_slice != sc->sc_slice) {
2339		G_MIRROR_DEBUG(1,
2340		    "Invalid '%s' field on disk %s (device %s), skipping.",
2341		    "md_slice", pp->name, sc->sc_name);
2342		return (EINVAL);
2343	}
2344	if (md->md_balance != sc->sc_balance) {
2345		G_MIRROR_DEBUG(1,
2346		    "Invalid '%s' field on disk %s (device %s), skipping.",
2347		    "md_balance", pp->name, sc->sc_name);
2348		return (EINVAL);
2349	}
2350	if (md->md_mediasize != sc->sc_mediasize) {
2351		G_MIRROR_DEBUG(1,
2352		    "Invalid '%s' field on disk %s (device %s), skipping.",
2353		    "md_mediasize", pp->name, sc->sc_name);
2354		return (EINVAL);
2355	}
2356	if (sc->sc_mediasize > pp->mediasize) {
2357		G_MIRROR_DEBUG(1,
2358		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2359		    sc->sc_name);
2360		return (EINVAL);
2361	}
2362	if (md->md_sectorsize != sc->sc_sectorsize) {
2363		G_MIRROR_DEBUG(1,
2364		    "Invalid '%s' field on disk %s (device %s), skipping.",
2365		    "md_sectorsize", pp->name, sc->sc_name);
2366		return (EINVAL);
2367	}
2368	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2369		G_MIRROR_DEBUG(1,
2370		    "Invalid sector size of disk %s (device %s), skipping.",
2371		    pp->name, sc->sc_name);
2372		return (EINVAL);
2373	}
2374	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2375		G_MIRROR_DEBUG(1,
2376		    "Invalid device flags on disk %s (device %s), skipping.",
2377		    pp->name, sc->sc_name);
2378		return (EINVAL);
2379	}
2380	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2381		G_MIRROR_DEBUG(1,
2382		    "Invalid disk flags on disk %s (device %s), skipping.",
2383		    pp->name, sc->sc_name);
2384		return (EINVAL);
2385	}
2386	return (0);
2387}
2388
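/*
 * Validate the metadata of a new component, create its disk structure
 * and wait until the NEW state event has been processed.
 */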
2389static int
2390g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2391    struct g_mirror_metadata *md)
2392{
2393	struct g_mirror_disk *disk;
2394	int error;
2395
2396	g_topology_assert();
2397	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2398
2399	error = g_mirror_check_metadata(sc, pp, md);
2400	if (error != 0)
2401		return (error);
2402	disk = g_mirror_init_disk(sc, pp, md, &error);
2403	if (disk == NULL)
2404		return (error);
2405	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2406	    G_MIRROR_EVENT_WAIT);
2407	return (error);
2408}
2409
2410static int
2411g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2412{
2413	struct g_mirror_softc *sc;
2414	struct g_mirror_disk *disk;
2415	int dcr, dcw, dce, err, error;
2416
2417	g_topology_assert();
2418	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2419	    acw, ace);
2420
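	/*
	 * Compute what the provider's access counts will be after this
	 * request is applied.
	 */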
2421	dcr = pp->acr + acr;
2422	dcw = pp->acw + acw;
2423	dce = pp->ace + ace;
2424
2425	/* On first open, grab an extra "exclusive" bit */
2426	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2427		ace++;
2428	/* ... and let go of it on last close */
2429	if (dcr == 0 && dcw == 0 && dce == 0)
2430		ace--;
2431
2432	sc = pp->geom->softc;
2433	if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
2434	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
2435		if (acr <= 0 && acw <= 0 && ace <= 0)
2436			return (0);
2437		else
2438			return (ENXIO);
2439	}
2440	error = ENXIO;
2441	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2442		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2443			continue;
2444		err = g_access(disk->d_consumer, acr, acw, ace);
2445		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2446		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2447		if (err == 0) {
2448			/*
2449			 * Mark disk as dirty on open and unmark on close.
2450			 */
2451			if (pp->acw == 0 && dcw > 0) {
2452				G_MIRROR_DEBUG(1,
2453				    "Disk %s (device %s) marked as dirty.",
2454				    g_mirror_get_diskname(disk), sc->sc_name);
2455				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2456				g_mirror_update_metadata(disk);
2457			} else if (pp->acw > 0 && dcw == 0) {
2458				G_MIRROR_DEBUG(1,
2459				    "Disk %s (device %s) marked as clean.",
2460				    g_mirror_get_diskname(disk), sc->sc_name);
2461				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2462				g_mirror_update_metadata(disk);
2463			}
2464			error = 0;
2465		} else {
2466			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2467			g_mirror_event_send(disk,
2468			    G_MIRROR_DISK_STATE_DISCONNECTED,
2469			    G_MIRROR_EVENT_DONTWAIT);
2470		}
2471	}
2472	/*
2473	 * Be sure to return 0 for negative access requests.
2474	 * In case of hardware problems, it is possible that we don't have
2475	 * any active disks here, so the loop above was a no-op and error
2476	 * is still ENXIO.
2477	 */
2478	if (error != 0 && acr <= 0 && acw <= 0 && ace <= 0)
2479		error = 0;
2480	return (error);
2481}
2482
2483static struct g_geom *
2484g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2485{
2486	struct g_mirror_softc *sc;
2487	struct g_geom *gp;
2488	int error, timeout;
2489
2490	g_topology_assert();
2491	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2492	    md->md_mid);
2493
2494	/* At least one disk is required. */
2495	if (md->md_all < 1)
2496		return (NULL);
2497	/*
2498	 * Action geom.
2499	 */
2500	gp = g_new_geomf(mp, "%s", md->md_name);
2501	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2502	gp->start = g_mirror_start;
2503	gp->spoiled = g_mirror_spoiled;
2504	gp->orphan = g_mirror_orphan;
2505	gp->access = g_mirror_access;
2506	gp->dumpconf = g_mirror_dumpconf;
2507
2508	sc->sc_id = md->md_mid;
2509	sc->sc_slice = md->md_slice;
2510	sc->sc_balance = md->md_balance;
2511	sc->sc_mediasize = md->md_mediasize;
2512	sc->sc_sectorsize = md->md_sectorsize;
2513	sc->sc_ndisks = md->md_all;
2514	sc->sc_flags = md->md_mflags;
2515	sc->sc_bump_syncid = 0;
2516	sc->sc_idle = 0;
2517	bioq_init(&sc->sc_queue);
2518	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2519	LIST_INIT(&sc->sc_disks);
2520	TAILQ_INIT(&sc->sc_events);
2521	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2522	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2523	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2524	gp->softc = sc;
2525	sc->sc_geom = gp;
2526	sc->sc_provider = NULL;
2527	/*
2528	 * Synchronization geom.
2529	 */
2530	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2531	gp->softc = sc;
2532	gp->orphan = g_mirror_orphan;
2533	sc->sc_sync.ds_geom = gp;
2534	sc->sc_sync.ds_ndisks = 0;
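	/*
	 * Create a dedicated worker thread which will process queued I/O
	 * requests and state events for this device.
	 */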
2535	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2536	    "g_mirror %s", md->md_name);
2537	if (error != 0) {
2538		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2539		    sc->sc_name);
2540		g_destroy_geom(sc->sc_sync.ds_geom);
2541		mtx_destroy(&sc->sc_events_mtx);
2542		mtx_destroy(&sc->sc_queue_mtx);
2543		g_destroy_geom(sc->sc_geom);
2544		free(sc, M_MIRROR);
2545		return (NULL);
2546	}
2547
2548	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2549
2550	/*
2551	 * Arm the startup timeout; g_mirror_go() forces the device to start.
2552	 */
2553	timeout = g_mirror_timeout * hz;
2554	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2555	return (sc->sc_geom);
2556}
2557
2558int
2559g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2560{
2561	struct g_provider *pp;
2562
2563	g_topology_assert();
2564
2565	if (sc == NULL)
2566		return (ENXIO);
2567	pp = sc->sc_provider;
2568	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2569		if (force) {
2570			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2571			    "cannot be removed cleanly.", pp->name);
2572		} else {
2573			G_MIRROR_DEBUG(1,
2574			    "Device %s is still open (r%dw%de%d).", pp->name,
2575			    pp->acr, pp->acw, pp->ace);
2576			return (EBUSY);
2577		}
2578	}
2579
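	/*
	 * Mark the device for destruction, wake up the worker thread and
	 * wait for it to exit before tearing the device down.
	 */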
2580	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2581	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2582	g_topology_unlock();
2583	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2584	mtx_lock(&sc->sc_queue_mtx);
2585	wakeup(sc);
2586	mtx_unlock(&sc->sc_queue_mtx);
2587	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2588	while (sc->sc_worker != NULL)
2589		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2590	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2591	g_topology_lock();
2592	g_mirror_destroy_device(sc);
2593	free(sc, M_MIRROR);
2594	return (0);
2595}
2596
2597static void
2598g_mirror_taste_orphan(struct g_consumer *cp)
2599{
2600
2601	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2602	    cp->provider->name));
2603}
2604
2605static struct g_geom *
2606g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2607{
2608	struct g_mirror_metadata md;
2609	struct g_mirror_softc *sc;
2610	struct g_consumer *cp;
2611	struct g_geom *gp;
2612	int error;
2613
2614	g_topology_assert();
2615	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2616	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2617
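	/*
	 * Create a throw-away geom and consumer just to read the on-disk
	 * metadata; both are destroyed right afterwards.
	 */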
2618	gp = g_new_geomf(mp, "mirror:taste");
2619	/*
2620	 * This orphan function should never be called.
2621	 */
2622	gp->orphan = g_mirror_taste_orphan;
2623	cp = g_new_consumer(gp);
2624	g_attach(cp, pp);
2625	error = g_mirror_read_metadata(cp, &md);
2626	g_detach(cp);
2627	g_destroy_consumer(cp);
2628	g_destroy_geom(gp);
2629	if (error != 0)
2630		return (NULL);
2631	gp = NULL;
2632
2633	if (md.md_version > G_MIRROR_VERSION) {
2634		printf("geom_mirror.ko module is too old to handle %s.\n",
2635		    pp->name);
2636		return (NULL);
2637	}
2638	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2639		return (NULL);
2640	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2641		G_MIRROR_DEBUG(0,
2642		    "Device %s: provider %s marked as inactive, skipping.",
2643		    md.md_name, pp->name);
2644		return (NULL);
2645	}
2646	if (g_mirror_debug >= 2)
2647		mirror_metadata_dump(&md);
2648
2649	/*
2650	 * Let's check if device already exists.
2651	 */
2652	sc = NULL;
2653	LIST_FOREACH(gp, &mp->geom, geom) {
2654		sc = gp->softc;
2655		if (sc == NULL)
2656			continue;
2657		if (sc->sc_sync.ds_geom == gp)
2658			continue;
2659		if (strcmp(md.md_name, sc->sc_name) != 0)
2660			continue;
2661		if (md.md_mid != sc->sc_id) {
2662			G_MIRROR_DEBUG(0, "Device %s already configured.",
2663			    sc->sc_name);
2664			return (NULL);
2665		}
2666		break;
2667	}
2668	if (gp == NULL) {
2669		gp = g_mirror_create(mp, &md);
2670		if (gp == NULL) {
2671			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2672			    md.md_name);
2673			return (NULL);
2674		}
2675		sc = gp->softc;
2676	}
2677	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2678	error = g_mirror_add_disk(sc, pp, &md);
2679	if (error != 0) {
2680		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2681		    pp->name, gp->name, error);
2682		if (LIST_EMPTY(&sc->sc_disks))
2683			g_mirror_destroy(sc, 1);
2684		return (NULL);
2685	}
2686	return (gp);
2687}
2688
2689static int
2690g_mirror_destroy_geom(struct gctl_req *req __unused,
2691    struct g_class *mp __unused, struct g_geom *gp)
2692{
2693
2694	return (g_mirror_destroy(gp->softc, 0));
2695}
2696
2697static void
2698g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2699    struct g_consumer *cp, struct g_provider *pp)
2700{
2701	struct g_mirror_softc *sc;
2702
2703	g_topology_assert();
2704
2705	sc = gp->softc;
2706	if (sc == NULL)
2707		return;
2708	/* Skip synchronization geom. */
2709	if (gp == sc->sc_sync.ds_geom)
2710		return;
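	/*
	 * Per-component details are reported for a consumer, device-wide
	 * details otherwise; nothing extra is printed for the provider.
	 */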
2711	if (pp != NULL) {
2712		/* Nothing here. */
2713	} else if (cp != NULL) {
2714		struct g_mirror_disk *disk;
2715
2716		disk = cp->private;
2717		if (disk == NULL)
2718			return;
2719		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2720		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2721			sbuf_printf(sb, "%s<Synchronized>", indent);
2722			if (disk->d_sync.ds_offset_done == 0)
2723				sbuf_printf(sb, "0%%");
2724			else {
2725				sbuf_printf(sb, "%u%%",
2726				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2727				    sc->sc_provider->mediasize));
2728			}
2729			sbuf_printf(sb, "</Synchronized>\n");
2730		}
2731		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2732		    disk->d_sync.ds_syncid);
2733		sbuf_printf(sb, "%s<Flags>", indent);
2734		if (disk->d_flags == 0)
2735			sbuf_printf(sb, "NONE");
2736		else {
2737			int first = 1;
2738
2739#define	ADD_FLAG(flag, name)	do {					\
2740	if ((disk->d_flags & (flag)) != 0) {				\
2741		if (!first)						\
2742			sbuf_printf(sb, ", ");				\
2743		else							\
2744			first = 0;					\
2745		sbuf_printf(sb, name);					\
2746	}								\
2747} while (0)
2748			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2749			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2750			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2751			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2752			    "SYNCHRONIZING");
2753			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2754#undef	ADD_FLAG
2755		}
2756		sbuf_printf(sb, "</Flags>\n");
2757		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2758		    disk->d_priority);
2759		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2760		    g_mirror_disk_state2str(disk->d_state));
2761	} else {
2762		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2763		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2764		sbuf_printf(sb, "%s<Flags>", indent);
2765		if (sc->sc_flags == 0)
2766			sbuf_printf(sb, "NONE");
2767		else {
2768			int first = 1;
2769
2770#define	ADD_FLAG(flag, name)	do {					\
2771	if ((sc->sc_flags & (flag)) != 0) {				\
2772		if (!first)						\
2773			sbuf_printf(sb, ", ");				\
2774		else							\
2775			first = 0;					\
2776		sbuf_printf(sb, name);					\
2777	}								\
2778} while (0)
2779			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2780#undef	ADD_FLAG
2781		}
2782		sbuf_printf(sb, "</Flags>\n");
2783		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2784		    (u_int)sc->sc_slice);
2785		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2786		    balance_name(sc->sc_balance));
2787		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2788		    sc->sc_ndisks);
2789		sbuf_printf(sb, "%s<State>", indent);
2790		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2791			sbuf_printf(sb, "%s", "STARTING");
2792		else if (sc->sc_ndisks ==
2793		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2794			sbuf_printf(sb, "%s", "COMPLETE");
2795		else
2796			sbuf_printf(sb, "%s", "DEGRADED");
2797		sbuf_printf(sb, "</State>\n");
2798	}
2799}
2800
2801static void
2802g_mirror_shutdown(void *arg, int howto)
2803{
2804	struct g_class *mp;
2805	struct g_geom *gp, *gp2;
2806
2807	mp = arg;
2808	DROP_GIANT();
2809	g_topology_lock();
2810	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
2811		if (gp->softc == NULL)
2812			continue;
2813		g_mirror_destroy(gp->softc, 1);
2814	}
2815	g_topology_unlock();
2816	PICKUP_GIANT();
2817#if 0
2818	tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
2819#endif
2820}
2821
2822static void
2823g_mirror_init(struct g_class *mp)
2824{
2825
2826	g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
2827	    g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST);
2828	if (g_mirror_ehtag == NULL)
2829		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
2830}
2831
2832static void
2833g_mirror_fini(struct g_class *mp)
2834{
2835
2836	if (g_mirror_ehtag == NULL)
2837		return;
2838	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag);
2839}
2840
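/*
 * Check whether every configured mirror device already has a usable
 * provider; g_mirror_rootwait() below waits until this is the case.
 */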
2841static int
2842g_mirror_can_go(void)
2843{
2844	struct g_mirror_softc *sc;
2845	struct g_geom *gp;
2846	struct g_provider *pp;
2847	int can_go;
2848
2849	DROP_GIANT();
2850	can_go = 1;
2851	g_topology_lock();
2852	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2853		sc = gp->softc;
2854		if (sc == NULL) {
2855			can_go = 0;
2856			break;
2857		}
2858		pp = sc->sc_provider;
2859		if (pp == NULL || pp->error != 0) {
2860			can_go = 0;
2861			break;
2862		}
2863	}
2864	g_topology_unlock();
2865	PICKUP_GIANT();
2866	return (can_go);
2867}
2868
2869static void
2870g_mirror_rootwait(void)
2871{
2872
2873	/*
2874	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called
2875	 * HACK: before we get providers for tasting.
2876	 */
2877	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2878	/*
2879	 * Wait for mirrors in degraded state.
2880	 */
2881	for (;;) {
2882		if (g_mirror_can_go())
2883			break;
2884		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2885	}
2886}
2887
2888SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)
2889
2890DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2891