1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 137254 2004-11-05 12:35:21Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/eventhandler.h>
41#include <vm/uma.h>
42#include <geom/geom.h>
43#include <sys/proc.h>
44#include <sys/kthread.h>
45#include <geom/mirror/g_mirror.h>
46
47
48static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
49
50SYSCTL_DECL(_kern_geom);
51SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
52u_int g_mirror_debug = 0;
53TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
54SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
55    "Debug level");
56static u_int g_mirror_timeout = 4;
57TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
58SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
59    0, "Time to wait on all mirror components");
60static u_int g_mirror_idletime = 5;
61TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
62SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Idle time before marking components as clean");
64static u_int g_mirror_reqs_per_sync = 5;
65SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
66    &g_mirror_reqs_per_sync, 0,
67    "Number of regular I/O requests per synchronization request");
68static u_int g_mirror_syncs_per_sec = 100;
69SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
70    &g_mirror_syncs_per_sec, 0,
71    "Number of synchronizations requests per second");
72
73#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
74	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
75	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
76	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
77} while (0)
78
79static eventhandler_tag g_mirror_ehtag = NULL;
80
81static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
82    struct g_geom *gp);
83static g_taste_t g_mirror_taste;
84static void g_mirror_init(struct g_class *mp);
85static void g_mirror_fini(struct g_class *mp);
86
87struct g_class g_mirror_class = {
88	.name = G_MIRROR_CLASS_NAME,
89	.version = G_VERSION,
90	.ctlreq = g_mirror_config,
91	.taste = g_mirror_taste,
92	.destroy_geom = g_mirror_destroy_geom,
93	.init = g_mirror_init,
94	.fini = g_mirror_fini
95};
96
97
98static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
99static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
100static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
101static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
102    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
103static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
104
105
106static const char *
107g_mirror_disk_state2str(int state)
108{
109
110	switch (state) {
111	case G_MIRROR_DISK_STATE_NONE:
112		return ("NONE");
113	case G_MIRROR_DISK_STATE_NEW:
114		return ("NEW");
115	case G_MIRROR_DISK_STATE_ACTIVE:
116		return ("ACTIVE");
117	case G_MIRROR_DISK_STATE_STALE:
118		return ("STALE");
119	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
120		return ("SYNCHRONIZING");
121	case G_MIRROR_DISK_STATE_DISCONNECTED:
122		return ("DISCONNECTED");
123	case G_MIRROR_DISK_STATE_DESTROY:
124		return ("DESTROY");
125	default:
126		return ("INVALID");
127	}
128}
129
130static const char *
131g_mirror_device_state2str(int state)
132{
133
134	switch (state) {
135	case G_MIRROR_DEVICE_STATE_STARTING:
136		return ("STARTING");
137	case G_MIRROR_DEVICE_STATE_RUNNING:
138		return ("RUNNING");
139	default:
140		return ("INVALID");
141	}
142}
143
144static const char *
145g_mirror_get_diskname(struct g_mirror_disk *disk)
146{
147
148	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
149		return ("[unknown]");
150	return (disk->d_name);
151}
152
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain the status of disks and of
 * the whole device from a single thread, which simplifies locking.
 */
158static void
159g_mirror_event_free(struct g_mirror_event *ep)
160{
161
162	free(ep, M_MIRROR);
163}
164
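/*
 * Queue an event for the worker thread.  Unless G_MIRROR_EVENT_DONTWAIT
 * is given, drop the topology lock, wait until the worker has handled
 * the event and return its error status.
 */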
165int
166g_mirror_event_send(void *arg, int state, int flags)
167{
168	struct g_mirror_softc *sc;
169	struct g_mirror_disk *disk;
170	struct g_mirror_event *ep;
171	int error;
172
173	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
174	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
175	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
176		disk = NULL;
177		sc = arg;
178	} else {
179		disk = arg;
180		sc = disk->d_softc;
181	}
182	ep->e_disk = disk;
183	ep->e_state = state;
184	ep->e_flags = flags;
185	ep->e_error = 0;
186	mtx_lock(&sc->sc_events_mtx);
187	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
188	mtx_unlock(&sc->sc_events_mtx);
189	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
190	mtx_lock(&sc->sc_queue_mtx);
191	wakeup(sc);
192	mtx_unlock(&sc->sc_queue_mtx);
193	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
194		return (0);
195	g_topology_assert();
196	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
197	g_topology_unlock();
198	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
199		mtx_lock(&sc->sc_events_mtx);
200		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
201		    hz * 5);
202	}
	/* Don't even try to use 'sc' here, because it could already be gone. */
204	g_topology_lock();
205	error = ep->e_error;
206	g_mirror_event_free(ep);
207	return (error);
208}
209
210static struct g_mirror_event *
211g_mirror_event_get(struct g_mirror_softc *sc)
212{
213	struct g_mirror_event *ep;
214
215	mtx_lock(&sc->sc_events_mtx);
216	ep = TAILQ_FIRST(&sc->sc_events);
217	if (ep != NULL)
218		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
219	mtx_unlock(&sc->sc_events_mtx);
220	return (ep);
221}
222
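/*
 * Cancel all pending events related to the given disk.
 */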
223static void
224g_mirror_event_cancel(struct g_mirror_disk *disk)
225{
226	struct g_mirror_softc *sc;
227	struct g_mirror_event *ep, *tmpep;
228
229	g_topology_assert();
230
231	sc = disk->d_softc;
232	mtx_lock(&sc->sc_events_mtx);
233	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
234		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
235			continue;
236		if (ep->e_disk != disk)
237			continue;
238		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
239		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
240			g_mirror_event_free(ep);
241		else {
242			ep->e_error = ECANCELED;
243			wakeup(ep);
244		}
245	}
246	mtx_unlock(&sc->sc_events_mtx);
247}
248
/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
253u_int
254g_mirror_ndisks(struct g_mirror_softc *sc, int state)
255{
256	struct g_mirror_disk *disk;
257	u_int n = 0;
258
259	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
260		if (state == -1 || disk->d_state == state)
261			n++;
262	}
263	return (n);
264}
265
/*
 * Find a disk in the mirror by its disk ID.
 */
269static struct g_mirror_disk *
270g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
271{
272	struct g_mirror_disk *disk;
273
274	g_topology_assert();
275
276	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
277		if (disk->d_id == id)
278			return (disk);
279	}
280	return (NULL);
281}
282
283static u_int
284g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
285{
286	struct bio *bp;
287	u_int nreqs = 0;
288
289	mtx_lock(&sc->sc_queue_mtx);
290	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
291		if (bp->bio_from == cp)
292			nreqs++;
293	}
294	mtx_unlock(&sc->sc_queue_mtx);
295	return (nreqs);
296}
297
298static int
299g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
300{
301
302	if (cp->index > 0) {
303		G_MIRROR_DEBUG(2,
304		    "I/O requests for %s exist, can't destroy it now.",
305		    cp->provider->name);
306		return (1);
307	}
308	if (g_mirror_nrequests(sc, cp) > 0) {
309		G_MIRROR_DEBUG(2,
310		    "I/O requests for %s in queue, can't destroy it now.",
311		    cp->provider->name);
312		return (1);
313	}
314	return (0);
315}
316
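/*
 * Detach and destroy the consumer, but only once it has no I/O requests
 * in progress or waiting in the queue.
 */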
317static void
318g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
319{
320
321	g_topology_assert();
322
323	cp->private = NULL;
324	if (g_mirror_is_busy(sc, cp))
325		return;
326	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
327	g_detach(cp);
328	g_destroy_consumer(cp);
329}
330
331static int
332g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
333{
334	int error;
335
336	g_topology_assert();
337	KASSERT(disk->d_consumer == NULL,
338	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
339
340	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
341	disk->d_consumer->private = disk;
342	disk->d_consumer->index = 0;
343	error = g_attach(disk->d_consumer, pp);
344	if (error != 0)
345		return (error);
346	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
347	return (0);
348}
349
350static void
351g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
352{
353
354	g_topology_assert();
355
356	if (cp == NULL)
357		return;
358	if (cp->provider != NULL) {
359		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
360		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
361			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
362			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
363			    0);
364			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
365		}
366		g_mirror_kill_consumer(sc, cp);
367	} else {
368		g_destroy_consumer(cp);
369	}
370}
371
/*
 * Initialize the disk: allocate memory, create a consumer and attach it
 * to the provider.
 */
376static struct g_mirror_disk *
377g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
378    struct g_mirror_metadata *md, int *errorp)
379{
380	struct g_mirror_disk *disk;
381	int error;
382
383	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
384	if (disk == NULL) {
385		error = ENOMEM;
386		goto fail;
387	}
388	disk->d_softc = sc;
389	error = g_mirror_connect_disk(disk, pp);
390	if (error != 0)
391		goto fail;
392	disk->d_id = md->md_did;
393	disk->d_state = G_MIRROR_DISK_STATE_NONE;
394	disk->d_priority = md->md_priority;
395	disk->d_delay.sec = 0;
396	disk->d_delay.frac = 0;
397	binuptime(&disk->d_last_used);
398	disk->d_flags = md->md_dflags;
399	if (md->md_provider[0] != '\0')
400		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
401	disk->d_sync.ds_consumer = NULL;
402	disk->d_sync.ds_offset = md->md_sync_offset;
403	disk->d_sync.ds_offset_done = md->md_sync_offset;
404	disk->d_sync.ds_resync = -1;
405	disk->d_sync.ds_syncid = md->md_syncid;
406	if (errorp != NULL)
407		*errorp = 0;
408	return (disk);
409fail:
410	if (errorp != NULL)
411		*errorp = error;
412	if (disk != NULL) {
413		g_mirror_disconnect_consumer(sc, disk->d_consumer);
414		free(disk, M_MIRROR);
415	}
416	return (NULL);
417}
418
419static void
420g_mirror_destroy_disk(struct g_mirror_disk *disk)
421{
422	struct g_mirror_softc *sc;
423
424	g_topology_assert();
425
426	LIST_REMOVE(disk, d_next);
427	g_mirror_event_cancel(disk);
428	sc = disk->d_softc;
429	if (sc->sc_hint == disk)
430		sc->sc_hint = NULL;
431	switch (disk->d_state) {
432	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
433		g_mirror_sync_stop(disk, 1);
434		/* FALLTHROUGH */
435	case G_MIRROR_DISK_STATE_NEW:
436	case G_MIRROR_DISK_STATE_STALE:
437	case G_MIRROR_DISK_STATE_ACTIVE:
438		g_mirror_disconnect_consumer(sc, disk->d_consumer);
439		free(disk, M_MIRROR);
440		break;
441	default:
442		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
443		    g_mirror_get_diskname(disk),
444		    g_mirror_disk_state2str(disk->d_state)));
445	}
446}
447
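/*
 * Tear the whole device down: destroy the provider, all disks, pending
 * events and both geoms.
 */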
448static void
449g_mirror_destroy_device(struct g_mirror_softc *sc)
450{
451	struct g_mirror_disk *disk;
452	struct g_mirror_event *ep;
453	struct g_geom *gp;
454	struct g_consumer *cp, *tmpcp;
455
456	g_topology_assert();
457
458	gp = sc->sc_geom;
459	if (sc->sc_provider != NULL)
460		g_mirror_destroy_provider(sc);
461	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
462	    disk = LIST_FIRST(&sc->sc_disks)) {
463		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
464		g_mirror_update_metadata(disk);
465		g_mirror_destroy_disk(disk);
466	}
467	while ((ep = g_mirror_event_get(sc)) != NULL) {
468		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
469			g_mirror_event_free(ep);
470		else {
471			ep->e_error = ECANCELED;
472			ep->e_flags |= G_MIRROR_EVENT_DONE;
473			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
474			mtx_lock(&sc->sc_events_mtx);
475			wakeup(ep);
476			mtx_unlock(&sc->sc_events_mtx);
477		}
478	}
479	callout_drain(&sc->sc_callout);
480	gp->softc = NULL;
481
482	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
483		g_mirror_disconnect_consumer(sc, cp);
484	}
485	sc->sc_sync.ds_geom->softc = NULL;
486	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
487	mtx_destroy(&sc->sc_queue_mtx);
488	mtx_destroy(&sc->sc_events_mtx);
489	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
490	g_wither_geom(gp, ENXIO);
491}
492
493static void
494g_mirror_orphan(struct g_consumer *cp)
495{
496	struct g_mirror_disk *disk;
497
498	g_topology_assert();
499
500	disk = cp->private;
501	if (disk == NULL)
502		return;
503	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
504	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
505	    G_MIRROR_EVENT_DONTWAIT);
506}
507
508static void
509g_mirror_spoiled(struct g_consumer *cp)
510{
511	struct g_mirror_disk *disk;
512
513	g_topology_assert();
514
515	disk = cp->private;
516	if (disk == NULL)
517		return;
518	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
519	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
520	    G_MIRROR_EVENT_DONTWAIT);
521}
522
/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
528static __inline struct g_mirror_disk *
529g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
530{
531	struct g_mirror_disk *dp;
532
533	for (dp = LIST_NEXT(disk, d_next); dp != disk;
534	    dp = LIST_NEXT(dp, d_next)) {
535		if (dp == NULL)
536			dp = LIST_FIRST(&sc->sc_disks);
537		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
538			break;
539	}
540	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
541		return (NULL);
542	return (dp);
543}
544
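/*
 * Return an active disk chosen in round-robin fashion, using sc_hint as
 * the starting point, or NULL if there are no active disks.
 */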
545static struct g_mirror_disk *
546g_mirror_get_disk(struct g_mirror_softc *sc)
547{
548	struct g_mirror_disk *disk;
549
550	if (sc->sc_hint == NULL) {
551		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
552		if (sc->sc_hint == NULL)
553			return (NULL);
554	}
555	disk = sc->sc_hint;
556	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
557		disk = g_mirror_find_next(sc, disk);
558		if (disk == NULL)
559			return (NULL);
560	}
561	sc->sc_hint = g_mirror_find_next(sc, disk);
562	return (disk);
563}
564
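/*
 * Write the given metadata into the last sector of the component.
 * A NULL 'md' clears the metadata.  On error the disk is disconnected.
 */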
565static int
566g_mirror_write_metadata(struct g_mirror_disk *disk,
567    struct g_mirror_metadata *md)
568{
569	struct g_mirror_softc *sc;
570	struct g_consumer *cp;
571	off_t offset, length;
572	u_char *sector;
573	int close = 0, error = 0;
574
575	g_topology_assert();
576
577	sc = disk->d_softc;
578	cp = disk->d_consumer;
579	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
580	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
581	length = cp->provider->sectorsize;
582	offset = cp->provider->mediasize - length;
583	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	/*
	 * Open the consumer if it isn't open yet and remember to close it.
	 */
587	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
588		error = g_access(cp, 0, 1, 1);
589		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
590		    cp->provider->name, 0, 1, 1, error);
591		if (error == 0)
592			close = 1;
593#ifdef	INVARIANTS
594	} else {
595		KASSERT(cp->acw > 0 && cp->ace > 0,
596		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
597		    cp->acr, cp->acw, cp->ace));
598#endif
599	}
600	if (error == 0) {
601		if (md != NULL)
602			mirror_metadata_encode(md, sector);
603		g_topology_unlock();
604		error = g_write_data(cp, offset, sector, length);
605		g_topology_lock();
606	}
607	free(sector, M_MIRROR);
608	if (close) {
609		g_access(cp, 0, -1, -1);
610		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
611		    cp->provider->name, 0, -1, -1, 0);
612	}
613	if (error != 0) {
614		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
615		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
616		    G_MIRROR_EVENT_DONTWAIT);
617	}
618	return (error);
619}
620
621static int
622g_mirror_clear_metadata(struct g_mirror_disk *disk)
623{
624	int error;
625
626	g_topology_assert();
627	error = g_mirror_write_metadata(disk, NULL);
628	if (error == 0) {
629		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
630		    g_mirror_get_diskname(disk));
631	} else {
632		G_MIRROR_DEBUG(0,
633		    "Cannot clear metadata on disk %s (error=%d).",
634		    g_mirror_get_diskname(disk), error);
635	}
636	return (error);
637}
638
639void
640g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
641    struct g_mirror_metadata *md)
642{
643
644	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
645	md->md_version = G_MIRROR_VERSION;
646	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
647	md->md_mid = sc->sc_id;
648	md->md_all = sc->sc_ndisks;
649	md->md_slice = sc->sc_slice;
650	md->md_balance = sc->sc_balance;
651	md->md_mediasize = sc->sc_mediasize;
652	md->md_sectorsize = sc->sc_sectorsize;
653	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
654	bzero(md->md_provider, sizeof(md->md_provider));
655	if (disk == NULL) {
656		md->md_did = arc4random();
657		md->md_priority = 0;
658		md->md_syncid = 0;
659		md->md_dflags = 0;
660		md->md_sync_offset = 0;
661	} else {
662		md->md_did = disk->d_id;
663		md->md_priority = disk->d_priority;
664		md->md_syncid = disk->d_sync.ds_syncid;
665		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
666		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
667			md->md_sync_offset = disk->d_sync.ds_offset_done;
668		else
669			md->md_sync_offset = 0;
670		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
671			strlcpy(md->md_provider,
672			    disk->d_consumer->provider->name,
673			    sizeof(md->md_provider));
674		}
675	}
676}
677
678void
679g_mirror_update_metadata(struct g_mirror_disk *disk)
680{
681	struct g_mirror_metadata md;
682	int error;
683
684	g_topology_assert();
685	g_mirror_fill_metadata(disk->d_softc, disk, &md);
686	error = g_mirror_write_metadata(disk, &md);
687	if (error == 0) {
688		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
689		    g_mirror_get_diskname(disk));
690	} else {
691		G_MIRROR_DEBUG(0,
692		    "Cannot update metadata on disk %s (error=%d).",
693		    g_mirror_get_diskname(disk), error);
694	}
695}
696
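/*
 * Increase the device syncid and store it in the metadata of all active
 * and synchronizing components.
 */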
697static void
698g_mirror_bump_syncid(struct g_mirror_softc *sc)
699{
700	struct g_mirror_disk *disk;
701
702	g_topology_assert();
703	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
704	    ("%s called with no active disks (device=%s).", __func__,
705	    sc->sc_name));
706
707	sc->sc_syncid++;
708	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
709	    sc->sc_syncid);
710	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
711		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
712		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
713			disk->d_sync.ds_syncid = sc->sc_syncid;
714			g_mirror_update_metadata(disk);
715		}
716	}
717}
718
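/*
 * The device has been idle for a while, so mark all active components
 * as clean.
 */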
719static void
720g_mirror_idle(struct g_mirror_softc *sc)
721{
722	struct g_mirror_disk *disk;
723
724	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
725		return;
726	sc->sc_idle = 1;
727	g_topology_lock();
728	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
729		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
730			continue;
731		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
732		    g_mirror_get_diskname(disk), sc->sc_name);
733		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
734		g_mirror_update_metadata(disk);
735	}
736	g_topology_unlock();
737}
738
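/*
 * Leave the idle state: mark all active components as dirty before
 * regular writes are sent to them.
 */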
739static void
740g_mirror_unidle(struct g_mirror_softc *sc)
741{
742	struct g_mirror_disk *disk;
743
744	sc->sc_idle = 0;
745	g_topology_lock();
746	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
747		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
748			continue;
749		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
750		    g_mirror_get_diskname(disk), sc->sc_name);
751		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
752		g_mirror_update_metadata(disk);
753	}
754	g_topology_unlock();
755}
756
757static __inline int
758bintime_cmp(struct bintime *bt1, struct bintime *bt2)
759{
760
761	if (bt1->sec < bt2->sec)
762		return (-1);
763	else if (bt1->sec > bt2->sec)
764		return (1);
765	if (bt1->frac < bt2->frac)
766		return (-1);
767	else if (bt1->frac > bt2->frac)
768		return (1);
769	return (0);
770}
771
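/*
 * For the 'load' balance algorithm, record how long the completed
 * request took on this disk.
 */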
772static void
773g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
774{
775
776	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
777		return;
778	binuptime(&disk->d_delay);
779	bintime_sub(&disk->d_delay, &bp->bio_t0);
780}
781
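/*
 * A cloned regular request has completed; hand it over to the worker
 * thread.
 */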
782static void
783g_mirror_done(struct bio *bp)
784{
785	struct g_mirror_softc *sc;
786
787	sc = bp->bio_from->geom->softc;
788	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
789	mtx_lock(&sc->sc_queue_mtx);
790	bioq_disksort(&sc->sc_queue, bp);
791	wakeup(sc);
792	mtx_unlock(&sc->sc_queue_mtx);
793}
794
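/*
 * Final processing of a regular request that came back from a component.
 * A failed read is queued again, so it can be retried on another
 * component; writes and deletes are delivered once all cloned requests
 * have returned.
 */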
795static void
796g_mirror_regular_request(struct bio *bp)
797{
798	struct g_mirror_softc *sc;
799	struct g_mirror_disk *disk;
800	struct bio *pbp;
801
802	g_topology_assert_not();
803
804	bp->bio_from->index--;
805	pbp = bp->bio_parent;
806	sc = pbp->bio_to->geom->softc;
807	disk = bp->bio_from->private;
808	if (disk == NULL) {
809		g_topology_lock();
810		g_mirror_kill_consumer(sc, bp->bio_from);
811		g_topology_unlock();
812	} else {
813		g_mirror_update_delay(disk, bp);
814	}
815
816	pbp->bio_inbed++;
817	KASSERT(pbp->bio_inbed <= pbp->bio_children,
818	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
819	    pbp->bio_children));
820	if (bp->bio_error == 0 && pbp->bio_error == 0) {
821		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
822		g_destroy_bio(bp);
823		if (pbp->bio_children == pbp->bio_inbed) {
824			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
825			pbp->bio_completed = pbp->bio_length;
826			g_io_deliver(pbp, pbp->bio_error);
827		}
828		return;
829	} else if (bp->bio_error != 0) {
830		if (pbp->bio_error == 0)
831			pbp->bio_error = bp->bio_error;
832		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
833		    bp->bio_error);
834		if (disk != NULL) {
835			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
836			g_mirror_event_send(disk,
837			    G_MIRROR_DISK_STATE_DISCONNECTED,
838			    G_MIRROR_EVENT_DONTWAIT);
839		}
840		switch (pbp->bio_cmd) {
841		case BIO_DELETE:
842		case BIO_WRITE:
843			pbp->bio_inbed--;
844			pbp->bio_children--;
845			break;
846		}
847	}
848	g_destroy_bio(bp);
849
850	switch (pbp->bio_cmd) {
851	case BIO_READ:
852		if (pbp->bio_children == pbp->bio_inbed) {
853			pbp->bio_error = 0;
854			mtx_lock(&sc->sc_queue_mtx);
855			bioq_disksort(&sc->sc_queue, pbp);
856			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
857			wakeup(sc);
858			mtx_unlock(&sc->sc_queue_mtx);
859		}
860		break;
861	case BIO_DELETE:
862	case BIO_WRITE:
863		if (pbp->bio_children == 0) {
864			/*
865			 * All requests failed.
866			 */
867		} else if (pbp->bio_inbed < pbp->bio_children) {
868			/* Do nothing. */
869			break;
870		} else if (pbp->bio_children == pbp->bio_inbed) {
871			/* Some requests succeeded. */
872			pbp->bio_error = 0;
873			pbp->bio_completed = pbp->bio_length;
874		}
875		g_io_deliver(pbp, pbp->bio_error);
876		break;
877	default:
878		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
879		break;
880	}
881}
882
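/*
 * A synchronization request has completed; hand it over to the worker
 * thread.
 */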
883static void
884g_mirror_sync_done(struct bio *bp)
885{
886	struct g_mirror_softc *sc;
887
888	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
889	sc = bp->bio_from->geom->softc;
890	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
891	mtx_lock(&sc->sc_queue_mtx);
892	bioq_disksort(&sc->sc_queue, bp);
893	wakeup(sc);
894	mtx_unlock(&sc->sc_queue_mtx);
895}
896
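/*
 * Entry point for I/O requests to the mirror provider: unsupported
 * commands are rejected, everything else is queued for the worker
 * thread.
 */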
897static void
898g_mirror_start(struct bio *bp)
899{
900	struct g_mirror_softc *sc;
901
902	sc = bp->bio_to->geom->softc;
903	/*
904	 * If sc == NULL or there are no valid disks, provider's error
905	 * should be set and g_mirror_start() should not be called at all.
906	 */
907	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
908	    ("Provider's error should be set (error=%d)(mirror=%s).",
909	    bp->bio_to->error, bp->bio_to->name));
910	G_MIRROR_LOGREQ(3, bp, "Request received.");
911
912	switch (bp->bio_cmd) {
913	case BIO_READ:
914	case BIO_WRITE:
915	case BIO_DELETE:
916		break;
917	case BIO_GETATTR:
918	default:
919		g_io_deliver(bp, EOPNOTSUPP);
920		return;
921	}
922	mtx_lock(&sc->sc_queue_mtx);
923	bioq_disksort(&sc->sc_queue, bp);
924	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
925	wakeup(sc);
926	mtx_unlock(&sc->sc_queue_mtx);
927}
928
929/*
930 * Send one synchronization request.
931 */
932static void
933g_mirror_sync_one(struct g_mirror_disk *disk)
934{
935	struct g_mirror_softc *sc;
936	struct bio *bp;
937
938	sc = disk->d_softc;
939	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
940	    ("Disk %s is not marked for synchronization.",
941	    g_mirror_get_diskname(disk)));
942
943	bp = g_new_bio();
944	if (bp == NULL)
945		return;
946	bp->bio_parent = NULL;
947	bp->bio_cmd = BIO_READ;
948	bp->bio_offset = disk->d_sync.ds_offset;
949	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
950	bp->bio_cflags = 0;
951	bp->bio_done = g_mirror_sync_done;
952	bp->bio_data = disk->d_sync.ds_data;
953	if (bp->bio_data == NULL) {
954		g_destroy_bio(bp);
955		return;
956	}
957	disk->d_sync.ds_offset += bp->bio_length;
958	bp->bio_to = sc->sc_provider;
959	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
960	disk->d_sync.ds_consumer->index++;
961	g_io_request(bp, disk->d_sync.ds_consumer);
962}
963
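/*
 * Final processing of a synchronization request: a completed read is
 * turned into a write to the disk being synchronized; a completed write
 * advances the synchronization offset and, once the end of the provider
 * is reached, activates the disk.
 */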
964static void
965g_mirror_sync_request(struct bio *bp)
966{
967	struct g_mirror_softc *sc;
968	struct g_mirror_disk *disk;
969
970	bp->bio_from->index--;
971	sc = bp->bio_from->geom->softc;
972	disk = bp->bio_from->private;
973	if (disk == NULL) {
974		g_topology_lock();
975		g_mirror_kill_consumer(sc, bp->bio_from);
976		g_topology_unlock();
977		g_destroy_bio(bp);
978		return;
979	}
980
981	/*
982	 * Synchronization request.
983	 */
984	switch (bp->bio_cmd) {
985	case BIO_READ:
986	    {
987		struct g_consumer *cp;
988
989		if (bp->bio_error != 0) {
990			G_MIRROR_LOGREQ(0, bp,
991			    "Synchronization request failed (error=%d).",
992			    bp->bio_error);
993			g_destroy_bio(bp);
994			return;
995		}
996		G_MIRROR_LOGREQ(3, bp,
997		    "Synchronization request half-finished.");
998		bp->bio_cmd = BIO_WRITE;
999		bp->bio_cflags = 0;
1000		cp = disk->d_consumer;
1001		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1002		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1003		    cp->acr, cp->acw, cp->ace));
1004		cp->index++;
1005		g_io_request(bp, cp);
1006		return;
1007	    }
1008	case BIO_WRITE:
1009	    {
1010		struct g_mirror_disk_sync *sync;
1011
1012		if (bp->bio_error != 0) {
1013			G_MIRROR_LOGREQ(0, bp,
1014			    "Synchronization request failed (error=%d).",
1015			    bp->bio_error);
1016			g_destroy_bio(bp);
1017			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
1018			g_mirror_event_send(disk,
1019			    G_MIRROR_DISK_STATE_DISCONNECTED,
1020			    G_MIRROR_EVENT_DONTWAIT);
1021			return;
1022		}
1023		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1024		sync = &disk->d_sync;
1025		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
1026		g_destroy_bio(bp);
1027		if (sync->ds_resync != -1)
1028			break;
1029		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
1030			/*
1031			 * Disk up-to-date, activate it.
1032			 */
1033			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1034			    G_MIRROR_EVENT_DONTWAIT);
1035			return;
1036		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done after every 100 requests
			 * (MAXPHYS bytes each).
			 * XXX: This should be configurable.
			 */
1041			g_topology_lock();
1042			g_mirror_update_metadata(disk);
1043			g_topology_unlock();
1044		}
1045		return;
1046	    }
1047	default:
1048		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1049		    bp->bio_cmd, sc->sc_name));
1050		break;
1051	}
1052}
1053
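/*
 * The 'prefer' balance algorithm: always read from the first active
 * disk on the list.
 */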
1054static void
1055g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1056{
1057	struct g_mirror_disk *disk;
1058	struct g_consumer *cp;
1059	struct bio *cbp;
1060
1061	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1062		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1063			break;
1064	}
1065	if (disk == NULL) {
1066		if (bp->bio_error == 0)
1067			bp->bio_error = ENXIO;
1068		g_io_deliver(bp, bp->bio_error);
1069		return;
1070	}
1071	cbp = g_clone_bio(bp);
1072	if (cbp == NULL) {
1073		if (bp->bio_error == 0)
1074			bp->bio_error = ENOMEM;
1075		g_io_deliver(bp, bp->bio_error);
1076		return;
1077	}
1078	/*
	 * Fill in the component bio structure.
1080	 */
1081	cp = disk->d_consumer;
1082	cbp->bio_done = g_mirror_done;
1083	cbp->bio_to = cp->provider;
1084	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1085	KASSERT(cp->acr > 0 && cp->ace > 0,
1086	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1087	    cp->acw, cp->ace));
1088	cp->index++;
1089	g_io_request(cbp, cp);
1090}
1091
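/*
 * The 'round-robin' balance algorithm: read from the active disks in
 * turn.
 */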
1092static void
1093g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1094{
1095	struct g_mirror_disk *disk;
1096	struct g_consumer *cp;
1097	struct bio *cbp;
1098
1099	disk = g_mirror_get_disk(sc);
1100	if (disk == NULL) {
1101		if (bp->bio_error == 0)
1102			bp->bio_error = ENXIO;
1103		g_io_deliver(bp, bp->bio_error);
1104		return;
1105	}
1106	cbp = g_clone_bio(bp);
1107	if (cbp == NULL) {
1108		if (bp->bio_error == 0)
1109			bp->bio_error = ENOMEM;
1110		g_io_deliver(bp, bp->bio_error);
1111		return;
1112	}
1113	/*
	 * Fill in the component bio structure.
1115	 */
1116	cp = disk->d_consumer;
1117	cbp->bio_done = g_mirror_done;
1118	cbp->bio_to = cp->provider;
1119	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1120	KASSERT(cp->acr > 0 && cp->ace > 0,
1121	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1122	    cp->acw, cp->ace));
1123	cp->index++;
1124	g_io_request(cbp, cp);
1125}
1126
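/*
 * The 'load' balance algorithm: read from the active disk whose last
 * request completed fastest, or from one that has been idle for at
 * least two seconds.
 */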
1127static void
1128g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1129{
1130	struct g_mirror_disk *disk, *dp;
1131	struct g_consumer *cp;
1132	struct bio *cbp;
1133	struct bintime curtime;
1134
1135	binuptime(&curtime);
	/*
	 * Find the active disk with the smallest load.
	 */
1139	disk = NULL;
1140	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1141		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1142			continue;
1143		/* If disk wasn't used for more than 2 sec, use it. */
1144		if (curtime.sec - dp->d_last_used.sec >= 2) {
1145			disk = dp;
1146			break;
1147		}
1148		if (disk == NULL ||
1149		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1150			disk = dp;
1151		}
1152	}
1153	cbp = g_clone_bio(bp);
1154	if (cbp == NULL) {
1155		if (bp->bio_error == 0)
1156			bp->bio_error = ENOMEM;
1157		g_io_deliver(bp, bp->bio_error);
1158		return;
1159	}
1160	/*
	 * Fill in the component bio structure.
1162	 */
1163	cp = disk->d_consumer;
1164	cbp->bio_done = g_mirror_done;
1165	cbp->bio_to = cp->provider;
1166	binuptime(&disk->d_last_used);
1167	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1168	KASSERT(cp->acr > 0 && cp->ace > 0,
1169	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1170	    cp->acw, cp->ace));
1171	cp->index++;
1172	g_io_request(cbp, cp);
1173}
1174
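/*
 * The 'split' balance algorithm: requests larger than the slice size
 * are split into pieces which are read from different active disks;
 * smaller requests fall back to round-robin.
 */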
1175static void
1176g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1177{
1178	struct bio_queue_head queue;
1179	struct g_mirror_disk *disk;
1180	struct g_consumer *cp;
1181	struct bio *cbp;
1182	off_t left, mod, offset, slice;
1183	u_char *data;
1184	u_int ndisks;
1185
1186	if (bp->bio_length <= sc->sc_slice) {
1187		g_mirror_request_round_robin(sc, bp);
1188		return;
1189	}
1190	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1191	slice = bp->bio_length / ndisks;
1192	mod = slice % sc->sc_provider->sectorsize;
1193	if (mod != 0)
1194		slice += sc->sc_provider->sectorsize - mod;
1195	/*
1196	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in a nice and clean way.
1198	 */
1199	left = bp->bio_length;
1200	offset = bp->bio_offset;
1201	data = bp->bio_data;
1202	bioq_init(&queue);
1203	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1204		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1205			continue;
1206		cbp = g_clone_bio(bp);
1207		if (cbp == NULL) {
1208			for (cbp = bioq_first(&queue); cbp != NULL;
1209			    cbp = bioq_first(&queue)) {
1210				bioq_remove(&queue, cbp);
1211				g_destroy_bio(cbp);
1212			}
1213			if (bp->bio_error == 0)
1214				bp->bio_error = ENOMEM;
1215			g_io_deliver(bp, bp->bio_error);
1216			return;
1217		}
1218		bioq_insert_tail(&queue, cbp);
1219		cbp->bio_done = g_mirror_done;
1220		cbp->bio_caller1 = disk;
1221		cbp->bio_to = disk->d_consumer->provider;
1222		cbp->bio_offset = offset;
1223		cbp->bio_data = data;
1224		cbp->bio_length = MIN(left, slice);
1225		left -= cbp->bio_length;
1226		if (left == 0)
1227			break;
1228		offset += cbp->bio_length;
1229		data += cbp->bio_length;
1230	}
1231	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1232		bioq_remove(&queue, cbp);
1233		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1234		disk = cbp->bio_caller1;
1235		cbp->bio_caller1 = NULL;
1236		cp = disk->d_consumer;
1237		KASSERT(cp->acr > 0 && cp->ace > 0,
1238		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1239		    cp->acr, cp->acw, cp->ace));
1240		disk->d_consumer->index++;
1241		g_io_request(cbp, disk->d_consumer);
1242	}
1243}
1244
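/*
 * Dispatch a request taken from the queue: reads are handed to the
 * configured balance algorithm; writes and deletes are cloned to every
 * active component and to synchronizing components within the already
 * synchronized region.
 */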
1245static void
1246g_mirror_register_request(struct bio *bp)
1247{
1248	struct g_mirror_softc *sc;
1249
1250	sc = bp->bio_to->geom->softc;
1251	switch (bp->bio_cmd) {
1252	case BIO_READ:
1253		switch (sc->sc_balance) {
1254		case G_MIRROR_BALANCE_LOAD:
1255			g_mirror_request_load(sc, bp);
1256			break;
1257		case G_MIRROR_BALANCE_PREFER:
1258			g_mirror_request_prefer(sc, bp);
1259			break;
1260		case G_MIRROR_BALANCE_ROUND_ROBIN:
1261			g_mirror_request_round_robin(sc, bp);
1262			break;
1263		case G_MIRROR_BALANCE_SPLIT:
1264			g_mirror_request_split(sc, bp);
1265			break;
1266		}
1267		return;
1268	case BIO_WRITE:
1269	case BIO_DELETE:
1270	    {
1271		struct g_mirror_disk *disk;
1272		struct g_mirror_disk_sync *sync;
1273		struct bio_queue_head queue;
1274		struct g_consumer *cp;
1275		struct bio *cbp;
1276
1277		if (sc->sc_idle)
1278			g_mirror_unidle(sc);
1279		/*
1280		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
1282		 */
1283		bioq_init(&queue);
1284		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1285			sync = &disk->d_sync;
1286			switch (disk->d_state) {
1287			case G_MIRROR_DISK_STATE_ACTIVE:
1288				break;
1289			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1290				if (bp->bio_offset >= sync->ds_offset)
1291					continue;
1292				else if (bp->bio_offset + bp->bio_length >
1293				    sync->ds_offset_done &&
1294				    (bp->bio_offset < sync->ds_resync ||
1295				     sync->ds_resync == -1)) {
1296					sync->ds_resync = bp->bio_offset -
1297					    (bp->bio_offset % MAXPHYS);
1298				}
1299				break;
1300			default:
1301				continue;
1302			}
1303			cbp = g_clone_bio(bp);
1304			if (cbp == NULL) {
1305				for (cbp = bioq_first(&queue); cbp != NULL;
1306				    cbp = bioq_first(&queue)) {
1307					bioq_remove(&queue, cbp);
1308					g_destroy_bio(cbp);
1309				}
1310				if (bp->bio_error == 0)
1311					bp->bio_error = ENOMEM;
1312				g_io_deliver(bp, bp->bio_error);
1313				return;
1314			}
1315			bioq_insert_tail(&queue, cbp);
1316			cbp->bio_done = g_mirror_done;
1317			cp = disk->d_consumer;
1318			cbp->bio_caller1 = cp;
1319			cbp->bio_to = cp->provider;
1320			KASSERT(cp->acw > 0 && cp->ace > 0,
1321			    ("Consumer %s not opened (r%dw%de%d).",
1322			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1323		}
1324		for (cbp = bioq_first(&queue); cbp != NULL;
1325		    cbp = bioq_first(&queue)) {
1326			bioq_remove(&queue, cbp);
1327			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1328			cp = cbp->bio_caller1;
1329			cbp->bio_caller1 = NULL;
1330			cp->index++;
1331			g_io_request(cbp, cp);
1332		}
1333		/*
1334		 * Bump syncid on first write.
1335		 */
1336		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1337			sc->sc_bump_syncid = 0;
1338			g_topology_lock();
1339			g_mirror_bump_syncid(sc);
1340			g_topology_unlock();
1341		}
1342		return;
1343	    }
1344	default:
1345		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1346		    bp->bio_cmd, sc->sc_name));
1347		break;
1348	}
1349}
1350
1351static int
1352g_mirror_can_destroy(struct g_mirror_softc *sc)
1353{
1354	struct g_geom *gp;
1355	struct g_consumer *cp;
1356
1357	g_topology_assert();
1358	gp = sc->sc_geom;
1359	LIST_FOREACH(cp, &gp->consumer, consumer) {
1360		if (g_mirror_is_busy(sc, cp))
1361			return (0);
1362	}
1363	gp = sc->sc_sync.ds_geom;
1364	LIST_FOREACH(cp, &gp->consumer, consumer) {
1365		if (g_mirror_is_busy(sc, cp))
1366			return (0);
1367	}
1368	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1369	    sc->sc_name);
1370	return (1);
1371}
1372
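/*
 * Destroy the device if possible.  When the WAIT flag is set, only wake
 * up the thread that requested the destruction and let it do the work.
 */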
1373static int
1374g_mirror_try_destroy(struct g_mirror_softc *sc)
1375{
1376
1377	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1378		g_topology_lock();
1379		if (!g_mirror_can_destroy(sc)) {
1380			g_topology_unlock();
1381			return (0);
1382		}
1383		g_topology_unlock();
1384		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1385		    &sc->sc_worker);
1386		wakeup(&sc->sc_worker);
1387		sc->sc_worker = NULL;
1388	} else {
1389		g_topology_lock();
1390		if (!g_mirror_can_destroy(sc)) {
1391			g_topology_unlock();
1392			return (0);
1393		}
1394		g_mirror_destroy_device(sc);
1395		g_topology_unlock();
1396		free(sc, M_MIRROR);
1397	}
1398	return (1);
1399}
1400
1401/*
1402 * Worker thread.
1403 */
1404static void
1405g_mirror_worker(void *arg)
1406{
1407	struct g_mirror_softc *sc;
1408	struct g_mirror_disk *disk;
1409	struct g_mirror_disk_sync *sync;
1410	struct g_mirror_event *ep;
1411	struct bio *bp;
1412	u_int nreqs;
1413
1414	sc = arg;
1415	curthread->td_base_pri = PRIBIO;
1416
1417	nreqs = 0;
1418	for (;;) {
1419		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1420		/*
1421		 * First take a look at events.
1422		 * This is important to handle events before any I/O requests.
1423		 */
1424		ep = g_mirror_event_get(sc);
1425		if (ep != NULL) {
1426			g_topology_lock();
1427			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1428				/* Update only device status. */
1429				G_MIRROR_DEBUG(3,
1430				    "Running event for device %s.",
1431				    sc->sc_name);
1432				ep->e_error = 0;
1433				g_mirror_update_device(sc, 1);
1434			} else {
1435				/* Update disk status. */
1436				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1437				     g_mirror_get_diskname(ep->e_disk));
1438				ep->e_error = g_mirror_update_disk(ep->e_disk,
1439				    ep->e_state);
1440				if (ep->e_error == 0)
1441					g_mirror_update_device(sc, 0);
1442			}
1443			g_topology_unlock();
1444			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1445				KASSERT(ep->e_error == 0,
1446				    ("Error cannot be handled."));
1447				g_mirror_event_free(ep);
1448			} else {
1449				ep->e_flags |= G_MIRROR_EVENT_DONE;
1450				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1451				    ep);
1452				mtx_lock(&sc->sc_events_mtx);
1453				wakeup(ep);
1454				mtx_unlock(&sc->sc_events_mtx);
1455			}
1456			if ((sc->sc_flags &
1457			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1458				if (g_mirror_try_destroy(sc))
1459					kthread_exit(0);
1460			}
1461			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1462			continue;
1463		}
1464		/*
1465		 * Now I/O requests.
1466		 */
1467		/* Get first request from the queue. */
1468		mtx_lock(&sc->sc_queue_mtx);
1469		bp = bioq_first(&sc->sc_queue);
1470		if (bp == NULL) {
1471			if ((sc->sc_flags &
1472			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1473				mtx_unlock(&sc->sc_queue_mtx);
1474				if (g_mirror_try_destroy(sc))
1475					kthread_exit(0);
1476				mtx_lock(&sc->sc_queue_mtx);
1477			}
1478		}
1479		if (sc->sc_sync.ds_ndisks > 0 &&
1480		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1481			mtx_unlock(&sc->sc_queue_mtx);
1482			/*
1483			 * It is time for synchronization...
1484			 */
1485			nreqs = 0;
1486			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1487				if (disk->d_state !=
1488				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1489					continue;
1490				}
1491				sync = &disk->d_sync;
1492				if (sync->ds_offset >=
1493				    sc->sc_provider->mediasize) {
1494					continue;
1495				}
1496				if (sync->ds_offset > sync->ds_offset_done)
1497					continue;
1498				if (sync->ds_resync != -1) {
1499					sync->ds_offset = sync->ds_resync;
1500					sync->ds_offset_done = sync->ds_resync;
1501					sync->ds_resync = -1;
1502				}
1503				g_mirror_sync_one(disk);
1504			}
1505			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1506			goto sleep;
1507		}
1508		if (bp == NULL) {
1509#define	G_MIRROR_IS_IDLE(sc)	((sc)->sc_idle ||			\
1510				 ((sc)->sc_provider != NULL &&		\
1511				  (sc)->sc_provider->acw == 0))
1512			if (G_MIRROR_IS_IDLE(sc)) {
1513				/*
1514				 * If we're already in idle state, sleep without
1515				 * a timeout.
1516				 */
1517				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1518				    "m:w1", 0);
1519				G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
1520			} else {
1521				u_int idletime;
1522
1523				idletime = g_mirror_idletime;
1524				if (idletime == 0)
1525					idletime = 1;
1526				idletime *= hz;
1527				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1528				    "m:w2", idletime) == EWOULDBLOCK) {
1529					G_MIRROR_DEBUG(5, "%s: I'm here 4.",
1530					    __func__);
1531					/*
					 * No I/O requests for 'idletime'
					 * seconds, so mark components as clean.
1534					 */
1535					g_mirror_idle(sc);
1536				}
1537				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1538			}
1539			continue;
1540		}
1541		nreqs++;
1542		bioq_remove(&sc->sc_queue, bp);
1543		mtx_unlock(&sc->sc_queue_mtx);
1544
1545		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1546			g_mirror_regular_request(bp);
1547		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1548			u_int timeout, sps;
1549
1550			g_mirror_sync_request(bp);
1551sleep:
1552			sps = g_mirror_syncs_per_sec;
1553			if (sps == 0) {
1554				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1555				continue;
1556			}
1557			mtx_lock(&sc->sc_queue_mtx);
1558			if (bioq_first(&sc->sc_queue) != NULL) {
1559				mtx_unlock(&sc->sc_queue_mtx);
1560				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1561				continue;
1562			}
1563			timeout = hz / sps;
1564			if (timeout == 0)
1565				timeout = 1;
1566			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1567			    timeout);
1568		} else {
1569			g_mirror_register_request(bp);
1570		}
1571		G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1572	}
1573}
1574
/*
 * Update the access counts of the disk's consumer so that they match
 * the access counts of the device provider (open or close as needed).
 */
1578static void
1579g_mirror_update_access(struct g_mirror_disk *disk)
1580{
1581	struct g_provider *pp;
1582	struct g_consumer *cp;
1583	int acr, acw, ace, cpw, error;
1584
1585	g_topology_assert();
1586
1587	cp = disk->d_consumer;
1588	pp = disk->d_softc->sc_provider;
1589	if (pp == NULL) {
1590		acr = -cp->acr;
1591		acw = -cp->acw;
1592		ace = -cp->ace;
1593	} else {
1594		acr = pp->acr - cp->acr;
1595		acw = pp->acw - cp->acw;
1596		ace = pp->ace - cp->ace;
1597		/* Grab an extra "exclusive" bit. */
1598		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1599			ace++;
1600	}
1601	if (acr == 0 && acw == 0 && ace == 0)
1602		return;
1603	cpw = cp->acw;
1604	error = g_access(cp, acr, acw, ace);
1605	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1606	    acw, ace, error);
1607	if (error != 0) {
1608		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1609		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1610		    G_MIRROR_EVENT_DONTWAIT);
1611		return;
1612	}
1613	if (cpw == 0 && cp->acw > 0) {
1614		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1615		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1616		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1617	} else if (cpw > 0 && cp->acw == 0) {
1618		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1619		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1620		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1621	}
1622}
1623
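/*
 * Start synchronization of a disk: open its consumer for writing,
 * create a synchronization consumer attached to the mirror provider and
 * allocate the transfer buffer.
 */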
1624static void
1625g_mirror_sync_start(struct g_mirror_disk *disk)
1626{
1627	struct g_mirror_softc *sc;
1628	struct g_consumer *cp;
1629	int error;
1630
1631	g_topology_assert();
1632
1633	sc = disk->d_softc;
1634	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1635	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1636	    sc->sc_state));
1637	cp = disk->d_consumer;
1638	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1639	    ("Consumer %s already opened.", cp->provider->name));
1640
1641	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1642	    g_mirror_get_diskname(disk));
1643	error = g_access(cp, 0, 1, 1);
1644	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1645	    1, error);
1646	if (error != 0) {
1647		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1648		    G_MIRROR_EVENT_DONTWAIT);
1649		return;
1650	}
1651	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1652	KASSERT(disk->d_sync.ds_consumer == NULL,
1653	    ("Sync consumer already exists (device=%s, disk=%s).",
1654	    sc->sc_name, g_mirror_get_diskname(disk)));
1655	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1656	disk->d_sync.ds_consumer->private = disk;
1657	disk->d_sync.ds_consumer->index = 0;
1658	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1659	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1660	    disk->d_softc->sc_name, error));
1661	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1662	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1663	    disk->d_softc->sc_name, error));
1664	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1665	sc->sc_sync.ds_ndisks++;
1666}
1667
1668/*
1669 * Stop synchronization process.
1670 * type: 0 - synchronization finished
1671 *       1 - synchronization stopped
1672 */
1673static void
1674g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1675{
1676	struct g_consumer *cp;
1677
1678	g_topology_assert();
1679	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1680	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1681	    g_mirror_disk_state2str(disk->d_state)));
1682	if (disk->d_sync.ds_consumer == NULL)
1683		return;
1684
1685	if (type == 0) {
1686		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1687		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1688	} else /* if (type == 1) */ {
1689		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1690		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1691	}
1692	cp = disk->d_sync.ds_consumer;
1693	g_access(cp, -1, 0, 0);
1694	g_mirror_kill_consumer(disk->d_softc, cp);
1695	free(disk->d_sync.ds_data, M_MIRROR);
1696	disk->d_sync.ds_consumer = NULL;
1697	disk->d_softc->sc_sync.ds_ndisks--;
1698	cp = disk->d_consumer;
1699	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1700	    ("Consumer %s not opened.", cp->provider->name));
1701	g_access(cp, 0, -1, -1);
1702	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1703	    -1, 0);
1704	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1705}
1706
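/*
 * Create and announce the mirror provider and start synchronization of
 * all components that need it.
 */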
1707static void
1708g_mirror_launch_provider(struct g_mirror_softc *sc)
1709{
1710	struct g_mirror_disk *disk;
1711	struct g_provider *pp;
1712
1713	g_topology_assert();
1714
1715	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1716	pp->mediasize = sc->sc_mediasize;
1717	pp->sectorsize = sc->sc_sectorsize;
1718	sc->sc_provider = pp;
1719	g_error_provider(pp, 0);
1720	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1721	    pp->name);
1722	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1723		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1724			g_mirror_sync_start(disk);
1725	}
1726}
1727
1728static void
1729g_mirror_destroy_provider(struct g_mirror_softc *sc)
1730{
1731	struct g_mirror_disk *disk;
1732	struct bio *bp;
1733
1734	g_topology_assert();
1735	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1736	    sc->sc_name));
1737
1738	g_error_provider(sc->sc_provider, ENXIO);
1739	mtx_lock(&sc->sc_queue_mtx);
1740	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1741		bioq_remove(&sc->sc_queue, bp);
1742		g_io_deliver(bp, ENXIO);
1743	}
1744	mtx_unlock(&sc->sc_queue_mtx);
1745	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1746	    sc->sc_provider->name);
1747	sc->sc_provider->flags |= G_PF_WITHER;
1748	g_orphan_provider(sc->sc_provider, ENXIO);
1749	sc->sc_provider = NULL;
1750	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1751		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1752			g_mirror_sync_stop(disk, 1);
1753	}
1754}
1755
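/*
 * Callout handler: the startup timeout has expired, so force the device
 * to start with the components connected so far.
 */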
1756static void
1757g_mirror_go(void *arg)
1758{
1759	struct g_mirror_softc *sc;
1760
1761	sc = arg;
1762	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1763	g_mirror_event_send(sc, 0,
1764	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1765}
1766
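/*
 * Decide which state the given disk should enter, based on its syncid
 * and synchronization flags relative to the device; a disk that is
 * fresher than the running device is destroyed.
 */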
1767static u_int
1768g_mirror_determine_state(struct g_mirror_disk *disk)
1769{
1770	struct g_mirror_softc *sc;
1771	u_int state;
1772
1773	sc = disk->d_softc;
1774	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1775		if ((disk->d_flags &
1776		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1777			/* Disk does not need synchronization. */
1778			state = G_MIRROR_DISK_STATE_ACTIVE;
1779		} else {
1780			if ((sc->sc_flags &
1781			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1782			    (disk->d_flags &
1783			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1784				/*
1785				 * We can start synchronization from
1786				 * the stored offset.
1787				 */
1788				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1789			} else {
1790				state = G_MIRROR_DISK_STATE_STALE;
1791			}
1792		}
1793	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because even if it was synchronized, it was
		 * synchronized against disks with a different syncid.
		 */
1799		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1800		disk->d_sync.ds_offset = 0;
1801		disk->d_sync.ds_offset_done = 0;
1802		disk->d_sync.ds_syncid = sc->sc_syncid;
1803		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1804		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1805			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1806		} else {
1807			state = G_MIRROR_DISK_STATE_STALE;
1808		}
1809	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were writes, the mirror is broken, sorry.
		 * The best choice here is not to touch this disk and
		 * to inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrived! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
1822		g_mirror_destroy_disk(disk);
1823		state = G_MIRROR_DISK_STATE_NONE;
1824		/* Return immediately, because disk was destroyed. */
1825		return (state);
1826	}
1827	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1828	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1829	return (state);
1830}
1831
1832/*
1833 * Update device state.
1834 */
1835static void
1836g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1837{
1838	struct g_mirror_disk *disk;
1839	u_int state;
1840
1841	g_topology_assert();
1842
1843	switch (sc->sc_state) {
1844	case G_MIRROR_DEVICE_STATE_STARTING:
1845	    {
1846		struct g_mirror_disk *pdisk;
1847		u_int dirty, ndisks, syncid;
1848
1849		KASSERT(sc->sc_provider == NULL,
1850		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1851		/*
1852		 * Are we ready? We are, if all disks are connected or
1853		 * if we have any disks and 'force' is true.
1854		 */
1855		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1856		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1857			;
1858		} else if (g_mirror_ndisks(sc, -1) == 0) {
1859			/*
1860			 * Disks went down in starting phase, so destroy
1861			 * device.
1862			 */
1863			callout_drain(&sc->sc_callout);
1864			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1865			return;
1866		} else {
1867			return;
1868		}
1869
1870		/*
1871		 * Activate all disks with the biggest syncid.
1872		 */
1873		if (force) {
1874			/*
1875			 * If 'force' is true, we have been called due to
1876			 * timeout, so don't bother canceling timeout.
1877			 */
1878			ndisks = 0;
1879			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1880				if ((disk->d_flags &
1881				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1882					ndisks++;
1883				}
1884			}
1885			if (ndisks == 0) {
1886				/* No valid disks found, destroy device. */
1887				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1888				return;
1889			}
1890		} else {
1891			/* Cancel timeout. */
1892			callout_drain(&sc->sc_callout);
1893		}
1894
1895		/*
1896		 * Find disk with the biggest syncid.
1897		 */
1898		syncid = 0;
1899		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1900			if (disk->d_sync.ds_syncid > syncid)
1901				syncid = disk->d_sync.ds_syncid;
1902		}
1903
1904		/*
1905		 * Here we need to look for dirty disks and if all disks
1906		 * with the biggest syncid are dirty, we have to choose
1907		 * one with the biggest priority and rebuild the rest.
1908		 */
1909		/*
1910		 * Find the number of dirty disks with the biggest syncid.
1911		 * Find the number of disks with the biggest syncid.
1912		 * While here, find a disk with the biggest priority.
1913		 */
1914		dirty = ndisks = 0;
1915		pdisk = NULL;
1916		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1917			if (disk->d_sync.ds_syncid != syncid)
1918				continue;
1919			if ((disk->d_flags &
1920			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1921				continue;
1922			}
1923			ndisks++;
1924			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1925				dirty++;
1926				if (pdisk == NULL ||
1927				    pdisk->d_priority < disk->d_priority) {
1928					pdisk = disk;
1929				}
1930			}
1931		}
1932		if (dirty == 0) {
1933			/* No dirty disks at all, great. */
1934		} else if (dirty == ndisks) {
1935			/*
1936			 * Force synchronization for all dirty disks except one
1937			 * with the biggest priority.
1938			 */
1939			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1940			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1941			    "master disk for synchronization.",
1942			    g_mirror_get_diskname(pdisk), sc->sc_name);
1943			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1944				if (disk->d_sync.ds_syncid != syncid)
1945					continue;
1946				if ((disk->d_flags &
1947				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1948					continue;
1949				}
1950				KASSERT((disk->d_flags &
1951				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1952				    ("Disk %s isn't marked as dirty.",
1953				    g_mirror_get_diskname(disk)));
1954				/* Skip the disk with the biggest priority. */
1955				if (disk == pdisk)
1956					continue;
1957				disk->d_sync.ds_syncid = 0;
1958			}
1959		} else if (dirty < ndisks) {
1960			/*
1961			 * Force synchronization for all dirty disks.
1962			 * We have some non-dirty disks.
1963			 */
1964			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1965				if (disk->d_sync.ds_syncid != syncid)
1966					continue;
1967				if ((disk->d_flags &
1968				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1969					continue;
1970				}
1971				if ((disk->d_flags &
1972				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1973					continue;
1974				}
1975				disk->d_sync.ds_syncid = 0;
1976			}
1977		}
1978
1979		/* Reset hint. */
1980		sc->sc_hint = NULL;
1981		sc->sc_syncid = syncid;
1982		if (force) {
1983			/* Remember to bump syncid on first write. */
1984			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1985		}
1986		state = G_MIRROR_DEVICE_STATE_RUNNING;
1987		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1988		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1989		    g_mirror_device_state2str(state));
1990		sc->sc_state = state;
1991		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1992			state = g_mirror_determine_state(disk);
1993			g_mirror_event_send(disk, state,
1994			    G_MIRROR_EVENT_DONTWAIT);
1995			if (state == G_MIRROR_DISK_STATE_STALE) {
1996				sc->sc_bump_syncid =
1997				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1998			}
1999		}
2000		wakeup(&g_mirror_class);
2001		break;
2002	    }
2003	case G_MIRROR_DEVICE_STATE_RUNNING:
2004		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2005		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2006			/*
2007			 * No active disks or no disks at all,
2008			 * so destroy device.
2009			 */
2010			if (sc->sc_provider != NULL)
2011				g_mirror_destroy_provider(sc);
2012			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2013			break;
2014		} else if (g_mirror_ndisks(sc,
2015		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2016		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2017			/*
2018			 * We have active disks, launch provider if it doesn't
2019			 * exist.
2020			 */
2021			if (sc->sc_provider == NULL)
2022				g_mirror_launch_provider(sc);
2023		}
2024		/*
2025		 * Bump syncid here, if we need to do it immediately.
2026		 */
2027		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
2028			sc->sc_bump_syncid = 0;
2029			g_mirror_bump_syncid(sc);
2030		}
2031		break;
2032	default:
2033		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2034		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2035		break;
2036	}
2037}
2038
2039/*
2040 * Update disk state and device state if needed.
2041 */
2042#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2043	"Disk %s state changed from %s to %s (device %s).",		\
2044	g_mirror_get_diskname(disk),					\
2045	g_mirror_disk_state2str(disk->d_state),				\
2046	g_mirror_disk_state2str(state), sc->sc_name)
2047static int
2048g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2049{
2050	struct g_mirror_softc *sc;
2051
2052	g_topology_assert();
2053
2054	sc = disk->d_softc;
2055again:
2056	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2057	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2058	    g_mirror_disk_state2str(state));
2059	switch (state) {
2060	case G_MIRROR_DISK_STATE_NEW:
2061		/*
2062		 * Possible scenarios:
2063		 * 1. A new disk arrives.
2064		 */
2065		/* Previous state should be NONE. */
2066		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2067		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2068		    g_mirror_disk_state2str(disk->d_state)));
2069		DISK_STATE_CHANGED();
2070
2071		disk->d_state = state;
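		/*
		 * Insert the new disk so that the list remains sorted by
		 * priority, highest priority first.
		 */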
2072		if (LIST_EMPTY(&sc->sc_disks))
2073			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2074		else {
2075			struct g_mirror_disk *dp;
2076
2077			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2078				if (disk->d_priority >= dp->d_priority) {
2079					LIST_INSERT_BEFORE(dp, disk, d_next);
2080					dp = NULL;
2081					break;
2082				}
2083				if (LIST_NEXT(dp, d_next) == NULL)
2084					break;
2085			}
2086			if (dp != NULL)
2087				LIST_INSERT_AFTER(dp, disk, d_next);
2088		}
2089		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2090		    sc->sc_name, g_mirror_get_diskname(disk));
2091		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2092			break;
2093		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2094		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2095		    g_mirror_device_state2str(sc->sc_state),
2096		    g_mirror_get_diskname(disk),
2097		    g_mirror_disk_state2str(disk->d_state)));
2098		state = g_mirror_determine_state(disk);
2099		if (state != G_MIRROR_DISK_STATE_NONE)
2100			goto again;
2101		break;
2102	case G_MIRROR_DISK_STATE_ACTIVE:
2103		/*
2104		 * Possible scenarios:
2105		 * 1. New disk does not need synchronization.
2106		 * 2. Synchronization process finished successfully.
2107		 */
2108		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2109		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2110		    g_mirror_device_state2str(sc->sc_state),
2111		    g_mirror_get_diskname(disk),
2112		    g_mirror_disk_state2str(disk->d_state)));
2113		/* Previous state should be NEW or SYNCHRONIZING. */
2114		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2115		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2116		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2117		    g_mirror_disk_state2str(disk->d_state)));
2118		DISK_STATE_CHANGED();
2119
2120		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2121			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2122		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2123			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2124			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2125			g_mirror_sync_stop(disk, 0);
2126		}
2127		disk->d_state = state;
2128		disk->d_sync.ds_offset = 0;
2129		disk->d_sync.ds_offset_done = 0;
2130		g_mirror_update_access(disk);
2131		g_mirror_update_metadata(disk);
2132		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2133		    sc->sc_name, g_mirror_get_diskname(disk));
2134		break;
2135	case G_MIRROR_DISK_STATE_STALE:
2136		/*
2137		 * Possible scenarios:
2138		 * 1. Stale disk was connected.
2139		 */
2140		/* Previous state should be NEW. */
2141		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2142		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2143		    g_mirror_disk_state2str(disk->d_state)));
2144		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2145		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2146		    g_mirror_device_state2str(sc->sc_state),
2147		    g_mirror_get_diskname(disk),
2148		    g_mirror_disk_state2str(disk->d_state)));
2149		/*
2150		 * STALE state is only possible if device is marked
2151		 * NOAUTOSYNC.
2152		 */
2153		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2154		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2155		    g_mirror_device_state2str(sc->sc_state),
2156		    g_mirror_get_diskname(disk),
2157		    g_mirror_disk_state2str(disk->d_state)));
2158		DISK_STATE_CHANGED();
2159
2160		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2161		disk->d_state = state;
2162		g_mirror_update_metadata(disk);
2163		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2164		    sc->sc_name, g_mirror_get_diskname(disk));
2165		break;
2166	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2167		/*
2168		 * Possible scenarios:
2169		 * 1. Disk which needs synchronization was connected.
2170		 */
2171		/* Previous state should be NEW. */
2172		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2173		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2174		    g_mirror_disk_state2str(disk->d_state)));
2175		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2176		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2177		    g_mirror_device_state2str(sc->sc_state),
2178		    g_mirror_get_diskname(disk),
2179		    g_mirror_disk_state2str(disk->d_state)));
2180		DISK_STATE_CHANGED();
2181
2182		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2183			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2184		disk->d_state = state;
2185		if (sc->sc_provider != NULL) {
2186			g_mirror_sync_start(disk);
2187			g_mirror_update_metadata(disk);
2188		}
2189		break;
2190	case G_MIRROR_DISK_STATE_DISCONNECTED:
2191		/*
2192		 * Possible scenarios:
2193		 * 1. Device wasn't running yet, but a disk disappeared.
2194		 * 2. Disk was active and disappeared.
2195		 * 3. Disk disappeared during the synchronization process.
2196		 */
2197		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2198			/*
2199			 * Previous state should be ACTIVE, STALE or
2200			 * SYNCHRONIZING.
2201			 */
2202			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2203			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2204			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2205			    ("Wrong disk state (%s, %s).",
2206			    g_mirror_get_diskname(disk),
2207			    g_mirror_disk_state2str(disk->d_state)));
2208		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2209			/* Previous state should be NEW. */
2210			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2211			    ("Wrong disk state (%s, %s).",
2212			    g_mirror_get_diskname(disk),
2213			    g_mirror_disk_state2str(disk->d_state)));
2214			/*
2215			 * Reset bumping syncid if disk disappeared in STARTING
2216			 * state.
2217			 */
2218			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2219				sc->sc_bump_syncid = 0;
2220#ifdef	INVARIANTS
2221		} else {
2222			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2223			    sc->sc_name,
2224			    g_mirror_device_state2str(sc->sc_state),
2225			    g_mirror_get_diskname(disk),
2226			    g_mirror_disk_state2str(disk->d_state)));
2227#endif
2228		}
2229		DISK_STATE_CHANGED();
2230		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2231		    sc->sc_name, g_mirror_get_diskname(disk));
2232
2233		g_mirror_destroy_disk(disk);
2234		break;
2235	case G_MIRROR_DISK_STATE_DESTROY:
2236	    {
2237		int error;
2238
2239		error = g_mirror_clear_metadata(disk);
2240		if (error != 0)
2241			return (error);
2242		DISK_STATE_CHANGED();
2243		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2244		    sc->sc_name, g_mirror_get_diskname(disk));
2245
2246		g_mirror_destroy_disk(disk);
2247		sc->sc_ndisks--;
2248		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2249			g_mirror_update_metadata(disk);
2250		}
2251		break;
2252	    }
2253	default:
2254		KASSERT(1 == 0, ("Unknown state (%u).", state));
2255		break;
2256	}
2257	return (0);
2258}
2259#undef	DISK_STATE_CHANGED
2260
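/*
 * Read the last sector of the consumer's provider and decode it as
 * gmirror metadata.
 */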
2261static int
2262g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2263{
2264	struct g_provider *pp;
2265	u_char *buf;
2266	int error;
2267
2268	g_topology_assert();
2269
2270	error = g_access(cp, 1, 0, 0);
2271	if (error != 0)
2272		return (error);
2273	pp = cp->provider;
2274	g_topology_unlock();
2275	/* Metadata are stored on last sector. */
2276	/* Metadata is stored in the last sector. */
2277	    &error);
2278	g_topology_lock();
2279	if (buf == NULL) {
2280		g_access(cp, -1, 0, 0);
2281		return (error);
2282	}
2283	if (error != 0) {
2284		g_access(cp, -1, 0, 0);
2285		g_free(buf);
2286		return (error);
2287	}
2288	error = g_access(cp, -1, 0, 0);
2289	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2290
2291	/* Decode metadata. */
2292	error = mirror_metadata_decode(buf, md);
2293	g_free(buf);
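	/* If the magic does not match, this is not gmirror metadata at all. */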
2294	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2295		return (EINVAL);
2296	if (error != 0) {
2297		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2298		    cp->provider->name);
2299		return (error);
2300	}
2301
2302	return (0);
2303}
2304
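/*
 * Check that the metadata found on a disk is consistent with the
 * configuration of the existing device: number of components, slice size,
 * balance algorithm, media size, sector size and flags.
 */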
2305static int
2306g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2307    struct g_mirror_metadata *md)
2308{
2309
2310	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2311		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2312		    pp->name, md->md_did);
2313		return (EEXIST);
2314	}
2315	if (md->md_all != sc->sc_ndisks) {
2316		G_MIRROR_DEBUG(1,
2317		    "Invalid '%s' field on disk %s (device %s), skipping.",
2318		    "md_all", pp->name, sc->sc_name);
2319		return (EINVAL);
2320	}
2321	if (md->md_slice != sc->sc_slice) {
2322		G_MIRROR_DEBUG(1,
2323		    "Invalid '%s' field on disk %s (device %s), skipping.",
2324		    "md_slice", pp->name, sc->sc_name);
2325		return (EINVAL);
2326	}
2327	if (md->md_balance != sc->sc_balance) {
2328		G_MIRROR_DEBUG(1,
2329		    "Invalid '%s' field on disk %s (device %s), skipping.",
2330		    "md_balance", pp->name, sc->sc_name);
2331		return (EINVAL);
2332	}
2333	if (md->md_mediasize != sc->sc_mediasize) {
2334		G_MIRROR_DEBUG(1,
2335		    "Invalid '%s' field on disk %s (device %s), skipping.",
2336		    "md_mediasize", pp->name, sc->sc_name);
2337		return (EINVAL);
2338	}
2339	if (sc->sc_mediasize > pp->mediasize) {
2340		G_MIRROR_DEBUG(1,
2341		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2342		    sc->sc_name);
2343		return (EINVAL);
2344	}
2345	if (md->md_sectorsize != sc->sc_sectorsize) {
2346		G_MIRROR_DEBUG(1,
2347		    "Invalid '%s' field on disk %s (device %s), skipping.",
2348		    "md_sectorsize", pp->name, sc->sc_name);
2349		return (EINVAL);
2350	}
2351	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2352		G_MIRROR_DEBUG(1,
2353		    "Invalid sector size of disk %s (device %s), skipping.",
2354		    pp->name, sc->sc_name);
2355		return (EINVAL);
2356	}
2357	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2358		G_MIRROR_DEBUG(1,
2359		    "Invalid device flags on disk %s (device %s), skipping.",
2360		    pp->name, sc->sc_name);
2361		return (EINVAL);
2362	}
2363	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2364		G_MIRROR_DEBUG(1,
2365		    "Invalid disk flags on disk %s (device %s), skipping.",
2366		    pp->name, sc->sc_name);
2367		return (EINVAL);
2368	}
2369	return (0);
2370}
2371
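/*
 * Validate the metadata, create a disk structure for the provider and
 * post a NEW event for it.
 */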
2372static int
2373g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2374    struct g_mirror_metadata *md)
2375{
2376	struct g_mirror_disk *disk;
2377	int error;
2378
2379	g_topology_assert();
2380	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2381
2382	error = g_mirror_check_metadata(sc, pp, md);
2383	if (error != 0)
2384		return (error);
2385	disk = g_mirror_init_disk(sc, pp, md, &error);
2386	if (disk == NULL)
2387		return (error);
2388	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2389	    G_MIRROR_EVENT_WAIT);
2390	return (error);
2391}
2392
2393static int
2394g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2395{
2396	struct g_mirror_softc *sc;
2397	struct g_mirror_disk *disk;
2398	int dcr, dcw, dce, err, error;
2399
2400	g_topology_assert();
2401	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2402	    acw, ace);
2403
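	/* dcr, dcw and dce are the access counts after this request. */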
2404	dcr = pp->acr + acr;
2405	dcw = pp->acw + acw;
2406	dce = pp->ace + ace;
2407
2408	/* On first open, grab an extra "exclusive" bit */
2409	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2410		ace++;
2411	/* ... and let go of it on last close */
2412	if (dcr == 0 && dcw == 0 && dce == 0)
2413		ace--;
2414
2415	sc = pp->geom->softc;
2416	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2417		if (acr <= 0 && acw <= 0 && ace <= 0)
2418			return (0);
2419		else
2420			return (ENXIO);
2421	}
2422	error = ENXIO;
2423	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2424		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2425			continue;
2426		err = g_access(disk->d_consumer, acr, acw, ace);
2427		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2428		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2429		if (err == 0) {
2430			/*
2431			 * Mark disk as dirty on open and unmark on close.
2432			 */
2433			if (pp->acw == 0 && dcw > 0) {
2434				G_MIRROR_DEBUG(1,
2435				    "Disk %s (device %s) marked as dirty.",
2436				    g_mirror_get_diskname(disk), sc->sc_name);
2437				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2438				g_mirror_update_metadata(disk);
2439			} else if (pp->acw > 0 && dcw == 0) {
2440				G_MIRROR_DEBUG(1,
2441				    "Disk %s (device %s) marked as clean.",
2442				    g_mirror_get_diskname(disk), sc->sc_name);
2443				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2444				g_mirror_update_metadata(disk);
2445			}
2446			error = 0;
2447		} else {
2448			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2449			g_mirror_event_send(disk,
2450			    G_MIRROR_DISK_STATE_DISCONNECTED,
2451			    G_MIRROR_EVENT_DONTWAIT);
2452		}
2453	}
2454	/*
2455	 * Be sure to return 0 for negative access requests.
2456	 * In case of hardware problems, it is possible that we don't have
2457	 * any active disk here, so the loop above will be a no-op and error
2458	 * will be ENXIO.
2459	 */
2460	if (error != 0 && acr <= 0 && acw <= 0 && ace <= 0)
2461		error = 0;
2462	return (error);
2463}
2464
2465static struct g_geom *
2466g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2467{
2468	struct g_mirror_softc *sc;
2469	struct g_geom *gp;
2470	int error, timeout;
2471
2472	g_topology_assert();
2473	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2474	    md->md_mid);
2475
2476	/* At least one disk is required. */
2477	if (md->md_all < 1)
2478		return (NULL);
2479	/*
2480	 * Action geom.
2481	 */
2482	gp = g_new_geomf(mp, "%s", md->md_name);
2483	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2484	gp->start = g_mirror_start;
2485	gp->spoiled = g_mirror_spoiled;
2486	gp->orphan = g_mirror_orphan;
2487	gp->access = g_mirror_access;
2488	gp->dumpconf = g_mirror_dumpconf;
2489
2490	sc->sc_id = md->md_mid;
2491	sc->sc_slice = md->md_slice;
2492	sc->sc_balance = md->md_balance;
2493	sc->sc_mediasize = md->md_mediasize;
2494	sc->sc_sectorsize = md->md_sectorsize;
2495	sc->sc_ndisks = md->md_all;
2496	sc->sc_flags = md->md_mflags;
2497	sc->sc_bump_syncid = 0;
2498	sc->sc_idle = 0;
2499	bioq_init(&sc->sc_queue);
2500	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2501	LIST_INIT(&sc->sc_disks);
2502	TAILQ_INIT(&sc->sc_events);
2503	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2504	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2505	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2506	gp->softc = sc;
2507	sc->sc_geom = gp;
2508	sc->sc_provider = NULL;
2509	/*
2510	 * Synchronization geom.
2511	 */
2512	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2513	gp->softc = sc;
2514	gp->orphan = g_mirror_orphan;
2515	sc->sc_sync.ds_geom = gp;
2516	sc->sc_sync.ds_ndisks = 0;
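	/*
	 * Create the worker thread, which handles events and I/O requests
	 * for this device.
	 */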
2517	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2518	    "g_mirror %s", md->md_name);
2519	if (error != 0) {
2520		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2521		    sc->sc_name);
2522		g_destroy_geom(sc->sc_sync.ds_geom);
2523		mtx_destroy(&sc->sc_events_mtx);
2524		mtx_destroy(&sc->sc_queue_mtx);
2525		g_destroy_geom(sc->sc_geom);
2526		free(sc, M_MIRROR);
2527		return (NULL);
2528	}
2529
2530	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2531
2532	/*
2533	 * Run timeout.
2534	 */
2535	timeout = g_mirror_timeout * hz;
2536	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2537	return (sc->sc_geom);
2538}
2539
2540int
2541g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2542{
2543	struct g_provider *pp;
2544
2545	g_topology_assert();
2546
2547	if (sc == NULL)
2548		return (ENXIO);
2549	pp = sc->sc_provider;
2550	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2551		if (force) {
2552			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2553			    "cannot be removed cleanly.", pp->name);
2554		} else {
2555			G_MIRROR_DEBUG(1,
2556			    "Device %s is still open (r%dw%de%d).", pp->name,
2557			    pp->acr, pp->acw, pp->ace);
2558			return (EBUSY);
2559		}
2560	}
2561
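	/*
	 * Ask the worker thread to destroy the device, wake it up and wait
	 * until it exits before tearing the geoms down and freeing the softc.
	 */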
2562	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2563	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2564	g_topology_unlock();
2565	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2566	mtx_lock(&sc->sc_queue_mtx);
2567	wakeup(sc);
2568	mtx_unlock(&sc->sc_queue_mtx);
2569	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2570	while (sc->sc_worker != NULL)
2571		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2572	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2573	g_topology_lock();
2574	g_mirror_destroy_device(sc);
2575	free(sc, M_MIRROR);
2576	return (0);
2577}
2578
2579static void
2580g_mirror_taste_orphan(struct g_consumer *cp)
2581{
2582
2583	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2584	    cp->provider->name));
2585}
2586
2587static struct g_geom *
2588g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2589{
2590	struct g_mirror_metadata md;
2591	struct g_mirror_softc *sc;
2592	struct g_consumer *cp;
2593	struct g_geom *gp;
2594	int error;
2595
2596	g_topology_assert();
2597	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2598	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2599
2600	gp = g_new_geomf(mp, "mirror:taste");
2601	/*
2602	 * This orphan function should never be called.
2603	 */
2604	gp->orphan = g_mirror_taste_orphan;
2605	cp = g_new_consumer(gp);
2606	g_attach(cp, pp);
2607	error = g_mirror_read_metadata(cp, &md);
2608	g_detach(cp);
2609	g_destroy_consumer(cp);
2610	g_destroy_geom(gp);
2611	if (error != 0)
2612		return (NULL);
2613	gp = NULL;
2614
2615	if (md.md_version > G_MIRROR_VERSION) {
2616		printf("geom_mirror.ko module is too old to handle %s.\n",
2617		    pp->name);
2618		return (NULL);
2619	}
2620	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2621		return (NULL);
2622	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2623		G_MIRROR_DEBUG(0,
2624		    "Device %s: provider %s marked as inactive, skipping.",
2625		    md.md_name, pp->name);
2626		return (NULL);
2627	}
2628	if (g_mirror_debug >= 2)
2629		mirror_metadata_dump(&md);
2630
2631	/*
2632	 * Let's check if the device already exists.
2633	 */
2634	sc = NULL;
2635	LIST_FOREACH(gp, &mp->geom, geom) {
2636		sc = gp->softc;
2637		if (sc == NULL)
2638			continue;
2639		if (sc->sc_sync.ds_geom == gp)
2640			continue;
2641		if (strcmp(md.md_name, sc->sc_name) != 0)
2642			continue;
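		/*
		 * Same name but a different mirror ID means that the name is
		 * already taken by another configured device.
		 */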
2643		if (md.md_mid != sc->sc_id) {
2644			G_MIRROR_DEBUG(0, "Device %s already configured.",
2645			    sc->sc_name);
2646			return (NULL);
2647		}
2648		break;
2649	}
2650	if (gp == NULL) {
2651		gp = g_mirror_create(mp, &md);
2652		if (gp == NULL) {
2653			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2654			    md.md_name);
2655			return (NULL);
2656		}
2657		sc = gp->softc;
2658	}
2659	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2660	error = g_mirror_add_disk(sc, pp, &md);
2661	if (error != 0) {
2662		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2663		    pp->name, gp->name, error);
2664		if (LIST_EMPTY(&sc->sc_disks))
2665			g_mirror_destroy(sc, 1);
2666		return (NULL);
2667	}
2668	return (gp);
2669}
2670
2671static int
2672g_mirror_destroy_geom(struct gctl_req *req __unused,
2673    struct g_class *mp __unused, struct g_geom *gp)
2674{
2675
2676	return (g_mirror_destroy(gp->softc, 0));
2677}
2678
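/*
 * Dump device and disk configuration in XML form for the GEOM confxml tree.
 */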
2679static void
2680g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2681    struct g_consumer *cp, struct g_provider *pp)
2682{
2683	struct g_mirror_softc *sc;
2684
2685	g_topology_assert();
2686
2687	sc = gp->softc;
2688	if (sc == NULL)
2689		return;
2690	/* Skip synchronization geom. */
2691	if (gp == sc->sc_sync.ds_geom)
2692		return;
2693	if (pp != NULL) {
2694		/* Nothing here. */
2695	} else if (cp != NULL) {
2696		struct g_mirror_disk *disk;
2697
2698		disk = cp->private;
2699		if (disk == NULL)
2700			return;
2701		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2702		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2703			sbuf_printf(sb, "%s<Synchronized>", indent);
2704			if (disk->d_sync.ds_offset_done == 0)
2705				sbuf_printf(sb, "0%%");
2706			else {
2707				sbuf_printf(sb, "%u%%",
2708				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2709				    sc->sc_provider->mediasize));
2710			}
2711			sbuf_printf(sb, "</Synchronized>\n");
2712		}
2713		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2714		    disk->d_sync.ds_syncid);
2715		sbuf_printf(sb, "%s<Flags>", indent);
2716		if (disk->d_flags == 0)
2717			sbuf_printf(sb, "NONE");
2718		else {
2719			int first = 1;
2720
2721#define	ADD_FLAG(flag, name)	do {					\
2722	if ((disk->d_flags & (flag)) != 0) {				\
2723		if (!first)						\
2724			sbuf_printf(sb, ", ");				\
2725		else							\
2726			first = 0;					\
2727		sbuf_printf(sb, name);					\
2728	}								\
2729} while (0)
2730			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2731			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2732			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2733			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2734			    "SYNCHRONIZING");
2735			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2736#undef	ADD_FLAG
2737		}
2738		sbuf_printf(sb, "</Flags>\n");
2739		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2740		    disk->d_priority);
2741		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2742		    g_mirror_disk_state2str(disk->d_state));
2743	} else {
2744		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2745		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2746		sbuf_printf(sb, "%s<Flags>", indent);
2747		if (sc->sc_flags == 0)
2748			sbuf_printf(sb, "NONE");
2749		else {
2750			int first = 1;
2751
2752#define	ADD_FLAG(flag, name)	do {					\
2753	if ((sc->sc_flags & (flag)) != 0) {				\
2754		if (!first)						\
2755			sbuf_printf(sb, ", ");				\
2756		else							\
2757			first = 0;					\
2758		sbuf_printf(sb, name);					\
2759	}								\
2760} while (0)
2761			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2762#undef	ADD_FLAG
2763		}
2764		sbuf_printf(sb, "</Flags>\n");
2765		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2766		    (u_int)sc->sc_slice);
2767		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2768		    balance_name(sc->sc_balance));
2769		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2770		    sc->sc_ndisks);
2771		sbuf_printf(sb, "%s<State>", indent);
2772		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2773			sbuf_printf(sb, "%s", "STARTING");
2774		else if (sc->sc_ndisks ==
2775		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2776			sbuf_printf(sb, "%s", "COMPLETE");
2777		else
2778			sbuf_printf(sb, "%s", "DEGRADED");
2779		sbuf_printf(sb, "</State>\n");
2780	}
2781}
2782
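/*
 * Shutdown event handler: destroy all mirror devices so that their
 * components are released before the system goes down.
 */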
2783static void
2784g_mirror_shutdown(void *arg, int howto)
2785{
2786	struct g_class *mp;
2787	struct g_geom *gp, *gp2;
2788
2789	mp = arg;
2790	g_topology_lock();
2791	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
2792		if (gp->softc == NULL)
2793			continue;
2794		g_mirror_destroy(gp->softc, 1);
2795	}
2796	g_topology_unlock();
2797#if 0
2798	tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
2799#endif
2800}
2801
2802static void
2803g_mirror_init(struct g_class *mp)
2804{
2805
2806	g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
2807	    g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST);
2808	if (g_mirror_ehtag == NULL)
2809		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
2810}
2811
2812static void
2813g_mirror_fini(struct g_class *mp)
2814{
2815
2816	if (g_mirror_ehtag == NULL)
2817		return;
2818	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag);
2819}
2820
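/*
 * Return 1 when every configured mirror already has a usable provider,
 * 0 otherwise.  Used by g_mirror_rootwait() below to delay boot until
 * all mirrors are up.
 */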
2821static int
2822g_mirror_can_go(void)
2823{
2824	struct g_mirror_softc *sc;
2825	struct g_geom *gp;
2826	struct g_provider *pp;
2827	int can_go;
2828
2829	DROP_GIANT();
2830	can_go = 1;
2831	g_topology_lock();
2832	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2833		sc = gp->softc;
2834		if (sc == NULL) {
2835			can_go = 0;
2836			break;
2837		}
2838		pp = sc->sc_provider;
2839		if (pp == NULL || pp->error != 0) {
2840			can_go = 0;
2841			break;
2842		}
2843	}
2844	g_topology_unlock();
2845	PICKUP_GIANT();
2846	return (can_go);
2847}
2848
2849static void
2850g_mirror_rootwait(void)
2851{
2852
2853	/*
2854	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called
2855	 * HACK: before we get providers for tasting.
2856	 */
2857	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2858	/*
2859	 * Wait for mirrors in degraded state.
2860	 */
2861	for (;;) {
2862		if (g_mirror_can_go())
2863			break;
2864		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2865	}
2866}
2867
2868SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)
2869
2870DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2871