g_mirror.c revision 133752
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 133752 2004-08-15 13:58:29Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/bitstring.h>
41#include <vm/uma.h>
42#include <machine/atomic.h>
43#include <geom/geom.h>
44#include <sys/proc.h>
45#include <sys/kthread.h>
46#include <geom/mirror/g_mirror.h>
47
48
/* Malloc type used for the allocations made in this file. */
static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");

/*
 * Tunables exported below the kern.geom.mirror sysctl node.  All of them
 * are read-write (CTLFLAG_RW).
 */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
/*
 * Debug level.  Not static; presumably consulted by the G_MIRROR_DEBUG()
 * macro from other files as well -- TODO confirm against g_mirror.h.
 */
u_int g_mirror_debug = 0;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
/* Time to wait on all mirror components (default 8; units not shown here). */
static u_int g_mirror_timeout = 8;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/* Number of regular I/O requests per synchronization request (default 5). */
static u_int g_mirror_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_mirror_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
/* Number of synchronization requests per second (default 100). */
static u_int g_mirror_syncs_per_sec = 100;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_mirror_syncs_per_sec, 0,
    "Number of synchronizations requests per second");
67
/*
 * msleep() wrapper which logs the sleep channel at debug level 4 right
 * before going to sleep and again right after waking up.  The value
 * returned by msleep() is discarded.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
73
74
/* GEOM class methods implemented in this file. */
static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;

/*
 * GEOM class descriptor for the mirror class.
 * g_mirror_config is not declared in this part of the file; presumably it
 * comes from g_mirror.h -- TODO confirm.
 */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom
};


/* Forward declarations of helpers used before their definitions. */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
94
95
96static const char *
97g_mirror_disk_state2str(int state)
98{
99
100	switch (state) {
101	case G_MIRROR_DISK_STATE_NONE:
102		return ("NONE");
103	case G_MIRROR_DISK_STATE_NEW:
104		return ("NEW");
105	case G_MIRROR_DISK_STATE_ACTIVE:
106		return ("ACTIVE");
107	case G_MIRROR_DISK_STATE_STALE:
108		return ("STALE");
109	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
110		return ("SYNCHRONIZING");
111	case G_MIRROR_DISK_STATE_DISCONNECTED:
112		return ("DISCONNECTED");
113	case G_MIRROR_DISK_STATE_DESTROY:
114		return ("DESTROY");
115	default:
116		return ("INVALID");
117	}
118}
119
120static const char *
121g_mirror_device_state2str(int state)
122{
123
124	switch (state) {
125	case G_MIRROR_DEVICE_STATE_STARTING:
126		return ("STARTING");
127	case G_MIRROR_DEVICE_STATE_RUNNING:
128		return ("RUNNING");
129	default:
130		return ("INVALID");
131	}
132}
133
134static const char *
135g_mirror_get_diskname(struct g_mirror_disk *disk)
136{
137
138	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
139		return ("[unknown]");
140	return (disk->d_name);
141}
142
143/*
144 * --- Events handling functions ---
145 * Events in geom_mirror are used to maintain disks and device status
146 * from one thread to simplify locking.
147 */
/*
 * Release an event structure back to the M_MIRROR malloc type.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}
154
/*
 * Queue an event on the device's event list and wake up whoever sleeps on
 * the softc.
 *
 * 'arg' is a softc pointer when G_MIRROR_EVENT_DEVICE is set in 'flags' and
 * a disk pointer otherwise.  'state' is stored in the event as the
 * requested state.
 *
 * With G_MIRROR_EVENT_DONTWAIT the function returns 0 right after queueing;
 * the event is not freed here in that case.  Otherwise the topology lock
 * must be held on entry: it is dropped while sleeping until
 * G_MIRROR_EVENT_DONE shows up in the event's flags, then re-acquired, the
 * event is freed and its e_error is returned.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		/* Device event: no disk is associated with it. */
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	g_topology_assert();
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	g_topology_unlock();
	/*
	 * The DONE flag is tested without the mutex held; PDROP makes
	 * msleep() return with the mutex released, and the 5 second timeout
	 * forces a periodic re-check of the flag.
	 */
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	/* Don't even try to use 'sc' here, because it could be already dead. */
	g_topology_lock();
	error = ep->e_error;
	g_mirror_event_free(ep);
	return (error);
}
199
200static struct g_mirror_event *
201g_mirror_event_get(struct g_mirror_softc *sc)
202{
203	struct g_mirror_event *ep;
204
205	mtx_lock(&sc->sc_events_mtx);
206	ep = TAILQ_FIRST(&sc->sc_events);
207	if (ep != NULL)
208		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
209	mtx_unlock(&sc->sc_events_mtx);
210	return (ep);
211}
212
213static void
214g_mirror_event_cancel(struct g_mirror_disk *disk)
215{
216	struct g_mirror_softc *sc;
217	struct g_mirror_event *ep, *tmpep;
218
219	g_topology_assert();
220
221	sc = disk->d_softc;
222	mtx_lock(&sc->sc_events_mtx);
223	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
224		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
225			continue;
226		if (ep->e_disk != disk)
227			continue;
228		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
229		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
230			g_mirror_event_free(ep);
231		else {
232			ep->e_error = ECANCELED;
233			wakeup(ep);
234		}
235	}
236	mtx_unlock(&sc->sc_events_mtx);
237}
238
239/*
240 * Return the number of disks in given state.
241 * If state is equal to -1, count all connected disks.
242 */
243u_int
244g_mirror_ndisks(struct g_mirror_softc *sc, int state)
245{
246	struct g_mirror_disk *disk;
247	u_int n = 0;
248
249	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
250		if (state == -1 || disk->d_state == state)
251			n++;
252	}
253	return (n);
254}
255
256/*
257 * Find a disk in mirror by its disk ID.
258 */
259static struct g_mirror_disk *
260g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
261{
262	struct g_mirror_disk *disk;
263
264	g_topology_assert();
265
266	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
267		if (disk->d_id == id)
268			return (disk);
269	}
270	return (NULL);
271}
272
273static u_int
274g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
275{
276	struct bio *bp;
277	u_int nreqs = 0;
278
279	mtx_lock(&sc->sc_queue_mtx);
280	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
281		if (bp->bio_from == cp)
282			nreqs++;
283	}
284	mtx_unlock(&sc->sc_queue_mtx);
285	return (nreqs);
286}
287
288static int
289g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
290{
291
292	if (cp->nstart != cp->nend) {
293		G_MIRROR_DEBUG(2,
294		    "I/O requests for %s exist, can't destroy it now.",
295		    cp->provider->name);
296		return (1);
297	}
298	if (g_mirror_nrequests(sc, cp) > 0) {
299		G_MIRROR_DEBUG(2,
300		    "I/O requests for %s in queue, can't destroy it now.",
301		    cp->provider->name);
302		return (1);
303	}
304	return (0);
305}
306
307static void
308g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
309{
310
311	g_topology_assert();
312
313	cp->private = NULL;
314	if (g_mirror_is_busy(sc, cp))
315		return;
316	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
317	g_detach(cp);
318	g_destroy_consumer(cp);
319}
320
321static int
322g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
323{
324	int error;
325
326	g_topology_assert();
327	KASSERT(disk->d_consumer == NULL,
328	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
329
330	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
331	disk->d_consumer->private = disk;
332	error = g_attach(disk->d_consumer, pp);
333	if (error != 0)
334		return (error);
335	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
336	return (0);
337}
338
339static void
340g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
341{
342
343	g_topology_assert();
344
345	if (cp == NULL)
346		return;
347	if (cp->provider != NULL) {
348		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
349		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
350			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
351			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
352			    0);
353			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
354		}
355		g_mirror_kill_consumer(sc, cp);
356	} else {
357		g_destroy_consumer(cp);
358	}
359}
360
361/*
362 * Initialize disk. This means allocate memory, create consumer, attach it
363 * to the provider and open access (r1w1e1) to it.
364 */
365static struct g_mirror_disk *
366g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
367    struct g_mirror_metadata *md, int *errorp)
368{
369	struct g_mirror_disk *disk;
370	int error;
371
372	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
373	if (disk == NULL) {
374		error = ENOMEM;
375		goto fail;
376	}
377	disk->d_softc = sc;
378	error = g_mirror_connect_disk(disk, pp);
379	if (error != 0)
380		goto fail;
381	disk->d_id = md->md_did;
382	disk->d_state = G_MIRROR_DISK_STATE_NONE;
383	disk->d_priority = md->md_priority;
384	disk->d_delay.sec = 0;
385	disk->d_delay.frac = 0;
386	binuptime(&disk->d_last_used);
387	disk->d_flags = md->md_dflags;
388	if (md->md_provider[0] != '\0')
389		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
390	disk->d_sync.ds_consumer = NULL;
391	disk->d_sync.ds_offset = md->md_sync_offset;
392	disk->d_sync.ds_offset_done = md->md_sync_offset;
393	disk->d_sync.ds_syncid = md->md_syncid;
394	if (errorp != NULL)
395		*errorp = 0;
396	return (disk);
397fail:
398	if (errorp != NULL)
399		*errorp = error;
400	if (disk != NULL) {
401		g_mirror_disconnect_consumer(sc, disk->d_consumer);
402		free(disk, M_MIRROR);
403	}
404	return (NULL);
405}
406
407static void
408g_mirror_destroy_disk(struct g_mirror_disk *disk)
409{
410	struct g_mirror_softc *sc;
411
412	g_topology_assert();
413
414	LIST_REMOVE(disk, d_next);
415	g_mirror_event_cancel(disk);
416	sc = disk->d_softc;
417	if (sc->sc_hint == disk)
418		sc->sc_hint = NULL;
419	switch (disk->d_state) {
420	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
421		g_mirror_sync_stop(disk, 1);
422		/* FALLTHROUGH */
423	case G_MIRROR_DISK_STATE_NEW:
424	case G_MIRROR_DISK_STATE_STALE:
425	case G_MIRROR_DISK_STATE_ACTIVE:
426		g_mirror_disconnect_consumer(sc, disk->d_consumer);
427		free(disk, M_MIRROR);
428		break;
429	default:
430		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
431		    g_mirror_get_diskname(disk),
432		    g_mirror_disk_state2str(disk->d_state)));
433	}
434}
435
436static void
437g_mirror_destroy_device(struct g_mirror_softc *sc)
438{
439	struct g_mirror_disk *disk;
440	struct g_mirror_event *ep;
441	struct g_geom *gp;
442	struct g_consumer *cp, *tmpcp;
443
444	g_topology_assert();
445
446	gp = sc->sc_geom;
447	if (sc->sc_provider != NULL)
448		g_mirror_destroy_provider(sc);
449	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
450	    disk = LIST_FIRST(&sc->sc_disks)) {
451		g_mirror_destroy_disk(disk);
452	}
453	while ((ep = g_mirror_event_get(sc)) != NULL) {
454		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
455			g_mirror_event_free(ep);
456		else {
457			ep->e_error = ECANCELED;
458			ep->e_flags |= G_MIRROR_EVENT_DONE;
459			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
460			mtx_lock(&sc->sc_events_mtx);
461			wakeup(ep);
462			mtx_unlock(&sc->sc_events_mtx);
463		}
464	}
465	callout_drain(&sc->sc_callout);
466	gp->softc = NULL;
467
468	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
469		g_mirror_disconnect_consumer(sc, cp);
470	}
471	sc->sc_sync.ds_geom->softc = NULL;
472	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
473	mtx_destroy(&sc->sc_queue_mtx);
474	mtx_destroy(&sc->sc_events_mtx);
475	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
476	g_wither_geom(gp, ENXIO);
477}
478
479static void
480g_mirror_orphan(struct g_consumer *cp)
481{
482	struct g_mirror_disk *disk;
483
484	g_topology_assert();
485
486	disk = cp->private;
487	if (disk == NULL)
488		return;
489	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
490	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
491	    G_MIRROR_EVENT_DONTWAIT);
492}
493
494static void
495g_mirror_spoiled(struct g_consumer *cp)
496{
497	struct g_mirror_disk *disk;
498
499	g_topology_assert();
500
501	disk = cp->private;
502	if (disk == NULL)
503		return;
504	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
505	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
506	    G_MIRROR_EVENT_DONTWAIT);
507}
508
509/*
510 * Function should return the next active disk on the list.
511 * It is possible that it will be the same disk as given.
512 * If there are no active disks on list, NULL is returned.
513 */
514static __inline struct g_mirror_disk *
515g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
516{
517	struct g_mirror_disk *dp;
518
519	for (dp = LIST_NEXT(disk, d_next); dp != disk;
520	    dp = LIST_NEXT(dp, d_next)) {
521		if (dp == NULL)
522			dp = LIST_FIRST(&sc->sc_disks);
523		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
524			break;
525	}
526	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
527		return (NULL);
528	return (dp);
529}
530
531static struct g_mirror_disk *
532g_mirror_get_disk(struct g_mirror_softc *sc)
533{
534	struct g_mirror_disk *disk;
535
536	if (sc->sc_hint == NULL) {
537		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
538		if (sc->sc_hint == NULL)
539			return (NULL);
540	}
541	disk = sc->sc_hint;
542	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
543		disk = g_mirror_find_next(sc, disk);
544		if (disk == NULL)
545			return (NULL);
546	}
547	sc->sc_hint = g_mirror_find_next(sc, disk);
548	return (disk);
549}
550
551static int
552g_mirror_write_metadata(struct g_mirror_disk *disk,
553    struct g_mirror_metadata *md)
554{
555	struct g_mirror_softc *sc;
556	struct g_consumer *cp;
557	off_t offset, length;
558	u_char *sector;
559	int close = 0, error = 0;
560
561	g_topology_assert();
562
563	sc = disk->d_softc;
564	cp = disk->d_consumer;
565	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
566	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
567	length = cp->provider->sectorsize;
568	offset = cp->provider->mediasize - length;
569	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
570	/*
571	 * Open consumer if it wasn't opened and remember to close it.
572	 */
573	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
574		error = g_access(cp, 0, 1, 1);
575		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
576		    cp->provider->name, 0, 1, 1, error);
577		if (error == 0)
578			close = 1;
579#ifdef	INVARIANTS
580	} else {
581		KASSERT(cp->acw > 0 && cp->ace > 0,
582		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
583		    cp->acr, cp->acw, cp->ace));
584#endif
585	}
586	if (error == 0) {
587		if (md != NULL)
588			mirror_metadata_encode(md, sector);
589		g_topology_unlock();
590		error = g_write_data(cp, offset, sector, length);
591		g_topology_lock();
592	}
593	free(sector, M_MIRROR);
594	if (close) {
595		g_access(cp, 0, -1, -1);
596		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
597		    cp->provider->name, 0, -1, -1, 0);
598	}
599	if (error != 0) {
600		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
601		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
602		    G_MIRROR_EVENT_DONTWAIT);
603	}
604	return (error);
605}
606
607static int
608g_mirror_clear_metadata(struct g_mirror_disk *disk)
609{
610	int error;
611
612	g_topology_assert();
613	error = g_mirror_write_metadata(disk, NULL);
614	if (error == 0) {
615		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
616		    g_mirror_get_diskname(disk));
617	} else {
618		G_MIRROR_DEBUG(0,
619		    "Cannot clear metadata on disk %s (error=%d).",
620		    g_mirror_get_diskname(disk), error);
621	}
622	return (error);
623}
624
625void
626g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
627    struct g_mirror_metadata *md)
628{
629
630	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
631	md->md_version = G_MIRROR_VERSION;
632	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
633	md->md_mid = sc->sc_id;
634	md->md_all = sc->sc_ndisks;
635	md->md_slice = sc->sc_slice;
636	md->md_balance = sc->sc_balance;
637	md->md_mediasize = sc->sc_mediasize;
638	md->md_sectorsize = sc->sc_sectorsize;
639	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
640	bzero(md->md_provider, sizeof(md->md_provider));
641	if (disk == NULL) {
642		md->md_did = arc4random();
643		md->md_priority = 0;
644		md->md_syncid = 0;
645		md->md_dflags = 0;
646		md->md_sync_offset = 0;
647	} else {
648		md->md_did = disk->d_id;
649		md->md_priority = disk->d_priority;
650		md->md_syncid = disk->d_sync.ds_syncid;
651		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
652		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
653			md->md_sync_offset = disk->d_sync.ds_offset_done;
654		else
655			md->md_sync_offset = 0;
656		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
657			strlcpy(md->md_provider,
658			    disk->d_consumer->provider->name,
659			    sizeof(md->md_provider));
660		}
661	}
662}
663
664void
665g_mirror_update_metadata(struct g_mirror_disk *disk)
666{
667	struct g_mirror_metadata md;
668	int error;
669
670	g_topology_assert();
671	g_mirror_fill_metadata(disk->d_softc, disk, &md);
672	error = g_mirror_write_metadata(disk, &md);
673	if (error == 0) {
674		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
675		    g_mirror_get_diskname(disk));
676	} else {
677		G_MIRROR_DEBUG(0,
678		    "Cannot update metadata on disk %s (error=%d).",
679		    g_mirror_get_diskname(disk), error);
680	}
681}
682
683static void
684g_mirror_bump_syncid(struct g_mirror_softc *sc)
685{
686	struct g_mirror_disk *disk;
687
688	g_topology_assert();
689	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
690	    ("%s called with no active disks (device=%s).", __func__,
691	    sc->sc_name));
692
693	sc->sc_syncid++;
694	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
695	    sc->sc_syncid);
696	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
697		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
698		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
699			disk->d_sync.ds_syncid = sc->sc_syncid;
700			g_mirror_update_metadata(disk);
701		}
702	}
703}
704
705static __inline int
706bintime_cmp(struct bintime *bt1, struct bintime *bt2)
707{
708
709	if (bt1->sec < bt2->sec)
710		return (-1);
711	else if (bt1->sec > bt2->sec)
712		return (1);
713	if (bt1->frac < bt2->frac)
714		return (-1);
715	else if (bt1->frac > bt2->frac)
716		return (1);
717	return (0);
718}
719
720static void
721g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
722{
723
724	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
725		return;
726	binuptime(&disk->d_delay);
727	bintime_sub(&disk->d_delay, &bp->bio_t0);
728}
729
730static void
731g_mirror_done(struct bio *bp)
732{
733	struct g_mirror_softc *sc;
734
735	sc = bp->bio_from->geom->softc;
736	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
737	mtx_lock(&sc->sc_queue_mtx);
738	bioq_disksort(&sc->sc_queue, bp);
739	wakeup(sc);
740	mtx_unlock(&sc->sc_queue_mtx);
741}
742
/*
 * Handle a finished child of a regular (non-synchronization) request.
 *
 * 'bp' is the completed child; its parent is the request received on the
 * mirror provider.  The parent is delivered once all of its children are
 * accounted for.  A failed child causes a "disconnected" event for its
 * disk; a parent read with a failed child is put back on the device queue,
 * while a parent write/delete succeeds as long as at least one child
 * succeeded.
 *
 * Must be called without the topology lock held.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/*
		 * The consumer is no longer tied to a disk; try to destroy
		 * it now (it is kept if it is still busy).
		 */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	} else {
		g_mirror_update_delay(disk, bp);
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		/* No error so far: deliver the parent with the last child. */
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		/* Remember the first error seen on the parent. */
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
		if (disk != NULL) {
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/*
			 * Forget the failed child, so that only the
			 * remaining children are counted below.
			 */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_children == pbp->bio_inbed) {
			/*
			 * All children are in and at least one failed:
			 * clear the error and queue the parent again.
			 */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
829
830static void
831g_mirror_sync_done(struct bio *bp)
832{
833	struct g_mirror_softc *sc;
834
835	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
836	sc = bp->bio_from->geom->softc;
837	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
838	mtx_lock(&sc->sc_queue_mtx);
839	bioq_disksort(&sc->sc_queue, bp);
840	wakeup(sc);
841	mtx_unlock(&sc->sc_queue_mtx);
842}
843
844static void
845g_mirror_start(struct bio *bp)
846{
847	struct g_mirror_softc *sc;
848
849	sc = bp->bio_to->geom->softc;
850	/*
851	 * If sc == NULL or there are no valid disks, provider's error
852	 * should be set and g_mirror_start() should not be called at all.
853	 */
854	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
855	    ("Provider's error should be set (error=%d)(mirror=%s).",
856	    bp->bio_to->error, bp->bio_to->name));
857	G_MIRROR_LOGREQ(3, bp, "Request received.");
858
859	switch (bp->bio_cmd) {
860	case BIO_READ:
861	case BIO_WRITE:
862	case BIO_DELETE:
863		break;
864	case BIO_GETATTR:
865	default:
866		g_io_deliver(bp, EOPNOTSUPP);
867		return;
868	}
869	mtx_lock(&sc->sc_queue_mtx);
870	bioq_disksort(&sc->sc_queue, bp);
871	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
872	wakeup(sc);
873	mtx_unlock(&sc->sc_queue_mtx);
874}
875
876/*
877 * Send one synchronization request.
878 */
879static void
880g_mirror_sync_one(struct g_mirror_disk *disk)
881{
882	struct g_mirror_softc *sc;
883	struct bio *bp;
884
885	sc = disk->d_softc;
886	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
887	    ("Disk %s is not marked for synchronization.",
888	    g_mirror_get_diskname(disk)));
889
890	bp = g_new_bio();
891	if (bp == NULL)
892		return;
893	bp->bio_parent = NULL;
894	bp->bio_cmd = BIO_READ;
895	bp->bio_offset = disk->d_sync.ds_offset;
896	bp->bio_length = MIN(G_MIRROR_SYNC_BLOCK_SIZE,
897	    sc->sc_mediasize - bp->bio_offset);
898	bp->bio_cflags = 0;
899	bp->bio_done = g_mirror_sync_done;
900	bp->bio_data = disk->d_sync.ds_data;
901	if (bp->bio_data == NULL) {
902		g_destroy_bio(bp);
903		return;
904	}
905	disk->d_sync.ds_offset += bp->bio_length;
906	bp->bio_to = sc->sc_provider;
907	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
908	g_io_request(bp, disk->d_sync.ds_consumer);
909}
910
/*
 * Handle a finished synchronization request.
 *
 * A synchronization request goes through two phases using the same bio:
 * a read (sent by g_mirror_sync_one()) and, once the read has finished
 * without an error, a write of the same data to the disk being
 * synchronized.  When the write finishes, the disk's "done" offset is
 * advanced; reaching the provider's media size triggers an event asking to
 * make the disk active.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/*
		 * The consumer is no longer tied to a disk; try to destroy
		 * it and drop the request.
		 */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			/* A failed read is only logged; the bio is dropped. */
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		/* Reuse the bio to write the data just read to the disk. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		cp = disk->d_consumer;
		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
		if (bp->bio_error != 0) {
			/* A failed write disconnects the disk. */
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		disk->d_sync.ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		if (disk->d_sync.ds_offset_done == sc->sc_provider->mediasize) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		} else if ((disk->d_sync.ds_offset_done %
		    (G_MIRROR_SYNC_BLOCK_SIZE * 100)) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_mirror_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
991
992static void
993g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
994{
995	struct g_mirror_disk *disk;
996	struct g_consumer *cp;
997	struct bio *cbp;
998
999	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1000		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1001			break;
1002	}
1003	if (disk == NULL) {
1004		if (bp->bio_error == 0)
1005			bp->bio_error = ENXIO;
1006		g_io_deliver(bp, bp->bio_error);
1007		return;
1008	}
1009	cbp = g_clone_bio(bp);
1010	if (cbp == NULL) {
1011		if (bp->bio_error == 0)
1012			bp->bio_error = ENOMEM;
1013		g_io_deliver(bp, bp->bio_error);
1014		return;
1015	}
1016	/*
1017	 * Fill in the component buf structure.
1018	 */
1019	cp = disk->d_consumer;
1020	cbp->bio_done = g_mirror_done;
1021	cbp->bio_to = cp->provider;
1022	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1023	KASSERT(cp->acr > 0 && cp->ace > 0,
1024	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1025	    cp->acw, cp->ace));
1026	g_io_request(cbp, cp);
1027}
1028
1029static void
1030g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1031{
1032	struct g_mirror_disk *disk;
1033	struct g_consumer *cp;
1034	struct bio *cbp;
1035
1036	disk = g_mirror_get_disk(sc);
1037	if (disk == NULL) {
1038		if (bp->bio_error == 0)
1039			bp->bio_error = ENXIO;
1040		g_io_deliver(bp, bp->bio_error);
1041		return;
1042	}
1043	cbp = g_clone_bio(bp);
1044	if (cbp == NULL) {
1045		if (bp->bio_error == 0)
1046			bp->bio_error = ENOMEM;
1047		g_io_deliver(bp, bp->bio_error);
1048		return;
1049	}
1050	/*
1051	 * Fill in the component buf structure.
1052	 */
1053	cp = disk->d_consumer;
1054	cbp->bio_done = g_mirror_done;
1055	cbp->bio_to = cp->provider;
1056	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1057	KASSERT(cp->acr > 0 && cp->ace > 0,
1058	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1059	    cp->acw, cp->ace));
1060	g_io_request(cbp, cp);
1061}
1062
1063static void
1064g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1065{
1066	struct g_mirror_disk *disk, *dp;
1067	struct g_consumer *cp;
1068	struct bio *cbp;
1069	struct bintime curtime;
1070
1071	binuptime(&curtime);
1072	/*
1073	 * Find a disk which the smallest load.
1074	 */
1075	disk = NULL;
1076	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1077		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1078			continue;
1079		/* If disk wasn't used for more than 2 sec, use it. */
1080		if (curtime.sec - dp->d_last_used.sec >= 2) {
1081			disk = dp;
1082			break;
1083		}
1084		if (disk == NULL ||
1085		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1086			disk = dp;
1087		}
1088	}
1089	cbp = g_clone_bio(bp);
1090	if (cbp == NULL) {
1091		if (bp->bio_error == 0)
1092			bp->bio_error = ENOMEM;
1093		g_io_deliver(bp, bp->bio_error);
1094		return;
1095	}
1096	/*
1097	 * Fill in the component buf structure.
1098	 */
1099	cp = disk->d_consumer;
1100	cbp->bio_done = g_mirror_done;
1101	cbp->bio_to = cp->provider;
1102	binuptime(&disk->d_last_used);
1103	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1104	KASSERT(cp->acr > 0 && cp->ace > 0,
1105	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1106	    cp->acw, cp->ace));
1107	g_io_request(cbp, cp);
1108}
1109
1110static void
1111g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1112{
1113	struct bio_queue_head queue;
1114	struct g_mirror_disk *disk;
1115	struct g_consumer *cp;
1116	struct bio *cbp;
1117	off_t left, mod, offset, slice;
1118	u_char *data;
1119	u_int ndisks;
1120
1121	if (bp->bio_length <= sc->sc_slice) {
1122		g_mirror_request_round_robin(sc, bp);
1123		return;
1124	}
1125	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1126	slice = bp->bio_length / ndisks;
1127	mod = slice % sc->sc_provider->sectorsize;
1128	if (mod != 0)
1129		slice += sc->sc_provider->sectorsize - mod;
1130	/*
1131	 * Allocate all bios before sending any request, so we can
1132	 * return ENOMEM in nice and clean way.
1133	 */
1134	left = bp->bio_length;
1135	offset = bp->bio_offset;
1136	data = bp->bio_data;
1137	bioq_init(&queue);
1138	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1139		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1140			continue;
1141		cbp = g_clone_bio(bp);
1142		if (cbp == NULL) {
1143			for (cbp = bioq_first(&queue); cbp != NULL;
1144			    cbp = bioq_first(&queue)) {
1145				bioq_remove(&queue, cbp);
1146				g_destroy_bio(cbp);
1147			}
1148			if (bp->bio_error == 0)
1149				bp->bio_error = ENOMEM;
1150			g_io_deliver(bp, bp->bio_error);
1151			return;
1152		}
1153		bioq_insert_tail(&queue, cbp);
1154		cbp->bio_done = g_mirror_done;
1155		cbp->bio_caller1 = disk;
1156		cbp->bio_to = disk->d_consumer->provider;
1157		cbp->bio_offset = offset;
1158		cbp->bio_data = data;
1159		cbp->bio_length = MIN(left, slice);
1160		left -= cbp->bio_length;
1161		if (left == 0)
1162			break;
1163		offset += cbp->bio_length;
1164		data += cbp->bio_length;
1165	}
1166	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1167		bioq_remove(&queue, cbp);
1168		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1169		disk = cbp->bio_caller1;
1170		cbp->bio_caller1 = NULL;
1171		cp = disk->d_consumer;
1172		KASSERT(cp->acr > 0 && cp->ace > 0,
1173		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1174		    cp->acr, cp->acw, cp->ace));
1175		g_io_request(cbp, disk->d_consumer);
1176	}
1177}
1178
1179static void
1180g_mirror_register_request(struct bio *bp)
1181{
1182	struct g_mirror_softc *sc;
1183
1184	sc = bp->bio_to->geom->softc;
1185	switch (bp->bio_cmd) {
1186	case BIO_READ:
1187		switch (sc->sc_balance) {
1188		case G_MIRROR_BALANCE_LOAD:
1189			g_mirror_request_load(sc, bp);
1190			break;
1191		case G_MIRROR_BALANCE_PREFER:
1192			g_mirror_request_prefer(sc, bp);
1193			break;
1194		case G_MIRROR_BALANCE_ROUND_ROBIN:
1195			g_mirror_request_round_robin(sc, bp);
1196			break;
1197		case G_MIRROR_BALANCE_SPLIT:
1198			g_mirror_request_split(sc, bp);
1199			break;
1200		}
1201		return;
1202	case BIO_WRITE:
1203	case BIO_DELETE:
1204	    {
1205		struct g_mirror_disk *disk;
1206		struct bio_queue_head queue;
1207		struct g_consumer *cp;
1208		struct bio *cbp;
1209
1210		/*
1211		 * Allocate all bios before sending any request, so we can
1212		 * return ENOMEM in nice and clean way.
1213		 */
1214		bioq_init(&queue);
1215		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1216			switch (disk->d_state) {
1217			case G_MIRROR_DISK_STATE_ACTIVE:
1218				break;
1219			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1220				if (bp->bio_offset >= disk->d_sync.ds_offset)
1221					continue;
1222				break;
1223			default:
1224				continue;
1225			}
1226			cbp = g_clone_bio(bp);
1227			if (cbp == NULL) {
1228				for (cbp = bioq_first(&queue); cbp != NULL;
1229				    cbp = bioq_first(&queue)) {
1230					bioq_remove(&queue, cbp);
1231					g_destroy_bio(cbp);
1232				}
1233				if (bp->bio_error == 0)
1234					bp->bio_error = ENOMEM;
1235				g_io_deliver(bp, bp->bio_error);
1236				return;
1237			}
1238			bioq_insert_tail(&queue, cbp);
1239		}
1240		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1241			switch (disk->d_state) {
1242			case G_MIRROR_DISK_STATE_ACTIVE:
1243				break;
1244			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1245				if (bp->bio_offset >= disk->d_sync.ds_offset)
1246					continue;
1247				break;
1248			default:
1249				continue;
1250			}
1251			cbp = bioq_first(&queue);
1252			KASSERT(cbp != NULL, ("NULL cbp! (device %s).",
1253			    sc->sc_name));
1254			bioq_remove(&queue, cbp);
1255			cp = disk->d_consumer;
1256			cbp->bio_done = g_mirror_done;
1257			cbp->bio_to = cp->provider;
1258			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1259			KASSERT(cp->acw > 0 && cp->ace > 0,
1260			    ("Consumer %s not opened (r%dw%de%d).",
1261			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1262			g_io_request(cbp, cp);
1263		}
1264		/*
1265		 * Bump syncid on first write.
1266		 */
1267		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1268			sc->sc_bump_syncid = 0;
1269			g_topology_lock();
1270			g_mirror_bump_syncid(sc);
1271			g_topology_unlock();
1272		}
1273		return;
1274	    }
1275	default:
1276		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1277		    bp->bio_cmd, sc->sc_name));
1278		break;
1279	}
1280}
1281
1282static int
1283g_mirror_can_destroy(struct g_mirror_softc *sc)
1284{
1285	struct g_geom *gp;
1286	struct g_consumer *cp;
1287
1288	g_topology_assert();
1289	gp = sc->sc_geom;
1290	LIST_FOREACH(cp, &gp->consumer, consumer) {
1291		if (g_mirror_is_busy(sc, cp))
1292			return (0);
1293	}
1294	gp = sc->sc_sync.ds_geom;
1295	LIST_FOREACH(cp, &gp->consumer, consumer) {
1296		if (g_mirror_is_busy(sc, cp))
1297			return (0);
1298	}
1299	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1300	    sc->sc_name);
1301	return (1);
1302}
1303
1304static int
1305g_mirror_try_destroy(struct g_mirror_softc *sc)
1306{
1307
1308	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1309		g_topology_lock();
1310		if (!g_mirror_can_destroy(sc)) {
1311			g_topology_unlock();
1312			return (0);
1313		}
1314		g_topology_unlock();
1315		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1316		    &sc->sc_worker);
1317		wakeup(&sc->sc_worker);
1318		sc->sc_worker = NULL;
1319	} else {
1320		g_topology_lock();
1321		if (!g_mirror_can_destroy(sc)) {
1322			g_topology_unlock();
1323			return (0);
1324		}
1325		g_mirror_destroy_device(sc);
1326		g_topology_unlock();
1327		free(sc, M_MIRROR);
1328	}
1329	return (1);
1330}
1331
/*
 * Worker thread.
 *
 * Main loop of the per-device kernel thread; 'arg' is the device softc.
 * Every iteration does one of the following, in this order of precedence:
 *  1. handle one pending event (disk or device state update),
 *  2. run a synchronization pass, if there are synchronizing disks and
 *     either the request queue is empty or more than
 *     g_mirror_reqs_per_sync requests were taken from the queue since the
 *     last pass,
 *  3. take one request from the queue and process it.
 * The thread exits once G_MIRROR_DEVICE_FLAG_DESTROY is set and
 * g_mirror_try_destroy() succeeds.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct bio *bp;
	u_int nreqs;

	sc = arg;
	curthread->td_base_pri = PRIBIO;

	/* Number of requests taken from the queue since the last sync pass. */
	nreqs = 0;
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			g_topology_lock();
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, 1);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, 0);
			}
			g_topology_unlock();
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				/* Nobody waits for the result; free it here. */
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				/*
				 * Mark the event as done and wake up whoever
				 * sleeps on it.
				 */
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc))
					kthread_exit(0);
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_first(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				/*
				 * The queue mutex is released around the
				 * destroy attempt and taken again if the
				 * attempt fails.
				 */
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc))
					kthread_exit(0);
				mtx_lock(&sc->sc_queue_mtx);
			}
		}
		if (sc->sc_sync.ds_ndisks > 0 &&
		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
			mtx_unlock(&sc->sc_queue_mtx);
			/*
			 * It is time for synchronization...
			 */
			nreqs = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_state !=
				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
					continue;
				}
				/* Offset already at or past the media end. */
				if (disk->d_sync.ds_offset >=
				    sc->sc_provider->mediasize) {
					continue;
				}
				/*
				 * NOTE(review): presumably a previously issued
				 * sync request has not completed yet - confirm
				 * against g_mirror_sync_one().
				 */
				if (disk->d_sync.ds_offset >
				    disk->d_sync.ds_offset_done) {
					continue;
				}
				g_mirror_sync_one(disk);
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
			/*
			 * Jump into the synchronization branch below to share
			 * its rate-limiting sleep.
			 */
			goto sleep;
		}
		if (bp == NULL) {
			/*
			 * Nothing to do, sleep without a timeout.
			 * NOTE(review): PDROP presumably leaves the queue
			 * mutex released on return; the loop locks it again.
			 */
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 0);
			G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
			continue;
		}
		nreqs++;
		bioq_remove(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);

		/* Dispatch on the kind of request recorded in bio_cflags. */
		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
			g_mirror_regular_request(bp);
		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
			u_int timeout, sps;

			g_mirror_sync_request(bp);
sleep:
			/*
			 * Limit the synchronization rate: sleep for hz / sps
			 * ticks (at least one) unless the limit is disabled
			 * (sps == 0) or there are requests waiting in the
			 * queue.
			 */
			sps = atomic_load_acq_int(&g_mirror_syncs_per_sec);
			if (sps == 0) {
				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
				continue;
			}
			mtx_lock(&sc->sc_queue_mtx);
			if (bioq_first(&sc->sc_queue) != NULL) {
				mtx_unlock(&sc->sc_queue_mtx);
				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
				continue;
			}
			timeout = hz / sps;
			if (timeout == 0)
				timeout = 1;
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w2",
			    timeout);
		} else {
			/* A fresh request from the upper layer. */
			g_mirror_register_request(bp);
		}
		G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
	}
}
1472
1473/*
1474 * Open disk's consumer if needed.
1475 */
1476static void
1477g_mirror_update_access(struct g_mirror_disk *disk)
1478{
1479	struct g_provider *pp;
1480	struct g_consumer *cp;
1481	int acr, acw, ace, cpw, error;
1482
1483	g_topology_assert();
1484
1485	cp = disk->d_consumer;
1486	pp = disk->d_softc->sc_provider;
1487	if (pp == NULL) {
1488		acr = -cp->acr;
1489		acw = -cp->acw;
1490		ace = -cp->ace;
1491	} else {
1492		acr = pp->acr - cp->acr;
1493		acw = pp->acw - cp->acw;
1494		ace = pp->ace - cp->ace;
1495		/* Grab an extra "exclusive" bit. */
1496		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1497			ace++;
1498	}
1499	if (acr == 0 && acw == 0 && ace == 0)
1500		return;
1501	cpw = cp->acw;
1502	error = g_access(cp, acr, acw, ace);
1503	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1504	    acw, ace, error);
1505	if (error != 0) {
1506		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1507		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1508		    G_MIRROR_EVENT_DONTWAIT);
1509		return;
1510	}
1511	if (cpw == 0 && cp->acw > 0) {
1512		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1513		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1514		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1515	} else if (cpw > 0 && cp->acw == 0) {
1516		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1517		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1518		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1519	}
1520}
1521
1522static void
1523g_mirror_sync_start(struct g_mirror_disk *disk)
1524{
1525	struct g_mirror_softc *sc;
1526	struct g_consumer *cp;
1527	int error;
1528
1529	g_topology_assert();
1530
1531	sc = disk->d_softc;
1532	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1533	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1534	    sc->sc_state));
1535	cp = disk->d_consumer;
1536	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1537	    ("Consumer %s already opened.", cp->provider->name));
1538
1539	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1540	    g_mirror_get_diskname(disk));
1541	error = g_access(cp, 0, 1, 1);
1542	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1543	    1, error);
1544	if (error != 0) {
1545		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1546		    G_MIRROR_EVENT_DONTWAIT);
1547		return;
1548	}
1549	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1550	KASSERT(disk->d_sync.ds_consumer == NULL,
1551	    ("Sync consumer already exists (device=%s, disk=%s).",
1552	    sc->sc_name, g_mirror_get_diskname(disk)));
1553	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1554	disk->d_sync.ds_consumer->private = disk;
1555	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1556	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1557	    disk->d_softc->sc_name, error));
1558	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1559	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1560	    disk->d_softc->sc_name, error));
1561	disk->d_sync.ds_data = malloc(G_MIRROR_SYNC_BLOCK_SIZE, M_MIRROR,
1562	    M_WAITOK);
1563	sc->sc_sync.ds_ndisks++;
1564}
1565
1566/*
1567 * Stop synchronization process.
1568 * type: 0 - synchronization finished
1569 *       1 - synchronization stopped
1570 */
1571static void
1572g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1573{
1574	struct g_consumer *cp;
1575
1576	g_topology_assert();
1577	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1578	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1579	    g_mirror_disk_state2str(disk->d_state)));
1580	if (disk->d_sync.ds_consumer == NULL)
1581		return;
1582
1583	if (type == 0) {
1584		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1585		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1586	} else /* if (type == 1) */ {
1587		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1588		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1589	}
1590	cp = disk->d_sync.ds_consumer;
1591	g_access(cp, -1, 0, 0);
1592	g_mirror_kill_consumer(disk->d_softc, cp);
1593	free(disk->d_sync.ds_data, M_MIRROR);
1594	disk->d_sync.ds_consumer = NULL;
1595	disk->d_softc->sc_sync.ds_ndisks--;
1596	cp = disk->d_consumer;
1597	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1598	    ("Consumer %s not opened.", cp->provider->name));
1599	g_access(cp, 0, -1, -1);
1600	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1601	    -1, 0);
1602	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1603}
1604
1605static void
1606g_mirror_launch_provider(struct g_mirror_softc *sc)
1607{
1608	struct g_mirror_disk *disk;
1609	struct g_provider *pp;
1610
1611	g_topology_assert();
1612
1613	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1614	pp->mediasize = sc->sc_mediasize;
1615	pp->sectorsize = sc->sc_sectorsize;
1616	sc->sc_provider = pp;
1617	g_error_provider(pp, 0);
1618	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1619	    pp->name);
1620	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1621		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1622			g_mirror_sync_start(disk);
1623	}
1624}
1625
1626static void
1627g_mirror_destroy_provider(struct g_mirror_softc *sc)
1628{
1629	struct g_mirror_disk *disk;
1630	struct bio *bp;
1631
1632	g_topology_assert();
1633	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1634	    sc->sc_name));
1635
1636	g_error_provider(sc->sc_provider, ENXIO);
1637	mtx_lock(&sc->sc_queue_mtx);
1638	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1639		bioq_remove(&sc->sc_queue, bp);
1640		g_io_deliver(bp, ENXIO);
1641	}
1642	mtx_unlock(&sc->sc_queue_mtx);
1643	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1644	    sc->sc_provider->name);
1645	sc->sc_provider->flags |= G_PF_WITHER;
1646	g_orphan_provider(sc->sc_provider, ENXIO);
1647	sc->sc_provider = NULL;
1648	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1649		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1650			g_mirror_sync_stop(disk, 1);
1651	}
1652}
1653
1654static void
1655g_mirror_go(void *arg)
1656{
1657	struct g_mirror_softc *sc;
1658
1659	sc = arg;
1660	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1661	g_mirror_event_send(sc, 0,
1662	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1663}
1664
1665static u_int
1666g_mirror_determine_state(struct g_mirror_disk *disk)
1667{
1668	struct g_mirror_softc *sc;
1669	u_int state;
1670
1671	sc = disk->d_softc;
1672	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1673		if ((disk->d_flags &
1674		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1675			/* Disk does not need synchronization. */
1676			state = G_MIRROR_DISK_STATE_ACTIVE;
1677		} else {
1678			if ((sc->sc_flags &
1679			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1680			    (disk->d_flags &
1681			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1682				/*
1683				 * We can start synchronization from
1684				 * the stored offset.
1685				 */
1686				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1687			} else {
1688				state = G_MIRROR_DISK_STATE_STALE;
1689			}
1690		}
1691	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1692		/*
1693		 * Reset all synchronization data for this disk,
1694		 * because if it even was synchronized, it was
1695		 * synchronized to disks with different syncid.
1696		 */
1697		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1698		disk->d_sync.ds_offset = 0;
1699		disk->d_sync.ds_offset_done = 0;
1700		disk->d_sync.ds_syncid = sc->sc_syncid;
1701		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1702		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1703			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1704		} else {
1705			state = G_MIRROR_DISK_STATE_STALE;
1706		}
1707	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1708		/*
1709		 * Not good, NOT GOOD!
1710		 * It means that mirror was started on stale disks
1711		 * and more fresh disk just arrive.
1712		 * If there were writes, mirror is fucked up, sorry.
1713		 * I think the best choice here is don't touch
1714		 * this disk and inform the user laudly.
1715		 */
1716		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1717		    "disk (%s) arrives!! It will not be connected to the "
1718		    "running device.", sc->sc_name,
1719		    g_mirror_get_diskname(disk));
1720		g_mirror_destroy_disk(disk);
1721		state = G_MIRROR_DISK_STATE_NONE;
1722		/* Return immediately, because disk was destroyed. */
1723		return (state);
1724	}
1725	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1726	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1727	return (state);
1728}
1729
/*
 * Update device state.
 *
 * sc    - the device to update.
 * force - when true and the device is in STARTING state, start the device
 *         with the disks which are connected so far instead of waiting for
 *         all sc->sc_ndisks disks.
 *
 * In STARTING state this decides whether the device may enter RUNNING
 * state, picks the syncid the device runs with and sends a state event for
 * every connected disk.  In RUNNING state it bumps the syncid if that was
 * requested to happen immediately and then launches or destroys the
 * provider, or marks the device for destruction, depending on the number
 * of disks.
 * Must be called with the topology lock held.
 */
static void
g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_mirror_disk *disk;
	u_int state;

	g_topology_assert();

	switch (sc->sc_state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
	    {
		struct g_mirror_disk *pdisk;
		u_int dirty, ndisks, syncid;

		KASSERT(sc->sc_provider == NULL,
		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
		/*
		 * Are we ready? We are, if all disks are connected or
		 * if we have any disks and 'force' is true.
		 */
		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
			;
		} else if (g_mirror_ndisks(sc, -1) == 0) {
			/*
			 * Disks went down in starting phase, so destroy
			 * device.
			 */
			callout_drain(&sc->sc_callout);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			return;
		} else {
			/* Some disks are still missing, keep waiting. */
			return;
		}

		/*
		 * Activate all disks with the biggest syncid.
		 */
		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling timeout.
			 */
			/* Count disks not marked as synchronizing. */
			ndisks = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
					ndisks++;
				}
			}
			if (ndisks == 0) {
				/* No valid disks found, destroy device. */
				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
				return;
			}
		} else {
			/* Cancel timeout. */
			callout_drain(&sc->sc_callout);
		}

		/*
		 * Find disk with the biggest syncid.
		 */
		syncid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid > syncid)
				syncid = disk->d_sync.ds_syncid;
		}

		/*
		 * Here we need to look for dirty disks and if all disks
		 * with the biggest syncid are dirty, we have to choose
		 * one with the biggest priority and rebuild the rest.
		 */
		/*
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
		dirty = ndisks = 0;
		pdisk = NULL;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid != syncid)
				continue;
			if ((disk->d_flags &
			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
				continue;
			}
			ndisks++;
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
				dirty++;
				/* Only dirty disks are candidates for pdisk. */
				if (pdisk == NULL ||
				    pdisk->d_priority < disk->d_priority) {
					pdisk = disk;
				}
			}
		}
		if (dirty == 0) {
			/* No dirty disks at all, great. */
		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except one
			 * with the biggest priority.
			 */
			KASSERT(pdisk != NULL, ("pdisk == NULL"));
			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
			    "master disk for synchronization.",
			    g_mirror_get_diskname(pdisk), sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				KASSERT((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
				    ("Disk %s isn't marked as dirty.",
				    g_mirror_get_diskname(disk)));
				/* Skip the disk with the biggest priority. */
				if (disk == pdisk)
					continue;
				/*
				 * A zero syncid is never bigger than the
				 * device's syncid, so the disk will not be
				 * treated as up-to-date.
				 */
				disk->d_sync.ds_syncid = 0;
			}
		} else if (dirty < ndisks) {
			/*
			 * Force synchronization for all dirty disks.
			 * We have some non-dirty disks.
			 */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
					continue;
				}
				disk->d_sync.ds_syncid = 0;
			}
		}

		/* Reset hint. */
		sc->sc_hint = NULL;
		sc->sc_syncid = syncid;
		if (force) {
			/* Remember to bump syncid on first write. */
			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
		}
		state = G_MIRROR_DEVICE_STATE_RUNNING;
		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
		    g_mirror_device_state2str(state));
		sc->sc_state = state;
		/*
		 * Send a state event for every disk.  From here on 'state'
		 * holds a disk state, not a device state.
		 */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			state = g_mirror_determine_state(disk);
			g_mirror_event_send(disk, state,
			    G_MIRROR_EVENT_DONTWAIT);
			if (state == G_MIRROR_DISK_STATE_STALE) {
				sc->sc_bump_syncid =
				    G_MIRROR_BUMP_ON_FIRST_WRITE;
			}
		}
		break;
	    }
	case G_MIRROR_DEVICE_STATE_RUNNING:
		/*
		 * Bump syncid here, if we need to do it immediately.
		 */
		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
			sc->sc_bump_syncid = 0;
			g_mirror_bump_syncid(sc);
		}
		if (g_mirror_ndisks(sc, -1) == 0) {
			/*
			 * No disks at all, we need to destroy device.
			 */
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * No active disks, destroy provider.
			 */
			if (sc->sc_provider != NULL)
				g_mirror_destroy_provider(sc);
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * We have active disks, launch provider if it doesn't
			 * exist.
			 */
			if (sc->sc_provider == NULL)
				g_mirror_launch_provider(sc);
		}
		/* Nothing is done while any disk is still in NEW state. */
		break;
	default:
		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
		break;
	}
}
1938
/*
 * Update disk state and device state if needed.
 *
 * disk  - the disk whose state is changed.
 * state - the requested new disk state (G_MIRROR_DISK_STATE_*).
 *
 * Returns 0 on success or the error returned by g_mirror_clear_metadata()
 * when handling the DESTROY state.  For the DISCONNECTED and DESTROY
 * states (and for NEW, if g_mirror_determine_state() returns NONE) the
 * disk is destroyed and must not be used by the caller anymore.
 * Must be called with the topology lock held.
 */
/* Log a disk state transition; expects 'disk', 'state' and 'sc' in scope. */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
static int
g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = disk->d_softc;
again:
	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
	    g_mirror_disk_state2str(state));
	switch (state) {
	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. New disk arrives.
		 */
		/* Previous state should be NONE. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_state = state;
		/*
		 * Insert the disk into the list, keeping the list sorted by
		 * priority in descending order.
		 */
		if (LIST_EMPTY(&sc->sc_disks))
			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
		else {
			struct g_mirror_disk *dp;

			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
				if (disk->d_priority >= dp->d_priority) {
					LIST_INSERT_BEFORE(dp, disk, d_next);
					/* Mark the disk as inserted. */
					dp = NULL;
					break;
				}
				/* Stop on the last element, keeping dp set. */
				if (LIST_NEXT(dp, d_next) == NULL)
					break;
			}
			/* Lowest priority so far: append after the tail. */
			if (dp != NULL)
				LIST_INSERT_AFTER(dp, disk, d_next);
		}
		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			break;
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * The device is already running, so move the disk to its
		 * real state right away.  NONE means the disk was destroyed.
		 */
		state = g_mirror_determine_state(disk);
		if (state != G_MIRROR_DISK_STATE_NONE)
			goto again;
		break;
	case G_MIRROR_DISK_STATE_ACTIVE:
		/*
		 * Possible scenarios:
		 * 1. New disk does not need synchronization.
		 * 2. Synchronization process finished successfully.
		 */
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/* Previous state should be NEW or SYNCHRONIZING. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
			g_mirror_sync_stop(disk, 0);
		}
		disk->d_state = state;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		g_mirror_update_access(disk);
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_STALE:
		/*
		 * Possible scenarios:
		 * 1. Stale disk was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * STALE state is only possible if device is marked
		 * NOAUTOSYNC.
		 */
		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		/*
		 * Possible scenarios:
		 * 1. Disk which needs synchronization was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		/*
		 * Without a provider the synchronization is not started
		 * here; g_mirror_launch_provider() starts it for disks in
		 * SYNCHRONIZING state.
		 */
		if (sc->sc_provider != NULL) {
			g_mirror_sync_start(disk);
			g_mirror_update_metadata(disk);
		}
		break;
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. Device wasn't running yet, but disk disappeared.
		 * 2. Disk was active and disappeared.
		 * 3. Disk disappeared during synchronization process.
		 */
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
			/*
			 * Previous state should be ACTIVE, STALE or
			 * SYNCHRONIZING.
			 */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
			/* Previous state should be NEW. */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
			/*
			 * Reset bumping syncid if disk disappeared in STARTING
			 * state.
			 */
			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
				sc->sc_bump_syncid = 0;
#ifdef	INVARIANTS
		} else {
			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
			    sc->sc_name,
			    g_mirror_device_state2str(sc->sc_state),
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
#endif
		}
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		break;
	case G_MIRROR_DISK_STATE_DESTROY:
	    {
		int error;

		/* Give up, leaving the disk untouched, if this fails. */
		error = g_mirror_clear_metadata(disk);
		if (error != 0)
			return (error);
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		sc->sc_ndisks--;
		/*
		 * The number of disks has changed, so update metadata on
		 * all remaining disks ('disk' is reused as the iterator).
		 */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			g_mirror_update_metadata(disk);
		}
		break;
	    }
	default:
		KASSERT(1 == 0, ("Unknown state (%u).", state));
		break;
	}
	return (0);
}
#undef	DISK_STATE_CHANGED
2160
2161static int
2162g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2163{
2164	struct g_provider *pp;
2165	u_char *buf;
2166	int error;
2167
2168	g_topology_assert();
2169
2170	error = g_access(cp, 1, 0, 0);
2171	if (error != 0)
2172		return (error);
2173	pp = cp->provider;
2174	g_topology_unlock();
2175	/* Metadata are stored on last sector. */
2176	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2177	    &error);
2178	g_topology_lock();
2179	if (buf == NULL) {
2180		g_access(cp, -1, 0, 0);
2181		return (error);
2182	}
2183	if (error != 0) {
2184		g_access(cp, -1, 0, 0);
2185		g_free(buf);
2186		return (error);
2187	}
2188	error = g_access(cp, -1, 0, 0);
2189	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2190
2191	/* Decode metadata. */
2192	error = mirror_metadata_decode(buf, md);
2193	g_free(buf);
2194	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2195		return (EINVAL);
2196	if (error != 0) {
2197		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2198		    cp->provider->name);
2199		return (error);
2200	}
2201
2202	return (0);
2203}
2204
2205static int
2206g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2207    struct g_mirror_metadata *md)
2208{
2209
2210	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2211		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2212		    pp->name, md->md_did);
2213		return (EEXIST);
2214	}
2215	if (md->md_all != sc->sc_ndisks) {
2216		G_MIRROR_DEBUG(1,
2217		    "Invalid '%s' field on disk %s (device %s), skipping.",
2218		    "md_all", pp->name, sc->sc_name);
2219		return (EINVAL);
2220	}
2221	if (md->md_slice != sc->sc_slice) {
2222		G_MIRROR_DEBUG(1,
2223		    "Invalid '%s' field on disk %s (device %s), skipping.",
2224		    "md_slice", pp->name, sc->sc_name);
2225		return (EINVAL);
2226	}
2227	if (md->md_balance != sc->sc_balance) {
2228		G_MIRROR_DEBUG(1,
2229		    "Invalid '%s' field on disk %s (device %s), skipping.",
2230		    "md_balance", pp->name, sc->sc_name);
2231		return (EINVAL);
2232	}
2233	if (md->md_mediasize != sc->sc_mediasize) {
2234		G_MIRROR_DEBUG(1,
2235		    "Invalid '%s' field on disk %s (device %s), skipping.",
2236		    "md_mediasize", pp->name, sc->sc_name);
2237		return (EINVAL);
2238	}
2239	if (sc->sc_mediasize > pp->mediasize) {
2240		G_MIRROR_DEBUG(1,
2241		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2242		    sc->sc_name);
2243		return (EINVAL);
2244	}
2245	if (md->md_sectorsize != sc->sc_sectorsize) {
2246		G_MIRROR_DEBUG(1,
2247		    "Invalid '%s' field on disk %s (device %s), skipping.",
2248		    "md_sectorsize", pp->name, sc->sc_name);
2249		return (EINVAL);
2250	}
2251	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2252		G_MIRROR_DEBUG(1,
2253		    "Invalid sector size of disk %s (device %s), skipping.",
2254		    pp->name, sc->sc_name);
2255		return (EINVAL);
2256	}
2257	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2258		G_MIRROR_DEBUG(1,
2259		    "Invalid device flags on disk %s (device %s), skipping.",
2260		    pp->name, sc->sc_name);
2261		return (EINVAL);
2262	}
2263	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2264		G_MIRROR_DEBUG(1,
2265		    "Invalid disk flags on disk %s (device %s), skipping.",
2266		    pp->name, sc->sc_name);
2267		return (EINVAL);
2268	}
2269	return (0);
2270}
2271
2272static int
2273g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2274    struct g_mirror_metadata *md)
2275{
2276	struct g_mirror_disk *disk;
2277	int error;
2278
2279	g_topology_assert();
2280	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2281
2282	error = g_mirror_check_metadata(sc, pp, md);
2283	if (error != 0)
2284		return (error);
2285	disk = g_mirror_init_disk(sc, pp, md, &error);
2286	if (disk == NULL)
2287		return (error);
2288	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2289	    G_MIRROR_EVENT_WAIT);
2290	return (error);
2291}
2292
2293static int
2294g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2295{
2296	struct g_mirror_softc *sc;
2297	struct g_mirror_disk *disk;
2298	int dcr, dcw, dce, err, error;
2299
2300	g_topology_assert();
2301	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2302	    acw, ace);
2303
2304	dcr = pp->acr + acr;
2305	dcw = pp->acw + acw;
2306	dce = pp->ace + ace;
2307
2308	/* On first open, grab an extra "exclusive" bit */
2309	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2310		ace++;
2311	/* ... and let go of it on last close */
2312	if (dcr == 0 && dcw == 0 && dce == 0)
2313		ace--;
2314
2315	sc = pp->geom->softc;
2316	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2317		if (acr <= 0 && acw <= 0 && ace <= 0)
2318			return (0);
2319		else
2320			return (ENXIO);
2321	}
2322	error = ENXIO;
2323	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2324		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2325			continue;
2326		err = g_access(disk->d_consumer, acr, acw, ace);
2327		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2328		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2329		if (err == 0) {
2330			/*
2331			 * Mark disk as dirty on open and unmark on close.
2332			 */
2333			if (pp->acw == 0 && dcw > 0) {
2334				G_MIRROR_DEBUG(1,
2335				    "Disk %s (device %s) marked as dirty.",
2336				    g_mirror_get_diskname(disk), sc->sc_name);
2337				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2338				g_mirror_update_metadata(disk);
2339			} else if (pp->acw > 0 && dcw == 0) {
2340				G_MIRROR_DEBUG(1,
2341				    "Disk %s (device %s) marked as clean.",
2342				    g_mirror_get_diskname(disk), sc->sc_name);
2343				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2344				g_mirror_update_metadata(disk);
2345			}
2346			error = 0;
2347		} else {
2348			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2349			g_mirror_event_send(disk,
2350			    G_MIRROR_DISK_STATE_DISCONNECTED,
2351			    G_MIRROR_EVENT_DONTWAIT);
2352		}
2353	}
2354	return (error);
2355}
2356
2357static struct g_geom *
2358g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2359{
2360	struct g_mirror_softc *sc;
2361	struct g_geom *gp;
2362	int error, timeout;
2363
2364	g_topology_assert();
2365	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2366	    md->md_mid);
2367
2368	/* One disk is minimum. */
2369	if (md->md_all < 1)
2370		return (NULL);
2371	/*
2372	 * Action geom.
2373	 */
2374	gp = g_new_geomf(mp, "%s", md->md_name);
2375	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2376	gp->start = g_mirror_start;
2377	gp->spoiled = g_mirror_spoiled;
2378	gp->orphan = g_mirror_orphan;
2379	gp->access = g_mirror_access;
2380	gp->dumpconf = g_mirror_dumpconf;
2381
2382	sc->sc_id = md->md_mid;
2383	sc->sc_slice = md->md_slice;
2384	sc->sc_balance = md->md_balance;
2385	sc->sc_mediasize = md->md_mediasize;
2386	sc->sc_sectorsize = md->md_sectorsize;
2387	sc->sc_ndisks = md->md_all;
2388	sc->sc_flags = md->md_mflags;
2389	sc->sc_bump_syncid = 0;
2390	bioq_init(&sc->sc_queue);
2391	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2392	LIST_INIT(&sc->sc_disks);
2393	TAILQ_INIT(&sc->sc_events);
2394	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2395	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2396	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2397	gp->softc = sc;
2398	sc->sc_geom = gp;
2399	sc->sc_provider = NULL;
2400	/*
2401	 * Synchronization geom.
2402	 */
2403	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2404	gp->softc = sc;
2405	gp->spoiled = g_mirror_spoiled;
2406	gp->orphan = g_mirror_orphan;
2407	sc->sc_sync.ds_geom = gp;
2408	sc->sc_sync.ds_ndisks = 0;
2409	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2410	    "g_mirror %s", md->md_name);
2411	if (error != 0) {
2412		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2413		    sc->sc_name);
2414		g_destroy_geom(sc->sc_sync.ds_geom);
2415		mtx_destroy(&sc->sc_events_mtx);
2416		mtx_destroy(&sc->sc_queue_mtx);
2417		g_destroy_geom(sc->sc_geom);
2418		free(sc, M_MIRROR);
2419		return (NULL);
2420	}
2421
2422	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2423
2424	/*
2425	 * Run timeout.
2426	 */
2427	timeout = atomic_load_acq_int(&g_mirror_timeout);
2428	callout_reset(&sc->sc_callout, timeout * hz, g_mirror_go, sc);
2429	return (sc->sc_geom);
2430}
2431
2432int
2433g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2434{
2435	struct g_provider *pp;
2436
2437	g_topology_assert();
2438
2439	if (sc == NULL)
2440		return (ENXIO);
2441	pp = sc->sc_provider;
2442	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2443		if (force) {
2444			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2445			    "can't be definitely removed.", pp->name);
2446		} else {
2447			G_MIRROR_DEBUG(1,
2448			    "Device %s is still open (r%dw%de%d).", pp->name,
2449			    pp->acr, pp->acw, pp->ace);
2450			return (EBUSY);
2451		}
2452	}
2453
2454	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2455	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2456	g_topology_unlock();
2457	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2458	mtx_lock(&sc->sc_queue_mtx);
2459	wakeup(sc);
2460	mtx_unlock(&sc->sc_queue_mtx);
2461	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2462	while (sc->sc_worker != NULL)
2463		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2464	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2465	g_topology_lock();
2466	g_mirror_destroy_device(sc);
2467	free(sc, M_MIRROR);
2468	return (0);
2469}
2470
2471static void
2472g_mirror_taste_orphan(struct g_consumer *cp)
2473{
2474
2475	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2476	    cp->provider->name));
2477}
2478
2479static struct g_geom *
2480g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2481{
2482	struct g_mirror_metadata md;
2483	struct g_mirror_softc *sc;
2484	struct g_consumer *cp;
2485	struct g_geom *gp;
2486	int error;
2487
2488	g_topology_assert();
2489	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2490	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2491
2492	gp = g_new_geomf(mp, "mirror:taste");
2493	/*
2494	 * This orphan function should be never called.
2495	 */
2496	gp->orphan = g_mirror_taste_orphan;
2497	cp = g_new_consumer(gp);
2498	g_attach(cp, pp);
2499	error = g_mirror_read_metadata(cp, &md);
2500	g_detach(cp);
2501	g_destroy_consumer(cp);
2502	g_destroy_geom(gp);
2503	if (error != 0)
2504		return (NULL);
2505	gp = NULL;
2506
2507	if (md.md_version > G_MIRROR_VERSION) {
2508		printf("geom_mirror.ko module is too old to handle %s.\n",
2509		    pp->name);
2510		return (NULL);
2511	}
2512	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2513		return (NULL);
2514	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2515		G_MIRROR_DEBUG(0,
2516		    "Device %s: provider %s marked as inactive, skipping.",
2517		    md.md_name, pp->name);
2518		return (NULL);
2519	}
2520	if (g_mirror_debug >= 2)
2521		mirror_metadata_dump(&md);
2522
2523	/*
2524	 * Let's check if device already exists.
2525	 */
2526	LIST_FOREACH(gp, &mp->geom, geom) {
2527		sc = gp->softc;
2528		if (sc == NULL)
2529			continue;
2530		if (sc->sc_sync.ds_geom == gp)
2531			continue;
2532		if (strcmp(md.md_name, sc->sc_name) != 0)
2533			continue;
2534		if (md.md_mid != sc->sc_id) {
2535			G_MIRROR_DEBUG(0, "Device %s already configured.",
2536			    sc->sc_name);
2537			return (NULL);
2538		}
2539		break;
2540	}
2541	if (gp == NULL) {
2542		gp = g_mirror_create(mp, &md);
2543		if (gp == NULL) {
2544			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2545			    md.md_name);
2546			return (NULL);
2547		}
2548		sc = gp->softc;
2549	}
2550	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2551	error = g_mirror_add_disk(sc, pp, &md);
2552	if (error != 0) {
2553		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2554		    pp->name, gp->name, error);
2555		if (LIST_EMPTY(&sc->sc_disks))
2556			g_mirror_destroy(sc, 1);
2557		return (NULL);
2558	}
2559	return (gp);
2560}
2561
2562static int
2563g_mirror_destroy_geom(struct gctl_req *req __unused,
2564    struct g_class *mp __unused, struct g_geom *gp)
2565{
2566
2567	return (g_mirror_destroy(gp->softc, 0));
2568}
2569
2570static void
2571g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2572    struct g_consumer *cp, struct g_provider *pp)
2573{
2574	struct g_mirror_softc *sc;
2575
2576	g_topology_assert();
2577
2578	sc = gp->softc;
2579	if (sc == NULL)
2580		return;
2581	/* Skip synchronization geom. */
2582	if (gp == sc->sc_sync.ds_geom)
2583		return;
2584	if (pp != NULL) {
2585		/* Nothing here. */
2586	} else if (cp != NULL) {
2587		struct g_mirror_disk *disk;
2588
2589		disk = cp->private;
2590		if (disk == NULL)
2591			return;
2592		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2593		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2594			sbuf_printf(sb, "%s<Synchronized>", indent);
2595			if (disk->d_sync.ds_offset_done == 0)
2596				sbuf_printf(sb, "0%%");
2597			else {
2598				sbuf_printf(sb, "%u%%",
2599				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2600				    sc->sc_provider->mediasize));
2601			}
2602			sbuf_printf(sb, "</Synchronized>\n");
2603		}
2604		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2605		    disk->d_sync.ds_syncid);
2606		sbuf_printf(sb, "%s<Flags>", indent);
2607		if (disk->d_flags == 0)
2608			sbuf_printf(sb, "NONE");
2609		else {
2610			int first = 1;
2611
2612#define	ADD_FLAG(flag, name)	do {					\
2613	if ((disk->d_flags & (flag)) != 0) {				\
2614		if (!first)						\
2615			sbuf_printf(sb, ", ");				\
2616		else							\
2617			first = 0;					\
2618		sbuf_printf(sb, name);					\
2619	}								\
2620} while (0)
2621			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2622			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2623			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2624			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2625			    "SYNCHRONIZING");
2626			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2627#undef	ADD_FLAG
2628		}
2629		sbuf_printf(sb, "</Flags>\n");
2630		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2631		    disk->d_priority);
2632		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2633		    g_mirror_disk_state2str(disk->d_state));
2634	} else {
2635		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2636		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2637		sbuf_printf(sb, "%s<Flags>", indent);
2638		if (sc->sc_flags == 0)
2639			sbuf_printf(sb, "NONE");
2640		else {
2641			int first = 1;
2642
2643#define	ADD_FLAG(flag, name)	do {					\
2644	if ((sc->sc_flags & (flag)) != 0) {				\
2645		if (!first)						\
2646			sbuf_printf(sb, ", ");				\
2647		else							\
2648			first = 0;					\
2649		sbuf_printf(sb, name);					\
2650	}								\
2651} while (0)
2652			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2653#undef	ADD_FLAG
2654		}
2655		sbuf_printf(sb, "</Flags>\n");
2656		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2657		    (u_int)sc->sc_slice);
2658		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2659		    balance_name(sc->sc_balance));
2660		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2661		    sc->sc_ndisks);
2662	}
2663}
2664
/*
 * Declare the GEOM class of this module.
 * NOTE(review): presumably this hooks g_mirror_class into the GEOM
 * framework under the module name g_mirror; confirm against the
 * DECLARE_GEOM_CLASS definition in <geom/geom.h>.
 */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2666