g_mirror.c revision 135831
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 135831 2004-09-26 20:30:15Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/bitstring.h>
41#include <vm/uma.h>
42#include <machine/atomic.h>
43#include <geom/geom.h>
44#include <sys/proc.h>
45#include <sys/kthread.h>
46#include <geom/mirror/g_mirror.h>
47
48
/* Malloc(9) type under which all geom_mirror allocations are accounted. */
static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
/* Debug verbosity; higher values produce more G_MIRROR_DEBUG() output. */
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
/* How long to wait for all components to appear (see sysctl description). */
static u_int g_mirror_timeout = 8;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/* Ratio of regular I/O to synchronization I/O while rebuilding. */
static u_int g_mirror_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_mirror_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
/* Cap on synchronization requests issued per second. */
static u_int g_mirror_syncs_per_sec = 100;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_mirror_syncs_per_sec, 0,
    "Number of synchronizations requests per second");

/*
 * msleep(9) wrapper that logs the sleep and the wakeup at debug level 4.
 * All arguments are passed straight through to msleep().
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
75
76
/* Forward declarations for the class method table below. */
static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;

/* GEOM class glue: control requests, provider tasting, geom destruction. */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom
};


/* Forward declarations for functions defined later in this file. */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
96
97
98static const char *
99g_mirror_disk_state2str(int state)
100{
101
102	switch (state) {
103	case G_MIRROR_DISK_STATE_NONE:
104		return ("NONE");
105	case G_MIRROR_DISK_STATE_NEW:
106		return ("NEW");
107	case G_MIRROR_DISK_STATE_ACTIVE:
108		return ("ACTIVE");
109	case G_MIRROR_DISK_STATE_STALE:
110		return ("STALE");
111	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
112		return ("SYNCHRONIZING");
113	case G_MIRROR_DISK_STATE_DISCONNECTED:
114		return ("DISCONNECTED");
115	case G_MIRROR_DISK_STATE_DESTROY:
116		return ("DESTROY");
117	default:
118		return ("INVALID");
119	}
120}
121
122static const char *
123g_mirror_device_state2str(int state)
124{
125
126	switch (state) {
127	case G_MIRROR_DEVICE_STATE_STARTING:
128		return ("STARTING");
129	case G_MIRROR_DEVICE_STATE_RUNNING:
130		return ("RUNNING");
131	default:
132		return ("INVALID");
133	}
134}
135
136static const char *
137g_mirror_get_diskname(struct g_mirror_disk *disk)
138{
139
140	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
141		return ("[unknown]");
142	return (disk->d_name);
143}
144
/*
 * --- Events handling functions ---
 * Events in geom_mirror are used to maintain disks and device status
 * from one thread to simplify locking.
 */
/* Release an event structure allocated by g_mirror_event_send(). */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}
156
/*
 * Queue a state-change event for the worker thread.
 *
 * With G_MIRROR_EVENT_DEVICE set in 'flags', 'arg' is the softc and the
 * event applies to the whole device; otherwise 'arg' is the affected disk.
 * With G_MIRROR_EVENT_DONTWAIT the function returns 0 right after queueing
 * (the worker frees the event); otherwise it drops the topology lock and
 * sleeps until the event is marked done, then returns the event's error.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	/* The worker thread sleeps on 'sc' under sc_queue_mtx; poke it. */
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	g_topology_assert();
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	g_topology_unlock();
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		/*
		 * PDROP releases sc_events_mtx while asleep; the 5-second
		 * timeout re-checks the DONE flag even if a wakeup is lost.
		 * NOTE(review): e_flags is tested here without holding
		 * sc_events_mtx -- presumably the timeout makes that race
		 * benign; confirm before relying on it.
		 */
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	/* Don't even try to use 'sc' here, because it could be already dead. */
	g_topology_lock();
	error = ep->e_error;
	g_mirror_event_free(ep);
	return (error);
}
201
202static struct g_mirror_event *
203g_mirror_event_get(struct g_mirror_softc *sc)
204{
205	struct g_mirror_event *ep;
206
207	mtx_lock(&sc->sc_events_mtx);
208	ep = TAILQ_FIRST(&sc->sc_events);
209	if (ep != NULL)
210		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
211	mtx_unlock(&sc->sc_events_mtx);
212	return (ep);
213}
214
215static void
216g_mirror_event_cancel(struct g_mirror_disk *disk)
217{
218	struct g_mirror_softc *sc;
219	struct g_mirror_event *ep, *tmpep;
220
221	g_topology_assert();
222
223	sc = disk->d_softc;
224	mtx_lock(&sc->sc_events_mtx);
225	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
226		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
227			continue;
228		if (ep->e_disk != disk)
229			continue;
230		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
231		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
232			g_mirror_event_free(ep);
233		else {
234			ep->e_error = ECANCELED;
235			wakeup(ep);
236		}
237	}
238	mtx_unlock(&sc->sc_events_mtx);
239}
240
241/*
242 * Return the number of disks in given state.
243 * If state is equal to -1, count all connected disks.
244 */
245u_int
246g_mirror_ndisks(struct g_mirror_softc *sc, int state)
247{
248	struct g_mirror_disk *disk;
249	u_int n = 0;
250
251	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
252		if (state == -1 || disk->d_state == state)
253			n++;
254	}
255	return (n);
256}
257
258/*
259 * Find a disk in mirror by its disk ID.
260 */
261static struct g_mirror_disk *
262g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
263{
264	struct g_mirror_disk *disk;
265
266	g_topology_assert();
267
268	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
269		if (disk->d_id == id)
270			return (disk);
271	}
272	return (NULL);
273}
274
275static u_int
276g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
277{
278	struct bio *bp;
279	u_int nreqs = 0;
280
281	mtx_lock(&sc->sc_queue_mtx);
282	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
283		if (bp->bio_from == cp)
284			nreqs++;
285	}
286	mtx_unlock(&sc->sc_queue_mtx);
287	return (nreqs);
288}
289
/*
 * Return non-zero when consumer cp cannot be destroyed yet: either it has
 * I/O in flight (nstart != nend) or requests queued on the device queue.
 */
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	/* Requests started on the consumer but not yet completed. */
	if (cp->nstart != cp->nend) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	/* Completed requests still waiting on the worker queue. */
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}
308
/*
 * Detach and destroy consumer cp, unless it is still busy -- in that case
 * only clear its disk back-pointer; the worker finishes the destruction
 * later when the last request for it drains (see the disk == NULL paths in
 * g_mirror_regular_request()/g_mirror_sync_request()).
 */
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}
322
/*
 * Create a consumer for 'disk' on the mirror's geom and attach it to
 * provider pp.  No access counts are requested here; callers open the
 * consumer separately.  Returns 0 or the g_attach() error.
 */
static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	int error;

	g_topology_assert();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
	/* Back-pointer so completion paths can find the disk. */
	disk->d_consumer->private = disk;
	error = g_attach(disk->d_consumer, pp);
	if (error != 0)
		return (error);
	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}
340
341static void
342g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
343{
344
345	g_topology_assert();
346
347	if (cp == NULL)
348		return;
349	if (cp->provider != NULL) {
350		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
351		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
352			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
353			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
354			    0);
355			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
356		}
357		g_mirror_kill_consumer(sc, cp);
358	} else {
359		g_destroy_consumer(cp);
360	}
361}
362
/*
 * Initialize disk: allocate memory, create a consumer and attach it to the
 * provider, then seed the disk's fields from the on-disk metadata 'md'.
 * (NOTE(review): no access is opened here despite the original comment
 * mentioning r1w1e1 -- callers perform the g_access() themselves.)
 * On failure returns NULL and stores the error in *errorp (if non-NULL).
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	/* Identity and state come from the metadata sector. */
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_delay.sec = 0;
	disk->d_delay.frac = 0;
	binuptime(&disk->d_last_used);
	disk->d_flags = md->md_dflags;
	/* A non-empty md_provider marks the component as hardcoded. */
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	/* Undo partial setup; disconnect_consumer handles NULL consumers. */
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL) {
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		free(disk, M_MIRROR);
	}
	return (NULL);
}
408
/*
 * Remove a disk from the mirror: unlink it, cancel its pending events,
 * stop synchronization if one was running, drop its consumer and free it.
 * Topology lock must be held.
 */
static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	sc = disk->d_softc;
	/* Invalidate the round-robin hint if it points at this disk. */
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}
437
/*
 * Tear the whole device down: destroy the provider and every disk, flush
 * the event queue (waking sleeping senders with ECANCELED), drain the
 * callout and wither both the main and the synchronization geoms.
 * Topology lock must be held.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert();

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	/* destroy_disk unlinks each head, so keep taking LIST_FIRST. */
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			/* Sender frees the event after seeing DONE. */
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);
	gp->softc = NULL;

	/* Drop any remaining consumers of the synchronization geom. */
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	sc->sc_sync.ds_geom->softc = NULL;
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
}
480
481static void
482g_mirror_orphan(struct g_consumer *cp)
483{
484	struct g_mirror_disk *disk;
485
486	g_topology_assert();
487
488	disk = cp->private;
489	if (disk == NULL)
490		return;
491	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
492	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
493	    G_MIRROR_EVENT_DONTWAIT);
494}
495
496static void
497g_mirror_spoiled(struct g_consumer *cp)
498{
499	struct g_mirror_disk *disk;
500
501	g_topology_assert();
502
503	disk = cp->private;
504	if (disk == NULL)
505		return;
506	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
507	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
508	    G_MIRROR_EVENT_DONTWAIT);
509}
510
511/*
512 * Function should return the next active disk on the list.
513 * It is possible that it will be the same disk as given.
514 * If there are no active disks on list, NULL is returned.
515 */
516static __inline struct g_mirror_disk *
517g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
518{
519	struct g_mirror_disk *dp;
520
521	for (dp = LIST_NEXT(disk, d_next); dp != disk;
522	    dp = LIST_NEXT(dp, d_next)) {
523		if (dp == NULL)
524			dp = LIST_FIRST(&sc->sc_disks);
525		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
526			break;
527	}
528	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
529		return (NULL);
530	return (dp);
531}
532
/*
 * Round-robin disk selector used by the request-dispatch code.  Returns
 * the hinted disk if still active (otherwise the next active one) and
 * advances sc_hint past it.  Returns NULL when no active disk exists.
 */
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	/* Advance the hint so the next caller picks a different disk. */
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}
552
/*
 * Write the metadata sector -- the last sector of the component provider.
 * A NULL 'md' writes an all-zero sector (clears the metadata).
 *
 * Called with the topology lock held; the lock is dropped around the
 * g_write_data() call.  On failure the disk is scheduled for
 * disconnection and an immediate syncid bump is requested.
 */
static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int close = 0, error = 0;

	g_topology_assert();

	sc = disk->d_softc;
	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	/* Metadata lives in the provider's last sector. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	/*
	 * Open consumer if it wasn't opened and remember to close it.
	 */
	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
		error = g_access(cp, 0, 1, 1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, 1, 1, error);
		if (error == 0)
			close = 1;
#ifdef	INVARIANTS
	} else {
		/* A dirty disk must already be open for writing. */
		KASSERT(cp->acw > 0 && cp->ace > 0,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
#endif
	}
	if (error == 0) {
		if (md != NULL)
			mirror_metadata_encode(md, sector);
		g_topology_unlock();
		error = g_write_data(cp, offset, sector, length);
		g_topology_lock();
	}
	free(sector, M_MIRROR);
	if (close) {
		g_access(cp, 0, -1, -1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, -1, -1, 0);
	}
	if (error != 0) {
		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
	return (error);
}
608
609static int
610g_mirror_clear_metadata(struct g_mirror_disk *disk)
611{
612	int error;
613
614	g_topology_assert();
615	error = g_mirror_write_metadata(disk, NULL);
616	if (error == 0) {
617		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
618		    g_mirror_get_diskname(disk));
619	} else {
620		G_MIRROR_DEBUG(0,
621		    "Cannot clear metadata on disk %s (error=%d).",
622		    g_mirror_get_diskname(disk), error);
623	}
624	return (error);
625}
626
627void
628g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
629    struct g_mirror_metadata *md)
630{
631
632	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
633	md->md_version = G_MIRROR_VERSION;
634	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
635	md->md_mid = sc->sc_id;
636	md->md_all = sc->sc_ndisks;
637	md->md_slice = sc->sc_slice;
638	md->md_balance = sc->sc_balance;
639	md->md_mediasize = sc->sc_mediasize;
640	md->md_sectorsize = sc->sc_sectorsize;
641	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
642	bzero(md->md_provider, sizeof(md->md_provider));
643	if (disk == NULL) {
644		md->md_did = arc4random();
645		md->md_priority = 0;
646		md->md_syncid = 0;
647		md->md_dflags = 0;
648		md->md_sync_offset = 0;
649	} else {
650		md->md_did = disk->d_id;
651		md->md_priority = disk->d_priority;
652		md->md_syncid = disk->d_sync.ds_syncid;
653		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
654		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
655			md->md_sync_offset = disk->d_sync.ds_offset_done;
656		else
657			md->md_sync_offset = 0;
658		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
659			strlcpy(md->md_provider,
660			    disk->d_consumer->provider->name,
661			    sizeof(md->md_provider));
662		}
663	}
664}
665
666void
667g_mirror_update_metadata(struct g_mirror_disk *disk)
668{
669	struct g_mirror_metadata md;
670	int error;
671
672	g_topology_assert();
673	g_mirror_fill_metadata(disk->d_softc, disk, &md);
674	error = g_mirror_write_metadata(disk, &md);
675	if (error == 0) {
676		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
677		    g_mirror_get_diskname(disk));
678	} else {
679		G_MIRROR_DEBUG(0,
680		    "Cannot update metadata on disk %s (error=%d).",
681		    g_mirror_get_diskname(disk), error);
682	}
683}
684
685static void
686g_mirror_bump_syncid(struct g_mirror_softc *sc)
687{
688	struct g_mirror_disk *disk;
689
690	g_topology_assert();
691	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
692	    ("%s called with no active disks (device=%s).", __func__,
693	    sc->sc_name));
694
695	sc->sc_syncid++;
696	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
697	    sc->sc_syncid);
698	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
699		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
700		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
701			disk->d_sync.ds_syncid = sc->sc_syncid;
702			g_mirror_update_metadata(disk);
703		}
704	}
705}
706
707static __inline int
708bintime_cmp(struct bintime *bt1, struct bintime *bt2)
709{
710
711	if (bt1->sec < bt2->sec)
712		return (-1);
713	else if (bt1->sec > bt2->sec)
714		return (1);
715	if (bt1->frac < bt2->frac)
716		return (-1);
717	else if (bt1->frac > bt2->frac)
718		return (1);
719	return (0);
720}
721
/*
 * Record the service time of bp (now minus bio_t0) in d_delay.
 * Only maintained when the load-balancing algorithm is selected.
 */
static void
g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
{

	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
		return;
	binuptime(&disk->d_delay);
	bintime_sub(&disk->d_delay, &bp->bio_t0);
}
731
/*
 * Completion callback for cloned regular requests: tag the bio as regular
 * and hand it to the worker thread via the device queue.
 */
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}
744
/*
 * Worker-side completion handling for a cloned regular request.
 *
 * A failing disk is scheduled for disconnection.  For writes/deletes the
 * failed child is removed from the parent's accounting so the request
 * still succeeds if at least one component succeeded.  A read whose every
 * child came back with an error is re-queued with the error cleared --
 * presumably to be retried on another disk once the failing one is gone.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer already detached from its disk; finish killing it. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	} else {
		g_mirror_update_delay(disk, bp);
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		/* Deliver the parent once every child has come back. */
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
		if (disk != NULL) {
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/*
			 * Drop the failed child from the accounting; the
			 * remaining components decide the parent's fate.
			 */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_children == pbp->bio_inbed) {
			/* All children failed; clear the error and retry. */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
831
/*
 * Completion callback for synchronization requests: tag the bio as a sync
 * request and hand it to the worker thread via the device queue.
 */
static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}
845
846static void
847g_mirror_start(struct bio *bp)
848{
849	struct g_mirror_softc *sc;
850
851	sc = bp->bio_to->geom->softc;
852	/*
853	 * If sc == NULL or there are no valid disks, provider's error
854	 * should be set and g_mirror_start() should not be called at all.
855	 */
856	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
857	    ("Provider's error should be set (error=%d)(mirror=%s).",
858	    bp->bio_to->error, bp->bio_to->name));
859	G_MIRROR_LOGREQ(3, bp, "Request received.");
860
861	switch (bp->bio_cmd) {
862	case BIO_READ:
863	case BIO_WRITE:
864	case BIO_DELETE:
865		break;
866	case BIO_GETATTR:
867	default:
868		g_io_deliver(bp, EOPNOTSUPP);
869		return;
870	}
871	mtx_lock(&sc->sc_queue_mtx);
872	bioq_disksort(&sc->sc_queue, bp);
873	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
874	wakeup(sc);
875	mtx_unlock(&sc->sc_queue_mtx);
876}
877
/*
 * Send one synchronization request: a BIO_READ of the next sync block from
 * the mirror provider, to be turned into a write to the syncing disk by
 * g_mirror_sync_request() on completion.
 */
static void
g_mirror_sync_one(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct bio *bp;

	sc = disk->d_softc;
	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));

	/* g_new_bio() may fail; simply skip this round if it does. */
	bp = g_new_bio();
	if (bp == NULL)
		return;
	bp->bio_parent = NULL;
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = disk->d_sync.ds_offset;
	/* Clamp the final block to the end of the device. */
	bp->bio_length = MIN(G_MIRROR_SYNC_BLOCK_SIZE,
	    sc->sc_mediasize - bp->bio_offset);
	bp->bio_cflags = 0;
	bp->bio_done = g_mirror_sync_done;
	bp->bio_data = disk->d_sync.ds_data;
	if (bp->bio_data == NULL) {
		g_destroy_bio(bp);
		return;
	}
	disk->d_sync.ds_offset += bp->bio_length;
	bp->bio_to = sc->sc_provider;
	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
	g_io_request(bp, disk->d_sync.ds_consumer);
}
912
/*
 * Worker-side handling of a completed synchronization bio.
 *
 * Synchronization works in two phases per block: a BIO_READ from the
 * mirror provider, then a BIO_WRITE of the same data to the syncing disk.
 * After the final block's write the disk is scheduled for activation.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* The disk is gone; finish destroying its sync consumer. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		/* Reuse the bio: turn the read into the phase-two write. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		cp = disk->d_consumer;
		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		disk->d_sync.ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		if (disk->d_sync.ds_offset_done == sc->sc_provider->mediasize) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		} else if ((disk->d_sync.ds_offset_done %
		    (G_MIRROR_SYNC_BLOCK_SIZE * 100)) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_mirror_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
993
994static void
995g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
996{
997	struct g_mirror_disk *disk;
998	struct g_consumer *cp;
999	struct bio *cbp;
1000
1001	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1002		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1003			break;
1004	}
1005	if (disk == NULL) {
1006		if (bp->bio_error == 0)
1007			bp->bio_error = ENXIO;
1008		g_io_deliver(bp, bp->bio_error);
1009		return;
1010	}
1011	cbp = g_clone_bio(bp);
1012	if (cbp == NULL) {
1013		if (bp->bio_error == 0)
1014			bp->bio_error = ENOMEM;
1015		g_io_deliver(bp, bp->bio_error);
1016		return;
1017	}
1018	/*
1019	 * Fill in the component buf structure.
1020	 */
1021	cp = disk->d_consumer;
1022	cbp->bio_done = g_mirror_done;
1023	cbp->bio_to = cp->provider;
1024	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1025	KASSERT(cp->acr > 0 && cp->ace > 0,
1026	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1027	    cp->acw, cp->ace));
1028	g_io_request(cbp, cp);
1029}
1030
1031static void
1032g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1033{
1034	struct g_mirror_disk *disk;
1035	struct g_consumer *cp;
1036	struct bio *cbp;
1037
1038	disk = g_mirror_get_disk(sc);
1039	if (disk == NULL) {
1040		if (bp->bio_error == 0)
1041			bp->bio_error = ENXIO;
1042		g_io_deliver(bp, bp->bio_error);
1043		return;
1044	}
1045	cbp = g_clone_bio(bp);
1046	if (cbp == NULL) {
1047		if (bp->bio_error == 0)
1048			bp->bio_error = ENOMEM;
1049		g_io_deliver(bp, bp->bio_error);
1050		return;
1051	}
1052	/*
1053	 * Fill in the component buf structure.
1054	 */
1055	cp = disk->d_consumer;
1056	cbp->bio_done = g_mirror_done;
1057	cbp->bio_to = cp->provider;
1058	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1059	KASSERT(cp->acr > 0 && cp->ace > 0,
1060	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1061	    cp->acw, cp->ace));
1062	g_io_request(cbp, cp);
1063}
1064
1065static void
1066g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1067{
1068	struct g_mirror_disk *disk, *dp;
1069	struct g_consumer *cp;
1070	struct bio *cbp;
1071	struct bintime curtime;
1072
1073	binuptime(&curtime);
1074	/*
1075	 * Find a disk which the smallest load.
1076	 */
1077	disk = NULL;
1078	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1079		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1080			continue;
1081		/* If disk wasn't used for more than 2 sec, use it. */
1082		if (curtime.sec - dp->d_last_used.sec >= 2) {
1083			disk = dp;
1084			break;
1085		}
1086		if (disk == NULL ||
1087		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1088			disk = dp;
1089		}
1090	}
1091	cbp = g_clone_bio(bp);
1092	if (cbp == NULL) {
1093		if (bp->bio_error == 0)
1094			bp->bio_error = ENOMEM;
1095		g_io_deliver(bp, bp->bio_error);
1096		return;
1097	}
1098	/*
1099	 * Fill in the component buf structure.
1100	 */
1101	cp = disk->d_consumer;
1102	cbp->bio_done = g_mirror_done;
1103	cbp->bio_to = cp->provider;
1104	binuptime(&disk->d_last_used);
1105	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1106	KASSERT(cp->acr > 0 && cp->ace > 0,
1107	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1108	    cp->acw, cp->ace));
1109	g_io_request(cbp, cp);
1110}
1111
/*
 * Handle a read request using the SPLIT balancing algorithm: divide the
 * request into roughly equal, sector-aligned slices and read each slice
 * from a different ACTIVE component in parallel.  Requests no larger
 * than the configured slice size fall back to round-robin.
 */
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	/* Too small to be worth splitting. */
	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	/* Round the slice up to a multiple of the sector size. */
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			/* Undo all clones allocated so far and fail. */
			for (cbp = bioq_first(&queue); cbp != NULL;
			    cbp = bioq_first(&queue)) {
				bioq_remove(&queue, cbp);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		/* Remember the target disk for the send loop below. */
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		/* The final slice may be shorter than the others. */
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	/* Every clone allocated successfully; now actually send them. */
	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
		bioq_remove(&queue, cbp);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr > 0 && cp->ace > 0,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}
1180
/*
 * Dispatch an incoming request from the mirror provider to the
 * components.  Reads go to a single component chosen by the configured
 * balance algorithm; writes and deletes are replicated to every usable
 * component.
 */
static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	switch (bp->bio_cmd) {
	case BIO_READ:
		/* Route the read according to the balance algorithm. */
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				/*
				 * The area at and past ds_offset has not
				 * been synchronized yet; the sync process
				 * will copy it later, so skip the write.
				 */
				if (bp->bio_offset >= disk->d_sync.ds_offset)
					continue;
				break;
			default:
				/* Disk cannot accept writes; skip it. */
				continue;
			}
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				/* Undo all clones allocated so far. */
				for (cbp = bioq_first(&queue); cbp != NULL;
				    cbp = bioq_first(&queue)) {
					bioq_remove(&queue, cbp);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			/* Remember the target consumer for the send loop. */
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acw > 0 && cp->ace > 0,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		/* Every clone allocated successfully; now send them. */
		for (cbp = bioq_first(&queue); cbp != NULL;
		    cbp = bioq_first(&queue)) {
			bioq_remove(&queue, cbp);
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			g_io_request(cbp, cp);
		}
		/*
		 * Bump syncid on first write.
		 */
		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
			sc->sc_bump_syncid = 0;
			g_topology_lock();
			g_mirror_bump_syncid(sc);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
1274
1275static int
1276g_mirror_can_destroy(struct g_mirror_softc *sc)
1277{
1278	struct g_geom *gp;
1279	struct g_consumer *cp;
1280
1281	g_topology_assert();
1282	gp = sc->sc_geom;
1283	LIST_FOREACH(cp, &gp->consumer, consumer) {
1284		if (g_mirror_is_busy(sc, cp))
1285			return (0);
1286	}
1287	gp = sc->sc_sync.ds_geom;
1288	LIST_FOREACH(cp, &gp->consumer, consumer) {
1289		if (g_mirror_is_busy(sc, cp))
1290			return (0);
1291	}
1292	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1293	    sc->sc_name);
1294	return (1);
1295}
1296
1297static int
1298g_mirror_try_destroy(struct g_mirror_softc *sc)
1299{
1300
1301	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1302		g_topology_lock();
1303		if (!g_mirror_can_destroy(sc)) {
1304			g_topology_unlock();
1305			return (0);
1306		}
1307		g_topology_unlock();
1308		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1309		    &sc->sc_worker);
1310		wakeup(&sc->sc_worker);
1311		sc->sc_worker = NULL;
1312	} else {
1313		g_topology_lock();
1314		if (!g_mirror_can_destroy(sc)) {
1315			g_topology_unlock();
1316			return (0);
1317		}
1318		g_mirror_destroy_device(sc);
1319		g_topology_unlock();
1320		free(sc, M_MIRROR);
1321	}
1322	return (1);
1323}
1324
1325/*
1326 * Worker thread.
1327 */
1328static void
1329g_mirror_worker(void *arg)
1330{
1331	struct g_mirror_softc *sc;
1332	struct g_mirror_disk *disk;
1333	struct g_mirror_event *ep;
1334	struct bio *bp;
1335	u_int nreqs;
1336
1337	sc = arg;
1338	curthread->td_base_pri = PRIBIO;
1339
1340	nreqs = 0;
1341	for (;;) {
1342		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1343		/*
1344		 * First take a look at events.
1345		 * This is important to handle events before any I/O requests.
1346		 */
1347		ep = g_mirror_event_get(sc);
1348		if (ep != NULL) {
1349			g_topology_lock();
1350			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1351				/* Update only device status. */
1352				G_MIRROR_DEBUG(3,
1353				    "Running event for device %s.",
1354				    sc->sc_name);
1355				ep->e_error = 0;
1356				g_mirror_update_device(sc, 1);
1357			} else {
1358				/* Update disk status. */
1359				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1360				     g_mirror_get_diskname(ep->e_disk));
1361				ep->e_error = g_mirror_update_disk(ep->e_disk,
1362				    ep->e_state);
1363				if (ep->e_error == 0)
1364					g_mirror_update_device(sc, 0);
1365			}
1366			g_topology_unlock();
1367			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1368				KASSERT(ep->e_error == 0,
1369				    ("Error cannot be handled."));
1370				g_mirror_event_free(ep);
1371			} else {
1372				ep->e_flags |= G_MIRROR_EVENT_DONE;
1373				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1374				    ep);
1375				mtx_lock(&sc->sc_events_mtx);
1376				wakeup(ep);
1377				mtx_unlock(&sc->sc_events_mtx);
1378			}
1379			if ((sc->sc_flags &
1380			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1381				if (g_mirror_try_destroy(sc))
1382					kthread_exit(0);
1383			}
1384			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1385			continue;
1386		}
1387		/*
1388		 * Now I/O requests.
1389		 */
1390		/* Get first request from the queue. */
1391		mtx_lock(&sc->sc_queue_mtx);
1392		bp = bioq_first(&sc->sc_queue);
1393		if (bp == NULL) {
1394			if ((sc->sc_flags &
1395			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1396				mtx_unlock(&sc->sc_queue_mtx);
1397				if (g_mirror_try_destroy(sc))
1398					kthread_exit(0);
1399				mtx_lock(&sc->sc_queue_mtx);
1400			}
1401		}
1402		if (sc->sc_sync.ds_ndisks > 0 &&
1403		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1404			mtx_unlock(&sc->sc_queue_mtx);
1405			/*
1406			 * It is time for synchronization...
1407			 */
1408			nreqs = 0;
1409			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1410				if (disk->d_state !=
1411				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1412					continue;
1413				}
1414				if (disk->d_sync.ds_offset >=
1415				    sc->sc_provider->mediasize) {
1416					continue;
1417				}
1418				if (disk->d_sync.ds_offset >
1419				    disk->d_sync.ds_offset_done) {
1420					continue;
1421				}
1422				g_mirror_sync_one(disk);
1423			}
1424			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1425			goto sleep;
1426		}
1427		if (bp == NULL) {
1428			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 0);
1429			G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
1430			continue;
1431		}
1432		nreqs++;
1433		bioq_remove(&sc->sc_queue, bp);
1434		mtx_unlock(&sc->sc_queue_mtx);
1435
1436		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1437			g_mirror_regular_request(bp);
1438		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1439			u_int timeout, sps;
1440
1441			g_mirror_sync_request(bp);
1442sleep:
1443			sps = atomic_load_acq_int(&g_mirror_syncs_per_sec);
1444			if (sps == 0) {
1445				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1446				continue;
1447			}
1448			mtx_lock(&sc->sc_queue_mtx);
1449			if (bioq_first(&sc->sc_queue) != NULL) {
1450				mtx_unlock(&sc->sc_queue_mtx);
1451				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1452				continue;
1453			}
1454			timeout = hz / sps;
1455			if (timeout == 0)
1456				timeout = 1;
1457			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w2",
1458			    timeout);
1459		} else {
1460			g_mirror_register_request(bp);
1461		}
1462		G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1463	}
1464}
1465
1466/*
1467 * Open disk's consumer if needed.
1468 */
1469static void
1470g_mirror_update_access(struct g_mirror_disk *disk)
1471{
1472	struct g_provider *pp;
1473	struct g_consumer *cp;
1474	int acr, acw, ace, cpw, error;
1475
1476	g_topology_assert();
1477
1478	cp = disk->d_consumer;
1479	pp = disk->d_softc->sc_provider;
1480	if (pp == NULL) {
1481		acr = -cp->acr;
1482		acw = -cp->acw;
1483		ace = -cp->ace;
1484	} else {
1485		acr = pp->acr - cp->acr;
1486		acw = pp->acw - cp->acw;
1487		ace = pp->ace - cp->ace;
1488		/* Grab an extra "exclusive" bit. */
1489		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1490			ace++;
1491	}
1492	if (acr == 0 && acw == 0 && ace == 0)
1493		return;
1494	cpw = cp->acw;
1495	error = g_access(cp, acr, acw, ace);
1496	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1497	    acw, ace, error);
1498	if (error != 0) {
1499		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1500		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1501		    G_MIRROR_EVENT_DONTWAIT);
1502		return;
1503	}
1504	if (cpw == 0 && cp->acw > 0) {
1505		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1506		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1507		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1508	} else if (cpw > 0 && cp->acw == 0) {
1509		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1510		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1511		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1512	}
1513}
1514
/*
 * Start synchronization of the given disk: open its consumer for
 * (exclusive) writing, attach and open a read consumer on the mirror
 * provider to fetch valid data from, and allocate the sync buffer.
 */
static void
g_mirror_sync_start(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	sc = disk->d_softc;
	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
	    sc->sc_state));
	cp = disk->d_consumer;
	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
	    ("Consumer %s already opened.", cp->provider->name));

	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_mirror_get_diskname(disk));
	/* Open the component write-only and exclusively. */
	error = g_access(cp, 0, 1, 1);
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
	    1, error);
	if (error != 0) {
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
		return;
	}
	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	KASSERT(disk->d_sync.ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_mirror_get_diskname(disk)));
	/* Read-only consumer on the mirror provider itself. */
	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
	disk->d_sync.ds_consumer->private = disk;
	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
	    disk->d_softc->sc_name, error));
	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).",
	    disk->d_softc->sc_name, error));
	/* Buffer used to carry data between sync reads and writes. */
	disk->d_sync.ds_data = malloc(G_MIRROR_SYNC_BLOCK_SIZE, M_MIRROR,
	    M_WAITOK);
	sc->sc_sync.ds_ndisks++;
}
1558
1559/*
1560 * Stop synchronization process.
1561 * type: 0 - synchronization finished
1562 *       1 - synchronization stopped
1563 */
1564static void
1565g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1566{
1567	struct g_consumer *cp;
1568
1569	g_topology_assert();
1570	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1571	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1572	    g_mirror_disk_state2str(disk->d_state)));
1573	if (disk->d_sync.ds_consumer == NULL)
1574		return;
1575
1576	if (type == 0) {
1577		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1578		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1579	} else /* if (type == 1) */ {
1580		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1581		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1582	}
1583	cp = disk->d_sync.ds_consumer;
1584	g_access(cp, -1, 0, 0);
1585	g_mirror_kill_consumer(disk->d_softc, cp);
1586	free(disk->d_sync.ds_data, M_MIRROR);
1587	disk->d_sync.ds_consumer = NULL;
1588	disk->d_softc->sc_sync.ds_ndisks--;
1589	cp = disk->d_consumer;
1590	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1591	    ("Consumer %s not opened.", cp->provider->name));
1592	g_access(cp, 0, -1, -1);
1593	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1594	    -1, 0);
1595	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1596}
1597
1598static void
1599g_mirror_launch_provider(struct g_mirror_softc *sc)
1600{
1601	struct g_mirror_disk *disk;
1602	struct g_provider *pp;
1603
1604	g_topology_assert();
1605
1606	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1607	pp->mediasize = sc->sc_mediasize;
1608	pp->sectorsize = sc->sc_sectorsize;
1609	sc->sc_provider = pp;
1610	g_error_provider(pp, 0);
1611	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1612	    pp->name);
1613	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1614		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1615			g_mirror_sync_start(disk);
1616	}
1617}
1618
/*
 * Destroy the mirror/<name> provider: fail all pending requests with
 * ENXIO, orphan the provider and stop in-progress synchronizations
 * (they read from the provider being destroyed).
 */
static void
g_mirror_destroy_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct bio *bp;

	g_topology_assert();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	g_error_provider(sc->sc_provider, ENXIO);
	/* Fail everything queued; no component can serve it now. */
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
		bioq_remove(&sc->sc_queue, bp);
		g_io_deliver(bp, ENXIO);
	}
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
	    sc->sc_provider->name);
	sc->sc_provider->flags |= G_PF_WITHER;
	g_orphan_provider(sc->sc_provider, ENXIO);
	sc->sc_provider = NULL;
	/* Synchronization reads from the provider, so stop it too. */
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_stop(disk, 1);
	}
}
1646
1647static void
1648g_mirror_go(void *arg)
1649{
1650	struct g_mirror_softc *sc;
1651
1652	sc = arg;
1653	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1654	g_mirror_event_send(sc, 0,
1655	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1656}
1657
/*
 * Decide which state a connecting disk should enter, based on its
 * syncid relative to the device's: ACTIVE, SYNCHRONIZING, STALE, or
 * NONE (the disk was destroyed because it is fresher than the running
 * device).
 */
static u_int
g_mirror_determine_state(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	u_int state;

	sc = disk->d_softc;
	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
		if ((disk->d_flags &
		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
			/* Disk does not need synchronization. */
			state = G_MIRROR_DISK_STATE_ACTIVE;
		} else {
			if ((sc->sc_flags &
			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
			    (disk->d_flags &
			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
				/*
				 * We can start synchronization from
				 * the stored offset.
				 */
				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
			} else {
				state = G_MIRROR_DISK_STATE_STALE;
			}
		}
	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because if it even was synchronized, it was
		 * synchronized to disks with different syncid.
		 */
		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		disk->d_sync.ds_syncid = sc->sc_syncid;
		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
		} else {
			state = G_MIRROR_DISK_STATE_STALE;
		}
	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were writes, the mirror data is inconsistent.
		 * The best choice here is not to touch this disk and to
		 * inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrives!! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
		g_mirror_destroy_disk(disk);
		state = G_MIRROR_DISK_STATE_NONE;
		/* Return immediately, because disk was destroyed. */
		return (state);
	}
	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
	return (state);
}
1722
1723/*
1724 * Update device state.
1725 */
1726static void
1727g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1728{
1729	struct g_mirror_disk *disk;
1730	u_int state;
1731
1732	g_topology_assert();
1733
1734	switch (sc->sc_state) {
1735	case G_MIRROR_DEVICE_STATE_STARTING:
1736	    {
1737		struct g_mirror_disk *pdisk;
1738		u_int dirty, ndisks, syncid;
1739
1740		KASSERT(sc->sc_provider == NULL,
1741		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1742		/*
1743		 * Are we ready? We are, if all disks are connected or
1744		 * if we have any disks and 'force' is true.
1745		 */
1746		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1747		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1748			;
1749		} else if (g_mirror_ndisks(sc, -1) == 0) {
1750			/*
1751			 * Disks went down in starting phase, so destroy
1752			 * device.
1753			 */
1754			callout_drain(&sc->sc_callout);
1755			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1756			return;
1757		} else {
1758			return;
1759		}
1760
1761		/*
1762		 * Activate all disks with the biggest syncid.
1763		 */
1764		if (force) {
1765			/*
1766			 * If 'force' is true, we have been called due to
1767			 * timeout, so don't bother canceling timeout.
1768			 */
1769			ndisks = 0;
1770			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1771				if ((disk->d_flags &
1772				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1773					ndisks++;
1774				}
1775			}
1776			if (ndisks == 0) {
1777				/* No valid disks found, destroy device. */
1778				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1779				return;
1780			}
1781		} else {
1782			/* Cancel timeout. */
1783			callout_drain(&sc->sc_callout);
1784		}
1785
1786		/*
1787		 * Find disk with the biggest syncid.
1788		 */
1789		syncid = 0;
1790		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1791			if (disk->d_sync.ds_syncid > syncid)
1792				syncid = disk->d_sync.ds_syncid;
1793		}
1794
1795		/*
1796		 * Here we need to look for dirty disks and if all disks
1797		 * with the biggest syncid are dirty, we have to choose
1798		 * one with the biggest priority and rebuild the rest.
1799		 */
1800		/*
1801		 * Find the number of dirty disks with the biggest syncid.
1802		 * Find the number of disks with the biggest syncid.
1803		 * While here, find a disk with the biggest priority.
1804		 */
1805		dirty = ndisks = 0;
1806		pdisk = NULL;
1807		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1808			if (disk->d_sync.ds_syncid != syncid)
1809				continue;
1810			if ((disk->d_flags &
1811			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1812				continue;
1813			}
1814			ndisks++;
1815			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1816				dirty++;
1817				if (pdisk == NULL ||
1818				    pdisk->d_priority < disk->d_priority) {
1819					pdisk = disk;
1820				}
1821			}
1822		}
1823		if (dirty == 0) {
1824			/* No dirty disks at all, great. */
1825		} else if (dirty == ndisks) {
1826			/*
1827			 * Force synchronization for all dirty disks except one
1828			 * with the biggest priority.
1829			 */
1830			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1831			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1832			    "master disk for synchronization.",
1833			    g_mirror_get_diskname(pdisk), sc->sc_name);
1834			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1835				if (disk->d_sync.ds_syncid != syncid)
1836					continue;
1837				if ((disk->d_flags &
1838				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1839					continue;
1840				}
1841				KASSERT((disk->d_flags &
1842				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1843				    ("Disk %s isn't marked as dirty.",
1844				    g_mirror_get_diskname(disk)));
1845				/* Skip the disk with the biggest priority. */
1846				if (disk == pdisk)
1847					continue;
1848				disk->d_sync.ds_syncid = 0;
1849			}
1850		} else if (dirty < ndisks) {
1851			/*
1852			 * Force synchronization for all dirty disks.
1853			 * We have some non-dirty disks.
1854			 */
1855			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1856				if (disk->d_sync.ds_syncid != syncid)
1857					continue;
1858				if ((disk->d_flags &
1859				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1860					continue;
1861				}
1862				if ((disk->d_flags &
1863				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1864					continue;
1865				}
1866				disk->d_sync.ds_syncid = 0;
1867			}
1868		}
1869
1870		/* Reset hint. */
1871		sc->sc_hint = NULL;
1872		sc->sc_syncid = syncid;
1873		if (force) {
1874			/* Remember to bump syncid on first write. */
1875			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1876		}
1877		state = G_MIRROR_DEVICE_STATE_RUNNING;
1878		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1879		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1880		    g_mirror_device_state2str(state));
1881		sc->sc_state = state;
1882		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1883			state = g_mirror_determine_state(disk);
1884			g_mirror_event_send(disk, state,
1885			    G_MIRROR_EVENT_DONTWAIT);
1886			if (state == G_MIRROR_DISK_STATE_STALE) {
1887				sc->sc_bump_syncid =
1888				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1889			}
1890		}
1891		break;
1892	    }
1893	case G_MIRROR_DEVICE_STATE_RUNNING:
1894		if (g_mirror_ndisks(sc, -1) == 0) {
1895			/*
1896			 * No disks at all, we need to destroy device.
1897			 */
1898			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1899			break;
1900		} else if (g_mirror_ndisks(sc,
1901		    G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
1902		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1903			/*
1904			 * No active disks, destroy provider.
1905			 */
1906			if (sc->sc_provider != NULL)
1907				g_mirror_destroy_provider(sc);
1908			break;
1909		} else if (g_mirror_ndisks(sc,
1910		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
1911		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1912			/*
1913			 * We have active disks, launch provider if it doesn't
1914			 * exist.
1915			 */
1916			if (sc->sc_provider == NULL)
1917				g_mirror_launch_provider(sc);
1918		}
1919		/*
1920		 * Bump syncid here, if we need to do it immediately.
1921		 */
1922		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
1923			sc->sc_bump_syncid = 0;
1924			g_mirror_bump_syncid(sc);
1925		}
1926		break;
1927	default:
1928		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
1929		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
1930		break;
1931	}
1932}
1933
1934/*
1935 * Update disk state and device state if needed.
1936 */
1937#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
1938	"Disk %s state changed from %s to %s (device %s).",		\
1939	g_mirror_get_diskname(disk),					\
1940	g_mirror_disk_state2str(disk->d_state),				\
1941	g_mirror_disk_state2str(state), sc->sc_name)
1942static int
1943g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
1944{
1945	struct g_mirror_softc *sc;
1946
1947	g_topology_assert();
1948
1949	sc = disk->d_softc;
1950again:
1951	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
1952	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
1953	    g_mirror_disk_state2str(state));
1954	switch (state) {
1955	case G_MIRROR_DISK_STATE_NEW:
1956		/*
1957		 * Possible scenarios:
1958		 * 1. New disk arrive.
1959		 */
1960		/* Previous state should be NONE. */
1961		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
1962		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1963		    g_mirror_disk_state2str(disk->d_state)));
1964		DISK_STATE_CHANGED();
1965
1966		disk->d_state = state;
1967		if (LIST_EMPTY(&sc->sc_disks))
1968			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
1969		else {
1970			struct g_mirror_disk *dp;
1971
1972			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1973				if (disk->d_priority >= dp->d_priority) {
1974					LIST_INSERT_BEFORE(dp, disk, d_next);
1975					dp = NULL;
1976					break;
1977				}
1978				if (LIST_NEXT(dp, d_next) == NULL)
1979					break;
1980			}
1981			if (dp != NULL)
1982				LIST_INSERT_AFTER(dp, disk, d_next);
1983		}
1984		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
1985		    sc->sc_name, g_mirror_get_diskname(disk));
1986		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
1987			break;
1988		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1989		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1990		    g_mirror_device_state2str(sc->sc_state),
1991		    g_mirror_get_diskname(disk),
1992		    g_mirror_disk_state2str(disk->d_state)));
1993		state = g_mirror_determine_state(disk);
1994		if (state != G_MIRROR_DISK_STATE_NONE)
1995			goto again;
1996		break;
1997	case G_MIRROR_DISK_STATE_ACTIVE:
1998		/*
1999		 * Possible scenarios:
2000		 * 1. New disk does not need synchronization.
2001		 * 2. Synchronization process finished successfully.
2002		 */
2003		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2004		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2005		    g_mirror_device_state2str(sc->sc_state),
2006		    g_mirror_get_diskname(disk),
2007		    g_mirror_disk_state2str(disk->d_state)));
2008		/* Previous state should be NEW or SYNCHRONIZING. */
2009		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2010		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2011		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2012		    g_mirror_disk_state2str(disk->d_state)));
2013		DISK_STATE_CHANGED();
2014
2015		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2016			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2017		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2018			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2019			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2020			g_mirror_sync_stop(disk, 0);
2021		}
2022		disk->d_state = state;
2023		disk->d_sync.ds_offset = 0;
2024		disk->d_sync.ds_offset_done = 0;
2025		g_mirror_update_access(disk);
2026		g_mirror_update_metadata(disk);
2027		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2028		    sc->sc_name, g_mirror_get_diskname(disk));
2029		break;
2030	case G_MIRROR_DISK_STATE_STALE:
2031		/*
2032		 * Possible scenarios:
2033		 * 1. Stale disk was connected.
2034		 */
2035		/* Previous state should be NEW. */
2036		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2037		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2038		    g_mirror_disk_state2str(disk->d_state)));
2039		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2040		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2041		    g_mirror_device_state2str(sc->sc_state),
2042		    g_mirror_get_diskname(disk),
2043		    g_mirror_disk_state2str(disk->d_state)));
2044		/*
2045		 * STALE state is only possible if device is marked
2046		 * NOAUTOSYNC.
2047		 */
2048		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2049		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2050		    g_mirror_device_state2str(sc->sc_state),
2051		    g_mirror_get_diskname(disk),
2052		    g_mirror_disk_state2str(disk->d_state)));
2053		DISK_STATE_CHANGED();
2054
2055		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2056		disk->d_state = state;
2057		g_mirror_update_metadata(disk);
2058		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2059		    sc->sc_name, g_mirror_get_diskname(disk));
2060		break;
2061	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2062		/*
2063		 * Possible scenarios:
2064		 * 1. Disk which needs synchronization was connected.
2065		 */
2066		/* Previous state should be NEW. */
2067		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2068		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2069		    g_mirror_disk_state2str(disk->d_state)));
2070		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2071		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2072		    g_mirror_device_state2str(sc->sc_state),
2073		    g_mirror_get_diskname(disk),
2074		    g_mirror_disk_state2str(disk->d_state)));
2075		DISK_STATE_CHANGED();
2076
2077		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2078			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2079		disk->d_state = state;
2080		if (sc->sc_provider != NULL) {
2081			g_mirror_sync_start(disk);
2082			g_mirror_update_metadata(disk);
2083		}
2084		break;
2085	case G_MIRROR_DISK_STATE_DISCONNECTED:
2086		/*
2087		 * Possible scenarios:
2088		 * 1. Device wasn't running yet, but disk disappear.
2089		 * 2. Disk was active and disapppear.
2090		 * 3. Disk disappear during synchronization process.
2091		 */
2092		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2093			/*
2094			 * Previous state should be ACTIVE, STALE or
2095			 * SYNCHRONIZING.
2096			 */
2097			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2098			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2099			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2100			    ("Wrong disk state (%s, %s).",
2101			    g_mirror_get_diskname(disk),
2102			    g_mirror_disk_state2str(disk->d_state)));
2103		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2104			/* Previous state should be NEW. */
2105			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2106			    ("Wrong disk state (%s, %s).",
2107			    g_mirror_get_diskname(disk),
2108			    g_mirror_disk_state2str(disk->d_state)));
2109			/*
2110			 * Reset bumping syncid if disk disappeared in STARTING
2111			 * state.
2112			 */
2113			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2114				sc->sc_bump_syncid = 0;
2115#ifdef	INVARIANTS
2116		} else {
2117			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2118			    sc->sc_name,
2119			    g_mirror_device_state2str(sc->sc_state),
2120			    g_mirror_get_diskname(disk),
2121			    g_mirror_disk_state2str(disk->d_state)));
2122#endif
2123		}
2124		DISK_STATE_CHANGED();
2125		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2126		    sc->sc_name, g_mirror_get_diskname(disk));
2127
2128		g_mirror_destroy_disk(disk);
2129		break;
2130	case G_MIRROR_DISK_STATE_DESTROY:
2131	    {
2132		int error;
2133
2134		error = g_mirror_clear_metadata(disk);
2135		if (error != 0)
2136			return (error);
2137		DISK_STATE_CHANGED();
2138		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2139		    sc->sc_name, g_mirror_get_diskname(disk));
2140
2141		g_mirror_destroy_disk(disk);
2142		sc->sc_ndisks--;
2143		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2144			g_mirror_update_metadata(disk);
2145		}
2146		break;
2147	    }
2148	default:
2149		KASSERT(1 == 0, ("Unknown state (%u).", state));
2150		break;
2151	}
2152	return (0);
2153}
2154#undef	DISK_STATE_CHANGED
2155
/*
 * Read and decode the on-disk gmirror metadata from the last sector of
 * the provider attached to consumer 'cp'.  On success the decoded
 * metadata is stored in 'md' and 0 is returned, otherwise an errno
 * value is returned.  Called with the GEOM topology lock held; the
 * lock is dropped around the actual I/O.
 */
static int
g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
{
	struct g_provider *pp;
	u_char *buf;
	int error;

	g_topology_assert();

	/* Open the consumer for reading so g_read_data() is permitted. */
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);
	pp = cp->provider;
	/* I/O must not be performed with the topology lock held. */
	g_topology_unlock();
	/* Metadata are stored on last sector. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	g_topology_lock();
	if (buf == NULL) {
		/* Drop the read reference taken above before bailing out. */
		g_access(cp, -1, 0, 0);
		return (error);
	}
	if (error != 0) {
		g_access(cp, -1, 0, 0);
		g_free(buf);
		return (error);
	}
	/* Release our read reference; this should never fail here. */
	error = g_access(cp, -1, 0, 0);
	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));

	/* Decode metadata. */
	error = mirror_metadata_decode(buf, md);
	g_free(buf);
	/* Providers without the gmirror magic simply don't belong to us. */
	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
		return (EINVAL);
	/* Magic matched but the MD5 checksum did not - corrupt metadata. */
	if (error != 0) {
		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
		    cp->provider->name);
		return (error);
	}

	return (0);
}
2199
2200static int
2201g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2202    struct g_mirror_metadata *md)
2203{
2204
2205	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2206		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2207		    pp->name, md->md_did);
2208		return (EEXIST);
2209	}
2210	if (md->md_all != sc->sc_ndisks) {
2211		G_MIRROR_DEBUG(1,
2212		    "Invalid '%s' field on disk %s (device %s), skipping.",
2213		    "md_all", pp->name, sc->sc_name);
2214		return (EINVAL);
2215	}
2216	if (md->md_slice != sc->sc_slice) {
2217		G_MIRROR_DEBUG(1,
2218		    "Invalid '%s' field on disk %s (device %s), skipping.",
2219		    "md_slice", pp->name, sc->sc_name);
2220		return (EINVAL);
2221	}
2222	if (md->md_balance != sc->sc_balance) {
2223		G_MIRROR_DEBUG(1,
2224		    "Invalid '%s' field on disk %s (device %s), skipping.",
2225		    "md_balance", pp->name, sc->sc_name);
2226		return (EINVAL);
2227	}
2228	if (md->md_mediasize != sc->sc_mediasize) {
2229		G_MIRROR_DEBUG(1,
2230		    "Invalid '%s' field on disk %s (device %s), skipping.",
2231		    "md_mediasize", pp->name, sc->sc_name);
2232		return (EINVAL);
2233	}
2234	if (sc->sc_mediasize > pp->mediasize) {
2235		G_MIRROR_DEBUG(1,
2236		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2237		    sc->sc_name);
2238		return (EINVAL);
2239	}
2240	if (md->md_sectorsize != sc->sc_sectorsize) {
2241		G_MIRROR_DEBUG(1,
2242		    "Invalid '%s' field on disk %s (device %s), skipping.",
2243		    "md_sectorsize", pp->name, sc->sc_name);
2244		return (EINVAL);
2245	}
2246	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2247		G_MIRROR_DEBUG(1,
2248		    "Invalid sector size of disk %s (device %s), skipping.",
2249		    pp->name, sc->sc_name);
2250		return (EINVAL);
2251	}
2252	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2253		G_MIRROR_DEBUG(1,
2254		    "Invalid device flags on disk %s (device %s), skipping.",
2255		    pp->name, sc->sc_name);
2256		return (EINVAL);
2257	}
2258	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2259		G_MIRROR_DEBUG(1,
2260		    "Invalid disk flags on disk %s (device %s), skipping.",
2261		    pp->name, sc->sc_name);
2262		return (EINVAL);
2263	}
2264	return (0);
2265}
2266
2267static int
2268g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2269    struct g_mirror_metadata *md)
2270{
2271	struct g_mirror_disk *disk;
2272	int error;
2273
2274	g_topology_assert();
2275	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2276
2277	error = g_mirror_check_metadata(sc, pp, md);
2278	if (error != 0)
2279		return (error);
2280	disk = g_mirror_init_disk(sc, pp, md, &error);
2281	if (disk == NULL)
2282		return (error);
2283	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2284	    G_MIRROR_EVENT_WAIT);
2285	return (error);
2286}
2287
/*
 * GEOM access method for the mirror provider.  Propagates the access
 * count deltas (acr/acw/ace) to every ACTIVE component's consumer,
 * marks components dirty on first open for writing and clean on last
 * write close, and disconnects components that refuse the request.
 * Returns 0 if at least one active component accepted the change.
 */
static int
g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int dcr, dcw, dce, err, error;

	g_topology_assert();
	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
	    acw, ace);

	/* Access counts on the mirror provider after this request. */
	dcr = pp->acr + acr;
	dcw = pp->acw + acw;
	dce = pp->ace + ace;

	/* On first open, grab an extra "exclusive" bit */
	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
		ace++;
	/* ... and let go of it on last close */
	if (dcr == 0 && dcw == 0 && dce == 0)
		ace--;

	sc = pp->geom->softc;
	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
		/* No components left; only pure releases may succeed. */
		if (acr <= 0 && acw <= 0 && ace <= 0)
			return (0);
		else
			return (ENXIO);
	}
	/* Fail with ENXIO unless some active disk honours the request. */
	error = ENXIO;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		err = g_access(disk->d_consumer, acr, acw, ace);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    g_mirror_get_diskname(disk), acr, acw, ace, err);
		if (err == 0) {
			/*
			 * Mark disk as dirty on open and unmark on close.
			 */
			if (pp->acw == 0 && dcw > 0) {
				G_MIRROR_DEBUG(1,
				    "Disk %s (device %s) marked as dirty.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
				g_mirror_update_metadata(disk);
			} else if (pp->acw > 0 && dcw == 0) {
				G_MIRROR_DEBUG(1,
				    "Disk %s (device %s) marked as clean.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
				g_mirror_update_metadata(disk);
			}
			error = 0;
		} else {
			/*
			 * The component refused the access change: request a
			 * syncid bump on the next first write and drop the
			 * disk from the mirror (without waiting).
			 */
			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}
2351
/*
 * Create a new mirror device from the given metadata: allocate the
 * softc, set up the action geom (which will carry the provider) and
 * the synchronization geom, start the worker thread and arm the
 * startup timeout.  Returns the action geom on success or NULL on
 * failure.  Called with the GEOM topology lock held.
 */
static struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_geom *gp;
	int error, timeout;

	g_topology_assert();
	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
	    md->md_mid);

	/* One disk is minimum. */
	if (md->md_all < 1)
		return (NULL);
	/*
	 * Action geom.
	 */
	gp = g_new_geomf(mp, "%s", md->md_name);
	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
	gp->start = g_mirror_start;
	gp->spoiled = g_mirror_spoiled;
	gp->orphan = g_mirror_orphan;
	gp->access = g_mirror_access;
	gp->dumpconf = g_mirror_dumpconf;

	/* Copy the device-wide configuration out of the metadata. */
	sc->sc_id = md->md_mid;
	sc->sc_slice = md->md_slice;
	sc->sc_balance = md->md_balance;
	sc->sc_mediasize = md->md_mediasize;
	sc->sc_sectorsize = md->md_sectorsize;
	sc->sc_ndisks = md->md_all;
	sc->sc_flags = md->md_mflags;
	sc->sc_bump_syncid = 0;
	bioq_init(&sc->sc_queue);
	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
	LIST_INIT(&sc->sc_disks);
	TAILQ_INIT(&sc->sc_events);
	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
	gp->softc = sc;
	sc->sc_geom = gp;
	sc->sc_provider = NULL;
	/*
	 * Synchronization geom.
	 */
	gp = g_new_geomf(mp, "%s.sync", md->md_name);
	gp->softc = sc;
	gp->orphan = g_mirror_orphan;
	sc->sc_sync.ds_geom = gp;
	sc->sc_sync.ds_ndisks = 0;
	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
	    "g_mirror %s", md->md_name);
	if (error != 0) {
		/* Unwind everything allocated above, in reverse order. */
		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
		    sc->sc_name);
		g_destroy_geom(sc->sc_sync.ds_geom);
		mtx_destroy(&sc->sc_events_mtx);
		mtx_destroy(&sc->sc_queue_mtx);
		g_destroy_geom(sc->sc_geom);
		free(sc, M_MIRROR);
		return (NULL);
	}

	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);

	/*
	 * Run timeout.
	 */
	timeout = atomic_load_acq_int(&g_mirror_timeout);
	callout_reset(&sc->sc_callout, timeout * hz, g_mirror_go, sc);
	return (sc->sc_geom);
}
2425
/*
 * Tear down a mirror device.  If the provider is still open the
 * request is refused with EBUSY unless 'force' is set.  The worker
 * thread is asked to exit via the DESTROY and WAIT flags; we sleep
 * until it is gone, then destroy the device and free the softc.
 * Called with the topology lock held; the lock is dropped while
 * waiting for the worker thread.
 */
int
g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_provider *pp;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);
	pp = sc->sc_provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (force) {
			/* Proceed anyway, but warn loudly. */
			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
			    "can't be definitely removed.", pp->name);
		} else {
			G_MIRROR_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
	}

	/* Ask the worker thread to clean up and exit. */
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
	g_topology_unlock();
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
	/*
	 * Poll until sc_worker becomes NULL - presumably cleared by the
	 * exiting worker thread (not visible here; see g_mirror_worker).
	 */
	while (sc->sc_worker != NULL)
		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
	g_topology_lock();
	g_mirror_destroy_device(sc);
	free(sc, M_MIRROR);
	return (0);
}
2464
2465static void
2466g_mirror_taste_orphan(struct g_consumer *cp)
2467{
2468
2469	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2470	    cp->provider->name));
2471}
2472
/*
 * GEOM taste method: probe provider 'pp' for gmirror metadata and, if
 * it is valid, attach the provider as a component - creating the
 * mirror device first if this is the first component seen.  Returns
 * the mirror's geom, or NULL if the provider does not belong to (or
 * cannot be added to) a mirror.
 */
static struct g_geom *
g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_mirror_metadata md;
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);

	/* Use a throw-away geom/consumer pair just to read the metadata. */
	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should be never called.
	 */
	gp->orphan = g_mirror_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_mirror_read_metadata(cp, &md);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	if (md.md_version > G_MIRROR_VERSION) {
		printf("geom_mirror.ko module is too old to handle %s.\n",
		    pp->name);
		return (NULL);
	}
	/* A hardcoded provider name must match the provider we tasted. */
	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
		return (NULL);
	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
		G_MIRROR_DEBUG(0,
		    "Device %s: provider %s marked as inactive, skipping.",
		    md.md_name, pp->name);
		return (NULL);
	}
	if (g_mirror_debug >= 2)
		mirror_metadata_dump(&md);

	/*
	 * Let's check if device already exists.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		/* Skip synchronization geoms; they share the softc. */
		if (sc->sc_sync.ds_geom == gp)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		/* Same name but a different device id - a name clash. */
		if (md.md_mid != sc->sc_id) {
			G_MIRROR_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
		break;
	}
	if (gp == NULL) {
		gp = g_mirror_create(mp, &md);
		if (gp == NULL) {
			G_MIRROR_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
	}
	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	error = g_mirror_add_disk(sc, pp, &md);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
		    pp->name, gp->name, error);
		/* If the device ended up with no disks, dismantle it. */
		if (LIST_EMPTY(&sc->sc_disks))
			g_mirror_destroy(sc, 1);
		return (NULL);
	}
	return (gp);
}
2556
2557static int
2558g_mirror_destroy_geom(struct gctl_req *req __unused,
2559    struct g_class *mp __unused, struct g_geom *gp)
2560{
2561
2562	return (g_mirror_destroy(gp->softc, 0));
2563}
2564
/*
 * GEOM dumpconf method: emit the XML fragment describing the mirror
 * for the confxml tree.  Depending on which of pp/cp is non-NULL this
 * describes a provider (nothing extra), a consumer (per-disk detail)
 * or the geom itself (device-wide detail).
 */
static void
g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	/* Skip synchronization geom. */
	if (gp == sc->sc_sync.ds_geom)
		return;
	if (pp != NULL) {
		/* Nothing here. */
	} else if (cp != NULL) {
		/* Per-disk (consumer) section. */
		struct g_mirror_disk *disk;

		disk = cp->private;
		if (disk == NULL)
			return;
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			/* Synchronization progress as a percentage. */
			sbuf_printf(sb, "%s<Synchronized>", indent);
			if (disk->d_sync.ds_offset_done == 0)
				sbuf_printf(sb, "0%%");
			else {
				sbuf_printf(sb, "%u%%",
				    (u_int)((disk->d_sync.ds_offset_done * 100) /
				    sc->sc_provider->mediasize));
			}
			sbuf_printf(sb, "</Synchronized>\n");
		}
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
		    disk->d_sync.ds_syncid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (disk->d_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

/* Emit 'name' (comma-separated) if 'flag' is set in disk->d_flags. */
#define	ADD_FLAG(flag, name)	do {					\
	if ((disk->d_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
			    "SYNCHRONIZING");
			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
		    disk->d_priority);
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_mirror_disk_state2str(disk->d_state));
	} else {
		/* Device-wide (geom) section. */
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (sc->sc_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

/* Same flag-printing helper, but for the device flags. */
#define	ADD_FLAG(flag, name)	do {					\
	if ((sc->sc_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
		    (u_int)sc->sc_slice);
		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
		    balance_name(sc->sc_balance));
		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
		    sc->sc_ndisks);
		/* COMPLETE only when every configured disk is ACTIVE. */
		sbuf_printf(sb, "%s<State>", indent);
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			sbuf_printf(sb, "%s", "STARTING");
		else if (sc->sc_ndisks ==
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
			sbuf_printf(sb, "%s", "COMPLETE");
		else
			sbuf_printf(sb, "%s", "DEGRADED");
		sbuf_printf(sb, "</State>\n");
	}
}
2668
/* Register the gmirror class with the GEOM framework at module load. */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2670